Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{
164    AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
165    TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
166};
167use datasynth_core::models::documents::PaymentMethod;
168use datasynth_core::models::IndustrySector;
169use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
170use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
171use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
172use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
173use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
174use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
175use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
176use datasynth_generators::audit::sample_generator::SampleGenerator;
177use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
178use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
179use datasynth_generators::coa_generator::CoAFramework;
180use rayon::prelude::*;
181use rust_decimal::Decimal;
182
183// ============================================================================
184// Configuration Conversion Functions
185// ============================================================================
186
187/// Convert P2P flow config from schema to generator config.
188/// v4.4.1 — build a `DataQualityStats` with only `total_records`
189/// populated to `n_entries`. Used when the data-quality phase is
190/// skipped (by config or resource pressure) so downstream consumers
191/// can still see the denominator. Before v4.4.1 the writer emitted
192/// `total_records: 0` in those cases, which the SDK team flagged as
193/// indistinguishable from "ran but processed nothing".
194fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
195    #[allow(clippy::field_reassign_with_default)]
196    {
197        let mut s = DataQualityStats::default();
198        s.total_records = n_entries;
199        s.missing_values.total_records = n_entries;
200        s.format_variations.total_processed = n_entries;
201        s.duplicates.total_processed = n_entries;
202        s
203    }
204}
205
206fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
207    let payment_behavior = &schema_config.payment_behavior;
208    let late_dist = &payment_behavior.late_payment_days_distribution;
209
210    P2PGeneratorConfig {
211        three_way_match_rate: schema_config.three_way_match_rate,
212        partial_delivery_rate: schema_config.partial_delivery_rate,
213        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
214        price_variance_rate: schema_config.price_variance_rate,
215        max_price_variance_percent: schema_config.max_price_variance_percent,
216        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
217        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
218        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
219        payment_method_distribution: vec![
220            (PaymentMethod::BankTransfer, 0.60),
221            (PaymentMethod::Check, 0.25),
222            (PaymentMethod::Wire, 0.10),
223            (PaymentMethod::CreditCard, 0.05),
224        ],
225        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
226        payment_behavior: P2PPaymentBehavior {
227            late_payment_rate: payment_behavior.late_payment_rate,
228            late_payment_distribution: LatePaymentDistribution {
229                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
230                late_8_to_14: late_dist.late_8_to_14,
231                very_late_15_to_30: late_dist.very_late_15_to_30,
232                severely_late_31_to_60: late_dist.severely_late_31_to_60,
233                extremely_late_over_60: late_dist.extremely_late_over_60,
234            },
235            partial_payment_rate: payment_behavior.partial_payment_rate,
236            payment_correction_rate: payment_behavior.payment_correction_rate,
237            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
238        },
239    }
240}
241
242/// Convert O2C flow config from schema to generator config.
243fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
244    let payment_behavior = &schema_config.payment_behavior;
245
246    O2CGeneratorConfig {
247        credit_check_failure_rate: schema_config.credit_check_failure_rate,
248        partial_shipment_rate: schema_config.partial_shipment_rate,
249        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
250        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
251        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
252        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
253        bad_debt_rate: schema_config.bad_debt_rate,
254        returns_rate: schema_config.return_rate,
255        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
256        payment_method_distribution: vec![
257            (PaymentMethod::BankTransfer, 0.50),
258            (PaymentMethod::Check, 0.30),
259            (PaymentMethod::Wire, 0.15),
260            (PaymentMethod::CreditCard, 0.05),
261        ],
262        payment_behavior: O2CPaymentBehavior {
263            partial_payment_rate: payment_behavior.partial_payments.rate,
264            short_payment_rate: payment_behavior.short_payments.rate,
265            max_short_percent: payment_behavior.short_payments.max_short_percent,
266            on_account_rate: payment_behavior.on_account_payments.rate,
267            payment_correction_rate: payment_behavior.payment_corrections.rate,
268            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
269        },
270    }
271}
272
/// Configuration for which generation phases to run.
///
/// Boolean fields switch individual phases on or off; `usize` fields set
/// how many records the corresponding phase produces.
///
/// Two constructors are provided in this file:
/// - `Default` enables master data, document flows, journal entries,
///   balance validation, progress bars, evolution events and period close,
///   and leaves every other flag off.
/// - [`PhaseConfig::from_config`] keeps those always-on phases and derives
///   the feature-gated flags from the `enabled` switches of a
///   `GeneratorConfig`.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Generate master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Generate document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Generate OCPM events from document flows.
    pub generate_ocpm_events: bool,
    /// Generate journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    ///
    /// [`PhaseConfig::from_config`] enables this when either `fraud.enabled`
    /// or `anomaly_injection.enabled` is set.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Validate balance sheet equation after generation.
    pub validate_balances: bool,
    /// Validate that every `gl_account` referenced in generated JEs exists
    /// in the chart of accounts. Off by default (a soft warning is emitted
    /// instead). Set true to fail the run on any orphan account.
    pub validate_coa_coverage_strict: bool,
    /// Show progress bars.
    pub show_progress: bool,
    /// Number of vendors to generate per company.
    pub vendors_per_company: usize,
    /// Number of customers to generate per company.
    pub customers_per_company: usize,
    /// Number of materials to generate per company.
    pub materials_per_company: usize,
    /// Number of assets to generate per company.
    pub assets_per_company: usize,
    /// Number of employees to generate per company.
    pub employees_per_company: usize,
    /// Number of P2P chains to generate.
    pub p2p_chains: usize,
    /// Number of O2C chains to generate.
    pub o2c_chains: usize,
    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// Number of audit engagements to generate.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Generate banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Generate graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Generate S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    ///
    /// [`PhaseConfig::from_config`] reads this from `source_to_pay.enabled`.
    pub generate_sourcing: bool,
    /// Generate bank reconciliations from payments.
    ///
    /// [`PhaseConfig::from_config`] reads this from
    /// `financial_reporting.enabled`, the same switch that drives
    /// `generate_financial_statements`.
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Generate accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Generate sales quotes, management KPIs, and budgets.
    ///
    /// [`PhaseConfig::from_config`] reads this from `sales_quotes.enabled`.
    pub generate_sales_kpi_budgets: bool,
    /// Generate tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Generate ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Generate intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Generate process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Generate counterfactual (original, mutated) JE pairs for ML training.
    ///
    /// [`PhaseConfig::from_config`] reads this from
    /// `scenarios.generate_counterfactuals`.
    pub generate_counterfactuals: bool,
    /// Generate compliance regulations data (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Generate period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Generate HR data (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Generate treasury data (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Generate project accounting data (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
    /// v3.3.0: generate legal documents per engagement (engagement letters,
    /// management rep letters, legal opinions, regulatory filings,
    /// board resolutions). Gated by `compliance_regulations.legal_documents.enabled`
    /// (and, in [`PhaseConfig::from_config`], by `compliance_regulations.enabled`
    /// as well).
    pub generate_legal_documents: bool,
    /// v3.3.0: generate IT general controls (access logs, change
    /// management records) per audit engagement. Gated by
    /// `audit.it_controls.enabled` (and, in [`PhaseConfig::from_config`],
    /// by `audit.enabled` as well).
    pub generate_it_controls: bool,
    /// v3.3.0: run the analytics-metadata phase after all JE-adding
    /// phases. Wires PriorYearGenerator / IndustryBenchmarkGenerator /
    /// ManagementReportGenerator / DriftEventGenerator. Gated by the
    /// top-level `analytics_metadata.enabled` config flag.
    pub generate_analytics_metadata: bool,
}
374
375impl Default for PhaseConfig {
376    fn default() -> Self {
377        Self {
378            generate_master_data: true,
379            generate_document_flows: true,
380            generate_ocpm_events: false, // Off by default
381            generate_journal_entries: true,
382            inject_anomalies: false,
383            inject_data_quality: false, // Off by default (to preserve clean test data)
384            validate_balances: true,
385            validate_coa_coverage_strict: false,
386            show_progress: true,
387            vendors_per_company: 50,
388            customers_per_company: 100,
389            materials_per_company: 200,
390            assets_per_company: 50,
391            employees_per_company: 100,
392            p2p_chains: 100,
393            o2c_chains: 100,
394            generate_audit: false, // Off by default
395            audit_engagements: 5,
396            workpapers_per_engagement: 20,
397            evidence_per_workpaper: 5,
398            risks_per_engagement: 15,
399            findings_per_engagement: 8,
400            judgments_per_engagement: 10,
401            generate_banking: false,                // Off by default
402            generate_graph_export: false,           // Off by default
403            generate_sourcing: false,               // Off by default
404            generate_bank_reconciliation: false,    // Off by default
405            generate_financial_statements: false,   // Off by default
406            generate_accounting_standards: false,   // Off by default
407            generate_manufacturing: false,          // Off by default
408            generate_sales_kpi_budgets: false,      // Off by default
409            generate_tax: false,                    // Off by default
410            generate_esg: false,                    // Off by default
411            generate_intercompany: false,           // Off by default
412            generate_evolution_events: true,        // On by default
413            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
414            generate_compliance_regulations: false, // Off by default
415            generate_period_close: true,            // On by default
416            generate_hr: false,                     // Off by default
417            generate_treasury: false,               // Off by default
418            generate_project_accounting: false,     // Off by default
419            generate_legal_documents: false,        // v3.3.0 — off by default
420            generate_it_controls: false,            // v3.3.0 — off by default
421            generate_analytics_metadata: false,     // v3.3.0 — off by default
422        }
423    }
424}
425
426impl PhaseConfig {
427    /// Derive phase flags from [`GeneratorConfig`].
428    ///
429    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
430    /// CLI flags can override individual fields after calling this method.
431    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
432        Self {
433            // Always-on phases
434            generate_master_data: true,
435            generate_document_flows: true,
436            generate_journal_entries: true,
437            validate_balances: true,
438            validate_coa_coverage_strict: false,
439            generate_period_close: true,
440            generate_evolution_events: true,
441            show_progress: true,
442
443            // Feature-gated phases — derived from config sections
444            generate_audit: cfg.audit.enabled,
445            generate_banking: cfg.banking.enabled,
446            generate_graph_export: cfg.graph_export.enabled,
447            generate_sourcing: cfg.source_to_pay.enabled,
448            generate_intercompany: cfg.intercompany.enabled,
449            generate_financial_statements: cfg.financial_reporting.enabled,
450            generate_bank_reconciliation: cfg.financial_reporting.enabled,
451            generate_accounting_standards: cfg.accounting_standards.enabled,
452            generate_manufacturing: cfg.manufacturing.enabled,
453            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
454            generate_tax: cfg.tax.enabled,
455            generate_esg: cfg.esg.enabled,
456            generate_ocpm_events: cfg.ocpm.enabled,
457            generate_compliance_regulations: cfg.compliance_regulations.enabled,
458            generate_hr: cfg.hr.enabled,
459            generate_treasury: cfg.treasury.enabled,
460            generate_project_accounting: cfg.project_accounting.enabled,
461
462            // v3.3.0: L1 generator wiring
463            // Legal documents emitted when compliance_regulations is enabled
464            // and the nested legal_documents.enabled flag is set.
465            generate_legal_documents: cfg.compliance_regulations.enabled
466                && cfg.compliance_regulations.legal_documents.enabled,
467            // IT general controls emitted when audit is enabled and the
468            // nested it_controls.enabled flag is set.
469            generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
470            // Analytics metadata phase (prior-year, industry benchmarks,
471            // management reports, drift events).
472            generate_analytics_metadata: cfg.analytics_metadata.enabled,
473
474            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
475            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
476
477            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
478            inject_data_quality: cfg.data_quality.enabled,
479
480            // Count defaults (CLI can override after calling this method)
481            vendors_per_company: 50,
482            customers_per_company: 100,
483            materials_per_company: 200,
484            assets_per_company: 50,
485            employees_per_company: 100,
486            p2p_chains: 100,
487            o2c_chains: 100,
488            audit_engagements: 5,
489            workpapers_per_engagement: 20,
490            evidence_per_workpaper: 5,
491            risks_per_engagement: 15,
492            findings_per_engagement: 8,
493            judgments_per_engagement: 10,
494        }
495    }
496}
497
/// Master data snapshot containing all generated entities.
///
/// Every field is a `Vec`, so the derived `Default` value is a snapshot
/// with all collections empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Generated vendors.
    pub vendors: Vec<Vendor>,
    /// Generated customers.
    pub customers: Vec<Customer>,
    /// Generated materials.
    pub materials: Vec<Material>,
    /// Generated fixed assets.
    pub assets: Vec<FixedAsset>,
    /// Generated employees.
    pub employees: Vec<Employee>,
    /// Generated cost center hierarchy (two-level: departments + sub-departments).
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// v5.1: Generated profit center hierarchy (two-level: top-level
    /// segment / region / product-group nodes + sub-units).  Emits to
    /// SAP CEPC alongside `cost_centers` → CSKS.
    pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
    /// v3.3.0+: organizational profiles (one per company) with
    /// industry / geography / structure / complexity metadata. Emitted
    /// alongside master data when `generate_master_data = true`.
    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
}
524
/// Info about a completed hypergraph export.
///
/// Holds the element counts of the export together with the directory it
/// was written to. Unlike the snapshot structs in this file it does not
/// derive `Default`, so all four fields must be supplied on construction.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes exported.
    pub node_count: usize,
    /// Number of pairwise edges exported.
    pub edge_count: usize,
    /// Number of hyperedges exported.
    pub hyperedge_count: usize,
    /// Output directory path.
    pub output_path: PathBuf,
}
537
/// Document flow snapshot containing all generated document chains.
///
/// The chains are stored per process (`p2p_chains`, `o2c_chains`); the
/// remaining fields hold the same kinds of documents as flat,
/// per-document-type lists. Every field is a `Vec`, so the derived
/// `Default` value has all collections empty.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// All purchase orders (flattened).
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// All goods receipts (flattened).
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// All vendor invoices (flattened).
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// All sales orders (flattened).
    pub sales_orders: Vec<documents::SalesOrder>,
    /// All deliveries (flattened).
    pub deliveries: Vec<documents::Delivery>,
    /// All customer invoices (flattened).
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// All payments (flattened).
    pub payments: Vec<documents::Payment>,
    /// Cross-document references collected from all document headers
    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
    pub document_references: Vec<documents::DocumentReference>,
}
563
/// Subledger snapshot containing generated subledger records.
///
/// Groups AP, AR, fixed-asset and inventory subledger output in one place.
/// Every field is a `Vec`, so the derived `Default` value has all
/// collections empty.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices linked from document flow vendor invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices linked from document flow customer invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// FA subledger records (asset acquisitions from FA generator).
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions from inventory generator.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements from inventory generator.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports, one per company, computed after payment settlement.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports, one per company, computed after payment settlement.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs executed after AR aging (one per company per dunning cycle).
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters generated across all dunning runs.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
590
/// OCPM snapshot containing generated OCPM event log data.
///
/// The derived `Default` value has no event log (`None`) and all three
/// counters at zero.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// OCPM event log (if generated); `None` otherwise.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events generated.
    pub event_count: usize,
    /// Number of objects generated.
    pub object_count: usize,
    /// Number of cases generated.
    pub case_count: usize,
}
603
604/// Audit data snapshot containing all generated audit-related entities.
605#[derive(Debug, Clone, Default)]
606pub struct AuditSnapshot {
607    /// Audit engagements per ISA 210/220.
608    pub engagements: Vec<AuditEngagement>,
609    /// Workpapers per ISA 230.
610    pub workpapers: Vec<Workpaper>,
611    /// Audit evidence per ISA 500.
612    pub evidence: Vec<AuditEvidence>,
613    /// Risk assessments per ISA 315/330.
614    pub risk_assessments: Vec<RiskAssessment>,
615    /// Audit findings per ISA 265.
616    pub findings: Vec<AuditFinding>,
617    /// Professional judgments per ISA 200.
618    pub judgments: Vec<ProfessionalJudgment>,
619    /// External confirmations per ISA 505.
620    pub confirmations: Vec<ExternalConfirmation>,
621    /// Confirmation responses per ISA 505.
622    pub confirmation_responses: Vec<ConfirmationResponse>,
623    /// Audit procedure steps per ISA 330/530.
624    pub procedure_steps: Vec<AuditProcedureStep>,
625    /// Audit samples per ISA 530.
626    pub samples: Vec<AuditSample>,
627    /// Analytical procedure results per ISA 520.
628    pub analytical_results: Vec<AnalyticalProcedureResult>,
629    /// Internal audit functions per ISA 610.
630    pub ia_functions: Vec<InternalAuditFunction>,
631    /// Internal audit reports per ISA 610.
632    pub ia_reports: Vec<InternalAuditReport>,
633    /// Related parties per ISA 550.
634    pub related_parties: Vec<RelatedParty>,
635    /// Related party transactions per ISA 550.
636    pub related_party_transactions: Vec<RelatedPartyTransaction>,
637    // ---- ISA 600: Group Audits ----
638    /// Component auditors assigned by jurisdiction (ISA 600).
639    pub component_auditors: Vec<ComponentAuditor>,
640    /// Group audit plan with materiality allocations (ISA 600).
641    pub group_audit_plan: Option<GroupAuditPlan>,
642    /// Component instructions issued to component auditors (ISA 600).
643    pub component_instructions: Vec<ComponentInstruction>,
644    /// Reports received from component auditors (ISA 600).
645    pub component_reports: Vec<ComponentAuditorReport>,
646    // ---- ISA 210: Engagement Letters ----
647    /// Engagement letters per ISA 210.
648    pub engagement_letters: Vec<EngagementLetter>,
649    // ---- ISA 560 / IAS 10: Subsequent Events ----
650    /// Subsequent events per ISA 560 / IAS 10.
651    pub subsequent_events: Vec<SubsequentEvent>,
652    // ---- ISA 402: Service Organization Controls ----
653    /// Service organizations identified per ISA 402.
654    pub service_organizations: Vec<ServiceOrganization>,
655    /// SOC reports obtained per ISA 402.
656    pub soc_reports: Vec<SocReport>,
657    /// User entity controls documented per ISA 402.
658    pub user_entity_controls: Vec<UserEntityControl>,
659    // ---- ISA 570: Going Concern ----
660    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
661    pub going_concern_assessments:
662        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
663    // ---- ISA 540: Accounting Estimates ----
664    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
665    pub accounting_estimates:
666        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
667    // ---- ISA 700/701/705/706: Audit Opinions ----
668    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
669    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
670    /// Key Audit Matters per ISA 701 (flattened across all opinions).
671    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
672    // ---- SOX 302 / 404 ----
673    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
674    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
675    /// SOX Section 404 ICFR assessments (one per entity per year).
676    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
677    // ---- ISA 320: Materiality ----
678    /// Materiality calculations per entity per period (ISA 320).
679    pub materiality_calculations:
680        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
681    // ---- ISA 315: Combined Risk Assessments ----
682    /// Combined Risk Assessments per account area / assertion (ISA 315).
683    pub combined_risk_assessments:
684        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
685    // ---- ISA 530: Sampling Plans ----
686    /// Sampling plans per CRA at Moderate or higher (ISA 530).
687    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
688    /// Individual sampled items (key items + representative items) per ISA 530.
689    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
690    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
691    /// Significant classes of transactions per ISA 315 (one set per entity).
692    pub significant_transaction_classes:
693        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
694    // ---- ISA 520: Unusual Item Markers ----
695    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
696    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
697    // ---- ISA 520: Analytical Relationships ----
698    /// Analytical relationships (ratios, trends, correlations) per entity.
699    pub analytical_relationships:
700        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
701    // ---- PCAOB-ISA Cross-Reference ----
702    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
703    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
704    // ---- ISA Standard Reference ----
705    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
706    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
707    // ---- ISA 220 / ISA 300: Audit Scopes ----
708    /// Audit scope records (one per engagement) describing the audit boundary.
709    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
710    // ---- FSM Event Trail ----
711    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
712    /// Contains the ordered sequence of state-transition and procedure-step events
713    /// generated by the audit FSM engine.
714    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
715    // ---- v3.3.0: L1 generator wiring ----
716    /// Legal documents (engagement letters, management reps, legal
717    /// opinions, regulatory filings, board resolutions) per entity.
718    /// Emitted by `LegalDocumentGenerator` when
719    /// `compliance_regulations.legal_documents.enabled = true`.
720    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
721    /// IT general controls — access logs (login/privileged action
722    /// audit trail). Emitted by `ItControlsGenerator` when
723    /// `audit.it_controls.enabled = true`.
724    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
725    /// IT general controls — change management records (code deploys,
726    /// config changes, patches). Emitted by `ItControlsGenerator`.
727    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
728}
729
/// Banking KYC/AML data snapshot containing all generated banking entities.
///
/// Bundles the generated entities (customers, accounts, transactions), the
/// AML label sets at transaction / customer / account / relationship level,
/// the exported case narratives, and two summary counters.
///
/// The derived `Default` yields empty vectors and zeroed counters.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers (retail, business, trust).
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions with AML labels.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level AML labels with features.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level AML labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level AML labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level AML labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Case narratives for AML scenarios.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Number of suspicious transactions.
    // NOTE(review): stored independently of `transactions`; how it is derived
    // is not visible in this struct — confirm against the banking phase.
    pub suspicious_count: usize,
    /// Number of AML scenarios generated.
    pub scenario_count: usize,
}
754
/// Graph export snapshot containing exported graph metadata.
///
/// Holds only metadata about the exports (see [`GraphExportInfo`]), not the
/// graph data itself. The derived `Default` yields `exported == false`,
/// `graph_count == 0` and an empty `exports` map.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether graph export was performed.
    pub exported: bool,
    /// Number of graphs exported.
    pub graph_count: usize,
    /// Exported graph metadata (by format name).
    pub exports: HashMap<String, GraphExportInfo>,
}
765
/// Information about an exported graph.
///
/// Stored as the value type of [`GraphExportSnapshot::exports`]. Unlike the
/// snapshot structs in this file it does not derive `Default`, so every field
/// must be supplied when constructing it.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Graph name.
    pub name: String,
    /// Export format (pytorch_geometric, neo4j, dgl).
    pub format: String,
    /// Output directory path.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
780
/// S2C sourcing data snapshot.
///
/// One vector per Source-to-Contract artefact type. The derived `Default`
/// yields a snapshot in which every vector is empty.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events (RFI, RFP, RFQ).
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
803
/// A single period's trial balance with metadata.
///
/// Used as the orchestrator's in-memory representation while it
/// builds per-period FS / CF artefacts.  At write time the runtime
/// converts each `PeriodTrialBalance` to the canonical
/// [`datasynth_core::models::balance::TrialBalance`] shape via
/// [`PeriodTrialBalance::into_canonical`] so the on-disk
/// `period_close/trial_balances.json` matches what the group
/// aggregate phase loads — see
/// `crate::output_writer::write_outputs`.
///
/// The struct carries no company code or currency of its own; both are
/// supplied by the caller of [`PeriodTrialBalance::into_canonical`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period (1-12).
    pub fiscal_period: u8,
    /// Period start date.
    pub period_start: NaiveDate,
    /// Period end date.
    pub period_end: NaiveDate,
    /// Trial balance entries for this period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
    /// Framework string for classifier dispatch in
    /// [`PeriodTrialBalance::into_canonical`] (`"us_gaap"` / `"ifrs"` /
    /// `"french_gaap"` / `"german_gaap"` / `"dual_reporting"`). Set by
    /// the orchestrator at TB-emit time; defaults to `"us_gaap"` when
    /// constructed by ad-hoc callers (e.g. test fixtures).
    ///
    /// When the field is absent from deserialized input, serde fills it
    /// by calling `default_framework`.
    #[serde(default = "default_framework")]
    pub framework: String,
}
834
/// Serde fallback for [`PeriodTrialBalance::framework`]: supplies
/// `"us_gaap"` when the field is missing from the deserialized input.
fn default_framework() -> String {
    String::from("us_gaap")
}
838
839impl PeriodTrialBalance {
840    /// Convert this in-memory period TB into the canonical
841    /// [`datasynth_core::models::balance::TrialBalance`] shape used
842    /// for the on-disk artefact.
843    ///
844    /// v5.1: the on-disk shape is now canonical end-to-end.  Group
845    /// aggregate's `tb_loader` consumes the canonical type directly,
846    /// dropping the v5.0 dual-shape detection that converted from
847    /// `PeriodTrialBalance` JSON on the fly.
848    ///
849    /// v5.33: framework-aware classification — `category` and
850    /// `account_type` are now resolved via
851    /// [`datasynth_core::framework_accounts::FrameworkAccounts`] for the
852    /// framework recorded on `self.framework`, fixing the v5.32-and-prior
853    /// regression where every line was stamped `AccountType::Asset`
854    /// regardless of code (Defect C in the 3-year medium-chain
855    /// FINDINGS doc).
856    ///
857    /// The `is_balanced` / `is_equation_valid` flags are now set to
858    /// `true` with `out_of_balance` / `equation_difference` clamped to
859    /// zero. The interim-TB shape this writer produces is "cumulative
860    /// BS positions + period-only P&L", which is the standard adjusted
861    /// TB layout but has no `Σ debits == Σ credits` invariant — that
862    /// comparison is meaningful only for a gross-flow TB built from
863    /// fully-balanced JEs over a single time window. The integrity that
864    /// IS guaranteed is the underlying per-JE balance invariant
865    /// enforced by [`datasynth_core::models::journal_entry::JournalEntry::new`].
866    /// Downstream consumers that need a real signed-equation check
867    /// (`Σ A = Σ L + Σ E + NI`) should derive it from opening balances
868    /// plus the period-only P&L lines, not from the raw debit/credit
869    /// totals stamped here.
870    pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
871        let framework = &self.framework;
872        let fa = datasynth_core::framework_accounts::FrameworkAccounts::for_framework(framework);
873        let mut total_debits = Decimal::ZERO;
874        let mut total_credits = Decimal::ZERO;
875        let lines: Vec<TrialBalanceLine> = self
876            .entries
877            .into_iter()
878            .map(|e| {
879                total_debits += e.debit_balance;
880                total_credits += e.credit_balance;
881                let category =
882                    AccountCategory::from_account_code_with_framework(&e.account_code, framework);
883                let account_type = fa.classify_account_type(&e.account_code);
884                TrialBalanceLine {
885                    account_code: e.account_code,
886                    account_description: e.account_name,
887                    category,
888                    account_type,
889                    opening_balance: Decimal::ZERO,
890                    period_debits: e.debit_balance,
891                    period_credits: e.credit_balance,
892                    closing_balance: e.debit_balance - e.credit_balance,
893                    debit_balance: e.debit_balance,
894                    credit_balance: e.credit_balance,
895                    cost_center: None,
896                    profit_center: None,
897                }
898            })
899            .collect();
900        TrialBalance {
901            trial_balance_id: format!(
902                "{company_code}-{:04}{:02}",
903                self.fiscal_year, self.fiscal_period
904            ),
905            company_code: company_code.to_string(),
906            company_name: None,
907            as_of_date: self.period_end,
908            fiscal_year: self.fiscal_year as i32,
909            fiscal_period: self.fiscal_period as u32,
910            currency: currency.to_string(),
911            balance_type: TrialBalanceType::Adjusted,
912            lines,
913            total_debits,
914            total_credits,
915            is_balanced: true,
916            out_of_balance: Decimal::ZERO,
917            is_equation_valid: true,
918            equation_difference: Decimal::ZERO,
919            category_summary: std::collections::HashMap::new(),
920            created_at: self
921                .period_start
922                .and_hms_opt(0, 0, 0)
923                .expect("midnight is a valid time"),
924            created_by: "ORCHESTRATOR".to_string(),
925            approved_by: None,
926            approved_at: None,
927            status: TrialBalanceStatus::Final,
928        }
929    }
930}
931
/// Financial reporting snapshot (financial statements + bank reconciliations).
///
/// Also carries consolidation schedules, period-close trial balances,
/// segment reporting and the notes to the financial statements. The derived
/// `Default` yields empty collections throughout.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements (balance sheet, income statement, cash flow).
    /// For multi-entity configs this includes all standalone statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone financial statements keyed by entity code.
    /// Each entity has its own slice of statements.
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules (one per period) showing pre/post elimination detail.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Period-close trial balances (one per period), held in the in-memory
    /// [`PeriodTrialBalance`] shape.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
956
/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
///
/// Holds the generated records themselves, the journal entries derived from
/// pension and stock-based compensation, and a set of `*_count` fields.
///
/// NOTE(review): the `*_count` fields are stored separately from the vectors;
/// presumably each mirrors the length of its vector, but nothing in this
/// struct enforces that — confirm against the code that populates it.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs (actual data).
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items (actual data).
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries (actual data).
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports (actual data).
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments (actual data).
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Defined benefit pension plans (IAS 19 / ASC 715).
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligation (DBO) roll-forwards.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Plan asset roll-forwards.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Journal entries generated from pension expense and OCI remeasurements.
    pub pension_journal_entries: Vec<JournalEntry>,
    /// Stock grants (ASC 718 / IFRS 2).
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock-based compensation period expense records.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Journal entries generated from stock-based compensation expense.
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    /// Payroll run count.
    pub payroll_run_count: usize,
    /// Payroll line item count.
    pub payroll_line_item_count: usize,
    /// Time entry count.
    pub time_entry_count: usize,
    /// Expense report count.
    pub expense_report_count: usize,
    /// Benefit enrollment count.
    pub benefit_enrollment_count: usize,
    /// Pension plan count.
    pub pension_plan_count: usize,
    /// Stock grant count.
    pub stock_grant_count: usize,
}
1001
/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
///
/// Also covers ECL, provisions and contingent liabilities, IAS 21 currency
/// translation, leases, fair value measurements and dual-reporting framework
/// differences, together with the journal entries derived from several of
/// these areas and a set of `*_count` fields.
///
/// NOTE(review): the `*_count` fields are stored separately from the vectors
/// they describe; nothing in this struct keeps them in sync — confirm against
/// the code that populates it.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Revenue recognition contracts (actual data).
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests (actual data).
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations (IFRS 3 / ASC 805).
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Journal entries generated from business combinations (Day 1 + amortization).
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// ECL models (IFRS 9 / ASC 326).
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// ECL provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// Journal entries from ECL provision.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions (IAS 37 / ASC 450).
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movement roll-forwards (IAS 37 / ASC 450).
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities (IAS 37 / ASC 450).
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Journal entries from provisions.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// IAS 21 functional currency translation results (one per entity per period).
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Revenue recognition contract count.
    pub revenue_contract_count: usize,
    /// Impairment test count.
    pub impairment_test_count: usize,
    /// Business combination count.
    pub business_combination_count: usize,
    /// ECL model count.
    pub ecl_model_count: usize,
    /// Provision count.
    pub provision_count: usize,
    /// Currency translation result count (IAS 21).
    pub currency_translation_count: usize,
    // ---- v3.3.1: Lease / FairValue / FrameworkReconciliation ----
    /// Lease contracts (IFRS 16 / ASC 842). Each entry carries its own
    /// ROU asset + lease liability details.
    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
    /// Fair value measurements (IFRS 13 / ASC 820) across Level 1/2/3.
    pub fair_value_measurements:
        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
    /// Framework difference records (dual-reporting only).
    pub framework_differences:
        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
    /// Per-entity framework reconciliation (dual-reporting only).
    pub framework_reconciliations:
        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
    /// Lease count (for stats logging).
    pub lease_count: usize,
    /// Fair value measurement count (for stats logging).
    pub fair_value_measurement_count: usize,
    /// Framework difference record count (for stats logging).
    pub framework_difference_count: usize,
}
1062
/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
///
/// The derived `Default` yields empty vectors and `compliance_graph == None`.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Flattened standard records for output.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction profile records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Generated audit procedures.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Generated compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Generated regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph (if graph integration enabled); `None` otherwise.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
1081
/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
///
/// Each record vector is paired with a `*_count` field.
///
/// NOTE(review): the counts are stored separately from the vectors; presumably
/// each mirrors the corresponding `len()`, but nothing here enforces it —
/// confirm against the code that populates the snapshot.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders (actual data).
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections (actual data).
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts (actual data).
    pub cycle_counts: Vec<CycleCount>,
    /// BOM components (actual data).
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements (actual data).
    pub inventory_movements: Vec<InventoryMovement>,
    /// Production order count.
    pub production_order_count: usize,
    /// Quality inspection count.
    pub quality_inspection_count: usize,
    /// Number of cycle count records.
    pub cycle_count_count: usize,
    /// BOM component count.
    pub bom_component_count: usize,
    /// Inventory movement count.
    pub inventory_movement_count: usize,
}
1106
/// Sales, KPI, and budget data snapshot.
///
/// Three record vectors plus three separately stored counters. The derived
/// `Default` yields empty vectors and zeroed counters.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes (actual data).
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs (actual data).
    pub kpis: Vec<ManagementKpi>,
    /// Budgets (actual data).
    pub budgets: Vec<Budget>,
    /// Sales quote count.
    pub sales_quote_count: usize,
    /// Management KPI count.
    pub kpi_count: usize,
    /// Budget line count.
    // NOTE(review): named for budget *lines*, so this may not equal
    // `budgets.len()` — confirm against the code that sets it.
    pub budget_line_count: usize,
}
1123
/// Anomaly labels generated during injection.
///
/// The derived `Default` yields no labels, `summary == None` and an empty
/// `by_type` map.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// All anomaly labels.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary statistics; `None` when no summary has been recorded.
    pub summary: Option<AnomalySummary>,
    /// Count by anomaly type.
    // NOTE(review): keys are plain strings; the exact key format (e.g. the
    // anomaly type's display name) is not visible here — confirm at the
    // population site.
    pub by_type: HashMap<String, usize>,
}
1134
/// Balance validation results from running balance tracker.
///
/// The derived `Default` yields `validated == false`, so a default value
/// represents "validation not performed"; its other fields (all zero /
/// false / empty) carry no meaning in that state.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether balance sheet equation is satisfied.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Total debits across all entries.
    pub total_debits: rust_decimal::Decimal,
    /// Total credits across all entries.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors encountered.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were found.
    pub has_unbalanced_entries: bool,
}
1157
/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
///
/// Also carries tax anomaly labels, the deferred tax engine output and the
/// journal entries that post tax payable/receivable.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines computed on documents.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns filed per period.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Jurisdiction count.
    // NOTE(review): stored separately from `jurisdictions`; presumably equals
    // its length — confirm.
    pub jurisdiction_count: usize,
    /// Code count.
    // NOTE(review): stored separately from `codes`; presumably equals its
    // length — confirm.
    pub code_count: usize,
    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    /// Journal entries posting tax payable/receivable from computed tax lines.
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
1184
/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
///
/// Derives `Serialize` / `Deserialize`, unlike most other snapshot structs
/// in this file. `ic_document_chains` is excluded from both directions via
/// `#[serde(skip)]`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group ownership structure (parent/subsidiary/associate relationships).
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// IC matched pairs (transaction pairs between related entities).
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// IC journal entries generated from matched pairs (seller side).
    pub seller_journal_entries: Vec<JournalEntry>,
    /// IC journal entries generated from matched pairs (buyer side).
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries for consolidation.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements derived from group structure ownership percentages.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
    ///
    /// Never serialized; after deserialization this is the field's default
    /// value, i.e. `None`.
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    /// IC matched pair count.
    pub matched_pair_count: usize,
    /// IC elimination entry count.
    pub elimination_entry_count: usize,
    /// IC matching rate (0.0 to 1.0).
    pub match_rate: f64,
}
1210
/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
///
/// One vector per ESG record type, plus anomaly labels and two separately
/// stored counters. The derived `Default` yields empty vectors and zeroed
/// counters.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records (scope 1, 2, 3).
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Total emission record count.
    // NOTE(review): stored separately from `emissions`; presumably equals its
    // length — confirm.
    pub emission_count: usize,
    /// Total disclosure count.
    // NOTE(review): stored separately from `disclosures`; presumably equals
    // its length — confirm.
    pub disclosure_count: usize,
}
1247
/// Treasury data snapshot (cash management, hedging, debt, pooling).
///
/// Also carries bank guarantees, intercompany netting runs, treasury anomaly
/// labels and the journal entries derived from treasury instruments. The
/// derived `Default` yields empty vectors throughout.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions (daily balances per account).
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweep transactions.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships (ASC 815/IFRS 9 designations).
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees and letters of credit.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Intercompany netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Journal entries generated from treasury instruments (debt interest accruals,
    /// hedge MTM, cash pool sweeps).
    pub journal_entries: Vec<JournalEntry>,
}
1275
/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
///
/// Also includes change orders. The derived `Default` yields empty vectors
/// throughout.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects with WBS hierarchies.
    pub projects: Vec<Project>,
    /// Project cost lines (linked from source documents).
    pub cost_lines: Vec<ProjectCostLine>,
    /// Revenue recognition records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1292
1293/// Complete result of enhanced generation run.
1294#[derive(Debug, Default)]
1295pub struct EnhancedGenerationResult {
1296    /// Generated chart of accounts.
1297    pub chart_of_accounts: ChartOfAccounts,
1298    /// Master data snapshot.
1299    pub master_data: MasterDataSnapshot,
1300    /// Document flow snapshot.
1301    pub document_flows: DocumentFlowSnapshot,
1302    /// Subledger snapshot (linked from document flows).
1303    pub subledger: SubledgerSnapshot,
1304    /// OCPM event log snapshot (if OCPM generation enabled).
1305    pub ocpm: OcpmSnapshot,
1306    /// Audit data snapshot (if audit generation enabled).
1307    pub audit: AuditSnapshot,
1308    /// Banking KYC/AML data snapshot (if banking generation enabled).
1309    pub banking: BankingSnapshot,
1310    /// Graph export snapshot (if graph export enabled).
1311    pub graph_export: GraphExportSnapshot,
1312    /// S2C sourcing data snapshot (if sourcing generation enabled).
1313    pub sourcing: SourcingSnapshot,
1314    /// Financial reporting snapshot (financial statements + bank reconciliations).
1315    pub financial_reporting: FinancialReportingSnapshot,
1316    /// HR data snapshot (payroll, time entries, expenses).
1317    pub hr: HrSnapshot,
1318    /// Accounting standards snapshot (revenue recognition, impairment).
1319    pub accounting_standards: AccountingStandardsSnapshot,
1320    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1321    pub manufacturing: ManufacturingSnapshot,
1322    /// Sales, KPI, and budget snapshot.
1323    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1324    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1325    pub tax: TaxSnapshot,
1326    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1327    pub esg: EsgSnapshot,
1328    /// Treasury data snapshot (cash management, hedging, debt).
1329    pub treasury: TreasurySnapshot,
1330    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1331    pub project_accounting: ProjectAccountingSnapshot,
1332    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1333    pub process_evolution: Vec<ProcessEvolutionEvent>,
1334    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1335    pub organizational_events: Vec<OrganizationalEvent>,
1336    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1337    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1338    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1339    pub intercompany: IntercompanySnapshot,
1340    /// Generated journal entries.
1341    pub journal_entries: Vec<JournalEntry>,
1342    /// Anomaly labels (if injection enabled).
1343    pub anomaly_labels: AnomalyLabels,
1344    /// Balance validation results (if validation enabled).
1345    pub balance_validation: BalanceValidationResult,
1346    /// Data quality statistics (if injection enabled).
1347    pub data_quality_stats: DataQualityStats,
1348    /// Data quality issue records (if injection enabled).
1349    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1350    /// Generation statistics.
1351    pub statistics: EnhancedGenerationStatistics,
1352    /// Data lineage graph (if tracking enabled).
1353    pub lineage: Option<super::lineage::LineageGraph>,
1354    /// Quality gate evaluation result.
1355    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1356    /// Internal controls (if controls generation enabled).
1357    pub internal_controls: Vec<InternalControl>,
1358    /// SoD (Segregation of Duties) violations identified during control application.
1359    ///
1360    /// Each record corresponds to a journal entry where `sod_violation == true`.
1361    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1362    /// Opening balances (if opening balance generation enabled).
1363    pub opening_balances: Vec<GeneratedOpeningBalance>,
1364    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1365    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1366    /// Counterfactual (original, mutated) JE pairs for ML training.
1367    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1368    /// Fraud red-flag indicators on P2P/O2C documents.
1369    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1370    /// Collusion rings (coordinated fraud networks).
1371    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1372    /// Bi-temporal version chains for vendor entities.
1373    pub temporal_vendor_chains:
1374        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1375    /// Entity relationship graph (nodes + edges with strength scores).
1376    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1377    /// Cross-process links (P2P ↔ O2C via inventory movements).
1378    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1379    /// Industry-specific GL accounts and metadata.
1380    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1381    /// SP5.2 — CoA semantic prior snapshot. When `Some`, `write_journal_entries_csv`
1382    /// builds a secondary lookup from the prior's 3,123 corpus accounts and uses
1383    /// it as a fallback when the synthetic CoA index misses a line's `gl_account`
1384    /// (common when SP3.7's per-source attribute conditional emits corpus account
1385    /// numbers that differ from the synthetic CoA master table's number set).
1386    pub coa_semantic_prior:
1387        Option<datasynth_core::distributions::behavioral_priors::CoaSemanticPrior>,
1388    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1389    pub compliance_regulations: ComplianceRegulationsSnapshot,
1390    /// v3.3.0: analytics-metadata snapshot (prior-year comparatives,
1391    /// industry benchmarks, management reports, drift events). Empty
1392    /// when `analytics_metadata.enabled = false`.
1393    pub analytics_metadata: AnalyticsMetadataSnapshot,
1394    /// v3.5.1+: statistical validation report (Benford, chi-squared,
1395    /// KS) over the generated amount distribution.  `None` when
1396    /// `distributions.validation.enabled = false`.
1397    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1398    /// v4.1.3+: interconnectivity snapshot — vendor tier assignments,
1399    /// customer value-segment labels, and industry-specific metadata
1400    /// populated from the previously-inert `vendor_network`,
1401    /// `customer_segmentation`, and `industry_specific` schema
1402    /// sections. Empty when those sections are disabled.
1403    pub interconnectivity: InterconnectivitySnapshot,
1404}
1405
/// v4.1.3+: tier, segment and industry labels for generated entities.
///
/// Populated when `vendor_network.enabled`, `customer_segmentation.enabled`
/// or `industry_specific.enabled` is set. Keeping the labels here lets
/// downstream tooling (graph export, risk models) consume them without
/// re-deriving them from scratch.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// `(vendor_id, tier)` pairs.
    ///
    /// Tier 1 = strategic / primary; tier 2 = sub-tier suppliers to tier 1;
    /// tier 3 = sub-sub-tier.
    pub vendor_tiers: Vec<(String, u8)>,
    /// `(vendor_id, cluster_label)` pairs.
    ///
    /// The label is one of `"reliable_strategic"`, `"standard_operational"`,
    /// `"transactional"` or `"problematic"`.
    pub vendor_clusters: Vec<(String, String)>,
    /// `(customer_id, value_segment)` pairs.
    ///
    /// The segment is one of `"enterprise"`, `"mid_market"`, `"smb"` or
    /// `"consumer"`.
    pub customer_value_segments: Vec<(String, String)>,
    /// `(customer_id, lifecycle_stage)` pairs.
    ///
    /// The stage is one of `"prospect"`, `"new"`, `"growth"`, `"mature"`,
    /// `"at_risk"`, `"churned"` or `"won_back"`.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Summary of the industry-specific knobs applied, if any (e.g.
    /// `"manufacturing.bom_depth=3"`).
    pub industry_metadata: Vec<String>,
}
1431
1432/// v3.3.0: snapshot for the analytics-metadata phase.
1433#[derive(Debug, Clone, Default)]
1434pub struct AnalyticsMetadataSnapshot {
1435    /// Prior-year comparative balances per account, per entity.
1436    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
1437    /// Industry benchmarks for the configured industry.
1438    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
1439    /// Management-report artefacts (dashboards, MDA sections).
1440    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
1441    /// Drift-event labels emitted from the post-generation sweep.
1442    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
1443}
1444
1445/// Enhanced statistics about a generation run.
1446#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1447pub struct EnhancedGenerationStatistics {
1448    /// Total journal entries generated.
1449    pub total_entries: u64,
1450    /// Total line items generated.
1451    pub total_line_items: u64,
1452    /// Number of accounts in CoA.
1453    pub accounts_count: usize,
1454    /// Number of companies.
1455    pub companies_count: usize,
1456    /// Period in months.
1457    pub period_months: u32,
1458    /// Master data counts.
1459    pub vendor_count: usize,
1460    pub customer_count: usize,
1461    pub material_count: usize,
1462    pub asset_count: usize,
1463    pub employee_count: usize,
1464    /// Document flow counts.
1465    pub p2p_chain_count: usize,
1466    pub o2c_chain_count: usize,
1467    /// Subledger counts.
1468    pub ap_invoice_count: usize,
1469    pub ar_invoice_count: usize,
1470    /// OCPM counts.
1471    pub ocpm_event_count: usize,
1472    pub ocpm_object_count: usize,
1473    pub ocpm_case_count: usize,
1474    /// Audit counts.
1475    pub audit_engagement_count: usize,
1476    pub audit_workpaper_count: usize,
1477    pub audit_evidence_count: usize,
1478    pub audit_risk_count: usize,
1479    pub audit_finding_count: usize,
1480    pub audit_judgment_count: usize,
1481    /// ISA 505 confirmation counts.
1482    #[serde(default)]
1483    pub audit_confirmation_count: usize,
1484    #[serde(default)]
1485    pub audit_confirmation_response_count: usize,
1486    /// ISA 330/530 procedure step and sample counts.
1487    #[serde(default)]
1488    pub audit_procedure_step_count: usize,
1489    #[serde(default)]
1490    pub audit_sample_count: usize,
1491    /// ISA 520 analytical procedure counts.
1492    #[serde(default)]
1493    pub audit_analytical_result_count: usize,
1494    /// ISA 610 internal audit counts.
1495    #[serde(default)]
1496    pub audit_ia_function_count: usize,
1497    #[serde(default)]
1498    pub audit_ia_report_count: usize,
1499    /// ISA 550 related party counts.
1500    #[serde(default)]
1501    pub audit_related_party_count: usize,
1502    #[serde(default)]
1503    pub audit_related_party_transaction_count: usize,
1504    /// Anomaly counts.
1505    pub anomalies_injected: usize,
1506    /// Data quality issue counts.
1507    pub data_quality_issues: usize,
1508    /// Banking counts.
1509    pub banking_customer_count: usize,
1510    pub banking_account_count: usize,
1511    pub banking_transaction_count: usize,
1512    pub banking_suspicious_count: usize,
1513    /// Graph export counts.
1514    pub graph_export_count: usize,
1515    pub graph_node_count: usize,
1516    pub graph_edge_count: usize,
1517    /// LLM enrichment timing (milliseconds).
1518    #[serde(default)]
1519    pub llm_enrichment_ms: u64,
1520    /// Number of vendor names enriched by LLM.
1521    #[serde(default)]
1522    pub llm_vendors_enriched: usize,
1523    /// v4.1.1+: number of customer names enriched by LLM.
1524    #[serde(default)]
1525    pub llm_customers_enriched: usize,
1526    /// v4.1.1+: number of material descriptions enriched by LLM.
1527    #[serde(default)]
1528    pub llm_materials_enriched: usize,
1529    /// v4.1.1+: number of audit finding titles enriched by LLM.
1530    #[serde(default)]
1531    pub llm_findings_enriched: usize,
1532    /// Diffusion enhancement timing (milliseconds).
1533    #[serde(default)]
1534    pub diffusion_enhancement_ms: u64,
1535    /// Number of diffusion samples generated.
1536    #[serde(default)]
1537    pub diffusion_samples_generated: usize,
1538    /// Hybrid-diffusion blend weight actually applied (after clamp to \[0,1\]).
1539    /// `None` when the neural/hybrid backend is not active.
1540    #[serde(default, skip_serializing_if = "Option::is_none")]
1541    pub neural_hybrid_weight: Option<f64>,
1542    /// Hybrid-diffusion strategy applied (weighted_average / column_select / threshold).
1543    #[serde(default, skip_serializing_if = "Option::is_none")]
1544    pub neural_hybrid_strategy: Option<String>,
1545    /// How many columns were routed through the neural backend.
1546    #[serde(default, skip_serializing_if = "Option::is_none")]
1547    pub neural_routed_column_count: Option<usize>,
1548    /// Causal generation timing (milliseconds).
1549    #[serde(default)]
1550    pub causal_generation_ms: u64,
1551    /// Number of causal samples generated.
1552    #[serde(default)]
1553    pub causal_samples_generated: usize,
1554    /// Whether causal validation passed.
1555    #[serde(default)]
1556    pub causal_validation_passed: Option<bool>,
1557    /// S2C sourcing counts.
1558    #[serde(default)]
1559    pub sourcing_project_count: usize,
1560    #[serde(default)]
1561    pub rfx_event_count: usize,
1562    #[serde(default)]
1563    pub bid_count: usize,
1564    #[serde(default)]
1565    pub contract_count: usize,
1566    #[serde(default)]
1567    pub catalog_item_count: usize,
1568    #[serde(default)]
1569    pub scorecard_count: usize,
1570    /// Financial reporting counts.
1571    #[serde(default)]
1572    pub financial_statement_count: usize,
1573    #[serde(default)]
1574    pub bank_reconciliation_count: usize,
1575    /// HR counts.
1576    #[serde(default)]
1577    pub payroll_run_count: usize,
1578    #[serde(default)]
1579    pub time_entry_count: usize,
1580    #[serde(default)]
1581    pub expense_report_count: usize,
1582    #[serde(default)]
1583    pub benefit_enrollment_count: usize,
1584    #[serde(default)]
1585    pub pension_plan_count: usize,
1586    #[serde(default)]
1587    pub stock_grant_count: usize,
1588    /// Accounting standards counts.
1589    #[serde(default)]
1590    pub revenue_contract_count: usize,
1591    #[serde(default)]
1592    pub impairment_test_count: usize,
1593    #[serde(default)]
1594    pub business_combination_count: usize,
1595    #[serde(default)]
1596    pub ecl_model_count: usize,
1597    #[serde(default)]
1598    pub provision_count: usize,
1599    /// Manufacturing counts.
1600    #[serde(default)]
1601    pub production_order_count: usize,
1602    #[serde(default)]
1603    pub quality_inspection_count: usize,
1604    #[serde(default)]
1605    pub cycle_count_count: usize,
1606    #[serde(default)]
1607    pub bom_component_count: usize,
1608    #[serde(default)]
1609    pub inventory_movement_count: usize,
1610    /// Sales & reporting counts.
1611    #[serde(default)]
1612    pub sales_quote_count: usize,
1613    #[serde(default)]
1614    pub kpi_count: usize,
1615    #[serde(default)]
1616    pub budget_line_count: usize,
1617    /// Tax counts.
1618    #[serde(default)]
1619    pub tax_jurisdiction_count: usize,
1620    #[serde(default)]
1621    pub tax_code_count: usize,
1622    /// ESG counts.
1623    #[serde(default)]
1624    pub esg_emission_count: usize,
1625    #[serde(default)]
1626    pub esg_disclosure_count: usize,
1627    /// Intercompany counts.
1628    #[serde(default)]
1629    pub ic_matched_pair_count: usize,
1630    #[serde(default)]
1631    pub ic_elimination_count: usize,
1632    /// Number of intercompany journal entries (seller + buyer side).
1633    #[serde(default)]
1634    pub ic_transaction_count: usize,
1635    /// Number of fixed asset subledger records.
1636    #[serde(default)]
1637    pub fa_subledger_count: usize,
1638    /// Number of inventory subledger records.
1639    #[serde(default)]
1640    pub inventory_subledger_count: usize,
1641    /// Treasury debt instrument count.
1642    #[serde(default)]
1643    pub treasury_debt_instrument_count: usize,
1644    /// Treasury hedging instrument count.
1645    #[serde(default)]
1646    pub treasury_hedging_instrument_count: usize,
1647    /// Project accounting project count.
1648    #[serde(default)]
1649    pub project_count: usize,
1650    /// Project accounting change order count.
1651    #[serde(default)]
1652    pub project_change_order_count: usize,
1653    /// Tax provision count.
1654    #[serde(default)]
1655    pub tax_provision_count: usize,
1656    /// Opening balance count.
1657    #[serde(default)]
1658    pub opening_balance_count: usize,
1659    /// Subledger reconciliation count.
1660    #[serde(default)]
1661    pub subledger_reconciliation_count: usize,
1662    /// Tax line count.
1663    #[serde(default)]
1664    pub tax_line_count: usize,
1665    /// Project cost line count.
1666    #[serde(default)]
1667    pub project_cost_line_count: usize,
1668    /// Cash position count.
1669    #[serde(default)]
1670    pub cash_position_count: usize,
1671    /// Cash forecast count.
1672    #[serde(default)]
1673    pub cash_forecast_count: usize,
1674    /// Cash pool count.
1675    #[serde(default)]
1676    pub cash_pool_count: usize,
1677    /// Process evolution event count.
1678    #[serde(default)]
1679    pub process_evolution_event_count: usize,
1680    /// Organizational event count.
1681    #[serde(default)]
1682    pub organizational_event_count: usize,
1683    /// Counterfactual pair count.
1684    #[serde(default)]
1685    pub counterfactual_pair_count: usize,
1686    /// Number of fraud red-flag indicators generated.
1687    #[serde(default)]
1688    pub red_flag_count: usize,
1689    /// Number of collusion rings generated.
1690    #[serde(default)]
1691    pub collusion_ring_count: usize,
1692    /// Number of bi-temporal vendor version chains generated.
1693    #[serde(default)]
1694    pub temporal_version_chain_count: usize,
1695    /// Number of nodes in the entity relationship graph.
1696    #[serde(default)]
1697    pub entity_relationship_node_count: usize,
1698    /// Number of edges in the entity relationship graph.
1699    #[serde(default)]
1700    pub entity_relationship_edge_count: usize,
1701    /// Number of cross-process links generated.
1702    #[serde(default)]
1703    pub cross_process_link_count: usize,
1704    /// Number of disruption events generated.
1705    #[serde(default)]
1706    pub disruption_event_count: usize,
1707    /// Number of industry-specific GL accounts generated.
1708    #[serde(default)]
1709    pub industry_gl_account_count: usize,
1710    /// Number of period-close journal entries generated (tax provision + closing entries).
1711    #[serde(default)]
1712    pub period_close_je_count: usize,
1713}
1714
1715/// Enhanced orchestrator with full feature integration.
1716pub struct EnhancedOrchestrator {
1717    config: GeneratorConfig,
1718    phase_config: PhaseConfig,
1719    coa: Option<Arc<ChartOfAccounts>>,
1720    master_data: MasterDataSnapshot,
1721    seed: u64,
1722    multi_progress: Option<MultiProgress>,
1723    /// Resource guard for memory, disk, and CPU monitoring
1724    resource_guard: ResourceGuard,
1725    /// Output path for disk space monitoring
1726    output_path: Option<PathBuf>,
1727    /// Copula generators for preserving correlations (from fingerprint)
1728    copula_generators: Vec<CopulaGeneratorSpec>,
1729    /// Country pack registry for localized data generation
1730    country_pack_registry: datasynth_core::CountryPackRegistry,
1731    /// Optional streaming sink for phase-by-phase output
1732    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1733    /// Shared template provider for user-supplied template packs.
1734    ///
1735    /// Constructed from `config.templates.path` at orchestrator creation
1736    /// time. When the path is `None`, this is still populated with an
1737    /// embedded-only provider so generators can always call trait methods
1738    /// without an `Option<…>` guard. v3.2.0+.
1739    template_provider: datasynth_core::templates::SharedTemplateProvider,
1740    /// v3.4.1+ temporal context for business-day / holiday awareness.
1741    ///
1742    /// Populated only when `temporal_patterns.business_days.enabled`. When
1743    /// `None`, document-flow / HR / treasury / period-close generators keep
1744    /// their legacy raw-RNG date-offset behaviour (byte-identical to v3.4.0
1745    /// for the same seed).
1746    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
1747    /// Optional shard-mode context (set by group-engine shard runners).
1748    /// `None` preserves byte-for-byte pre-v5.0 single-entity behavior.
1749    shard_context: Option<crate::shard_context::ShardContext>,
1750    /// SP3.12 — cached priors, shared between `generate_journal_entries` (which
1751    /// loads them) and `generate_jes_from_document_flows` (which applies padding).
1752    /// Set once after the SP3 opt-in block in `generate_journal_entries`.
1753    cached_priors: Option<std::sync::Arc<datasynth_generators::priors_loader::LoadedPriors>>,
1754}
1755
1756impl EnhancedOrchestrator {
1757    /// Create a new enhanced orchestrator.
1758    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1759        datasynth_config::validate_config(&config)?;
1760
1761        let seed = config.global.seed.unwrap_or_else(rand::random);
1762
1763        // Build resource guard from config
1764        let resource_guard = Self::build_resource_guard(&config, None);
1765
1766        // Build country pack registry from config
1767        let country_pack_registry = match &config.country_packs {
1768            Some(cp) => {
1769                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1770                    .map_err(|e| SynthError::config(e.to_string()))?
1771            }
1772            None => datasynth_core::CountryPackRegistry::builtin_only()
1773                .map_err(|e| SynthError::config(e.to_string()))?,
1774        };
1775
1776        // Build the shared template provider from config.templates.path.
1777        // `None` → embedded-only provider (byte-identical pre-v3.2.0 output).
1778        // `Some(path)` → load file/dir and honour `merge_strategy`.
1779        let template_provider = Self::build_template_provider(&config)?;
1780
1781        // v3.4.1: build a shared temporal context when
1782        // `temporal_patterns.business_days.enabled`. `None` preserves the
1783        // raw-RNG date-offset behaviour per-generator.
1784        let temporal_context = Self::build_temporal_context(&config)?;
1785
1786        Ok(Self {
1787            config,
1788            phase_config,
1789            coa: None,
1790            master_data: MasterDataSnapshot::default(),
1791            seed,
1792            multi_progress: None,
1793            resource_guard,
1794            output_path: None,
1795            copula_generators: Vec::new(),
1796            country_pack_registry,
1797            phase_sink: None,
1798            template_provider,
1799            temporal_context,
1800            shard_context: None,
1801            cached_priors: None,
1802        })
1803    }
1804
1805    /// Install shard-mode context.  Called by the group shard runner
1806    /// before [`EnhancedOrchestrator::generate`] (or the equivalent
1807    /// entry point).  Has no effect on single-entity runs.
1808    ///
1809    /// See [`crate::shard_context::ShardContext`] for rationale.
1810    pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1811        self.shard_context = Some(ctx);
1812    }
1813
1814    /// Build the shared [`TemporalContext`] from `config.temporal_patterns`.
1815    ///
1816    /// Returns `Ok(None)` when temporal-pattern features are disabled — the
1817    /// caller keeps its legacy raw-RNG path. Returns `Ok(Some(arc))` when
1818    /// enabled. Returns `Err` only for unrecoverable config errors.
1819    fn build_temporal_context(
1820        config: &GeneratorConfig,
1821    ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1822        use datasynth_core::distributions::{parse_region_code, TemporalContext};
1823
1824        let tp = &config.temporal_patterns;
1825        if !tp.enabled || !tp.business_days.enabled {
1826            return Ok(None);
1827        }
1828
1829        let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1830            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1831        let end_date = start_date + chrono::Months::new(config.global.period_months);
1832
1833        let region_code = tp
1834            .calendars
1835            .regions
1836            .first()
1837            .cloned()
1838            .unwrap_or_else(|| "US".to_string());
1839        let region = parse_region_code(&region_code);
1840
1841        Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1842    }
1843
1844    /// Build the shared template provider from `config.templates`.
1845    ///
1846    /// Always returns a provider — falls back to embedded-only when
1847    /// `config.templates.path` is `None`. The merge-strategy from config
1848    /// maps onto the loader's [`MergeStrategy`] enum. Load failures at
1849    /// orchestrator-construction time are fatal (preferable to silently
1850    /// using embedded pools when the user supplied a bad path).
1851    fn build_template_provider(
1852        config: &GeneratorConfig,
1853    ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1854        use datasynth_core::templates::{
1855            loader::{MergeStrategy, TemplateLoader},
1856            DefaultTemplateProvider,
1857        };
1858        use std::sync::Arc;
1859
1860        let provider = match &config.templates.path {
1861            None => DefaultTemplateProvider::new(),
1862            Some(path) => {
1863                let data = if path.is_dir() {
1864                    TemplateLoader::load_from_directory(path)
1865                } else {
1866                    TemplateLoader::load_from_file(path)
1867                }
1868                .map_err(|e| {
1869                    SynthError::config(format!(
1870                        "Failed to load templates from {}: {e}",
1871                        path.display()
1872                    ))
1873                })?;
1874                let strategy = match config.templates.merge_strategy {
1875                    datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1876                    datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1877                    datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1878                        MergeStrategy::MergePreferFile
1879                    }
1880                };
1881                DefaultTemplateProvider::with_templates(data, strategy)
1882            }
1883        };
1884        Ok(Arc::new(provider))
1885    }
1886
1887    /// Create with default phase config.
1888    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1889        Self::new(config, PhaseConfig::default())
1890    }
1891
1892    /// Set a streaming phase sink for real-time output (builder pattern).
1893    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1894        self.phase_sink = Some(sink);
1895        self
1896    }
1897
1898    /// Set a streaming phase sink on an existing orchestrator.
1899    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1900        self.phase_sink = Some(sink);
1901    }
1902
1903    /// Emit a batch of items to the phase sink (if configured).
1904    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1905        if let Some(ref sink) = self.phase_sink {
1906            for item in items {
1907                if let Ok(value) = serde_json::to_value(item) {
1908                    if let Err(e) = sink.emit(phase, type_name, &value) {
1909                        warn!(
1910                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1911                        );
1912                    }
1913                }
1914            }
1915            if let Err(e) = sink.phase_complete(phase) {
1916                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1917            }
1918        }
1919    }
1920
1921    /// Enable/disable progress bars.
1922    pub fn with_progress(mut self, show: bool) -> Self {
1923        self.phase_config.show_progress = show;
1924        if show {
1925            self.multi_progress = Some(MultiProgress::new());
1926        }
1927        self
1928    }
1929
1930    /// Set the output path for disk space monitoring.
1931    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1932        let path = path.into();
1933        self.output_path = Some(path.clone());
1934        // Rebuild resource guard with the output path
1935        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1936        self
1937    }
1938
1939    /// Access the country pack registry.
1940    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1941        &self.country_pack_registry
1942    }
1943
1944    /// Look up a country pack by country code string.
1945    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1946        self.country_pack_registry.get_by_str(country)
1947    }
1948
1949    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1950    /// company, defaulting to `"US"` if no companies are configured.
1951    fn primary_country_code(&self) -> &str {
1952        self.config
1953            .companies
1954            .first()
1955            .map(|c| c.country.as_str())
1956            .unwrap_or("US")
1957    }
1958
1959    /// Resolve the country pack for the primary (first) company.
1960    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1961        self.country_pack_for(self.primary_country_code())
1962    }
1963
1964    /// Resolve the CoA framework from config/country-pack.
1965    fn resolve_coa_framework(&self) -> CoAFramework {
1966        if self.config.accounting_standards.enabled {
1967            match self.config.accounting_standards.framework {
1968                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1969                    return CoAFramework::FrenchPcg;
1970                }
1971                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1972                    return CoAFramework::GermanSkr04;
1973                }
1974                _ => {}
1975            }
1976        }
1977        // Fallback: derive from country pack
1978        let pack = self.primary_pack();
1979        match pack.accounting.framework.as_str() {
1980            "french_gaap" => CoAFramework::FrenchPcg,
1981            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1982            _ => CoAFramework::UsGaap,
1983        }
1984    }
1985
1986    /// Resolve the framework string consumed by
1987    /// [`datasynth_core::framework_accounts::FrameworkAccounts::for_framework`].
1988    ///
1989    /// Mirrors [`Self::resolve_coa_framework`] but returns the snake_case
1990    /// label (`"us_gaap"`, `"ifrs"`, `"french_gaap"`, `"german_gaap"`,
1991    /// `"dual_reporting"`) that the framework-aware account classifier
1992    /// expects. Country drives selection because the country pack's CoA
1993    /// loader is what actually picks the numbering convention (SKR04 for
1994    /// DE, PCG for FR) — the entity's `accounting_framework` label can
1995    /// disagree with the chart it's posted against (e.g. a DE entity
1996    /// flagged `accounting_framework: ifrs` still gets SKR04 codes from
1997    /// its country pack).
1998    fn resolve_framework_str(&self) -> &'static str {
1999        // Country first — the chart of accounts loaded for this company
2000        // is keyed by country pack, so the code numbering convention
2001        // follows country, not the framework label.
2002        match self.primary_country_code().to_ascii_uppercase().as_str() {
2003            "DE" | "AT" => "german_gaap",
2004            "FR" | "BE" | "LU" => "french_gaap",
2005            _ => {
2006                // No country override → take the framework label.
2007                if self.config.accounting_standards.enabled {
2008                    match self.config.accounting_standards.framework {
2009                        Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
2010                            return "french_gaap";
2011                        }
2012                        Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
2013                            return "german_gaap";
2014                        }
2015                        Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
2016                            return "ifrs";
2017                        }
2018                        Some(
2019                            datasynth_config::schema::AccountingFrameworkConfig::DualReporting,
2020                        ) => {
2021                            return "dual_reporting";
2022                        }
2023                        Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap)
2024                        | None => {}
2025                    }
2026                }
2027                "us_gaap"
2028            }
2029        }
2030    }
2031
2032    /// Check if copula generators are available.
2033    ///
2034    /// Returns true if the orchestrator has copula generators for preserving
2035    /// correlations (typically from fingerprint-based generation).
2036    pub fn has_copulas(&self) -> bool {
2037        !self.copula_generators.is_empty()
2038    }
2039
2040    /// Get the copula generators.
2041    ///
2042    /// Returns a reference to the copula generators for use during generation.
2043    /// These can be used to generate correlated samples that preserve the
2044    /// statistical relationships from the source data.
2045    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
2046        &self.copula_generators
2047    }
2048
2049    /// Get a mutable reference to the copula generators.
2050    ///
2051    /// Allows generators to sample from copulas during data generation.
2052    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
2053        &mut self.copula_generators
2054    }
2055
2056    /// Sample correlated values from a named copula.
2057    ///
2058    /// Returns None if the copula doesn't exist.
2059    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
2060        self.copula_generators
2061            .iter_mut()
2062            .find(|c| c.name == copula_name)
2063            .map(|c| c.generator.sample())
2064    }
2065
2066    /// Create an orchestrator from a fingerprint file.
2067    ///
2068    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
2069    /// and creates an orchestrator configured to generate data matching
2070    /// the statistical properties of the original data.
2071    ///
2072    /// # Arguments
2073    /// * `fingerprint_path` - Path to the .dsf fingerprint file
2074    /// * `phase_config` - Phase configuration for generation
2075    /// * `scale` - Scale factor for row counts (1.0 = same as original)
2076    ///
2077    /// # Example
2078    /// ```no_run
2079    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
2080    /// use std::path::Path;
2081    ///
2082    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
2083    ///     Path::new("fingerprint.dsf"),
2084    ///     PhaseConfig::default(),
2085    ///     1.0,
2086    /// ).unwrap();
2087    /// ```
2088    pub fn from_fingerprint(
2089        fingerprint_path: &std::path::Path,
2090        phase_config: PhaseConfig,
2091        scale: f64,
2092    ) -> SynthResult<Self> {
2093        info!("Loading fingerprint from: {}", fingerprint_path.display());
2094
2095        // Read the fingerprint
2096        let reader = FingerprintReader::new();
2097        let fingerprint = reader
2098            .read_from_file(fingerprint_path)
2099            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2100
2101        Self::from_fingerprint_data(fingerprint, phase_config, scale)
2102    }
2103
2104    /// Create an orchestrator from a loaded fingerprint.
2105    ///
2106    /// # Arguments
2107    /// * `fingerprint` - The loaded fingerprint
2108    /// * `phase_config` - Phase configuration for generation
2109    /// * `scale` - Scale factor for row counts (1.0 = same as original)
2110    pub fn from_fingerprint_data(
2111        fingerprint: Fingerprint,
2112        phase_config: PhaseConfig,
2113        scale: f64,
2114    ) -> SynthResult<Self> {
2115        info!(
2116            "Synthesizing config from fingerprint (version: {}, tables: {})",
2117            fingerprint.manifest.version,
2118            fingerprint.schema.tables.len()
2119        );
2120
2121        // Generate a seed for the synthesis
2122        let seed: u64 = rand::random();
2123        info!("Fingerprint synthesis seed: {}", seed);
2124
2125        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
2126        let options = SynthesisOptions {
2127            scale,
2128            seed: Some(seed),
2129            preserve_correlations: true,
2130            inject_anomalies: true,
2131        };
2132        let synthesizer = ConfigSynthesizer::with_options(options);
2133
2134        // Synthesize full result including copula generators
2135        let synthesis_result = synthesizer
2136            .synthesize_full(&fingerprint, seed)
2137            .map_err(|e| {
2138                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2139            })?;
2140
2141        // Start with a base config from the fingerprint's industry if available
2142        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2143            Self::base_config_for_industry(industry)
2144        } else {
2145            Self::base_config_for_industry("manufacturing")
2146        };
2147
2148        // Apply the synthesized patches
2149        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2150
2151        // Log synthesis results
2152        info!(
2153            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2154            fingerprint.schema.tables.len(),
2155            scale,
2156            synthesis_result.copula_generators.len()
2157        );
2158
2159        if !synthesis_result.copula_generators.is_empty() {
2160            for spec in &synthesis_result.copula_generators {
2161                info!(
2162                    "  Copula '{}' for table '{}': {} columns",
2163                    spec.name,
2164                    spec.table,
2165                    spec.columns.len()
2166                );
2167            }
2168        }
2169
2170        // Create the orchestrator with the synthesized config
2171        let mut orchestrator = Self::new(config, phase_config)?;
2172
2173        // Store copula generators for use during generation
2174        orchestrator.copula_generators = synthesis_result.copula_generators;
2175
2176        Ok(orchestrator)
2177    }
2178
2179    /// Create a base config for a given industry.
2180    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2181        use datasynth_config::presets::create_preset;
2182        use datasynth_config::TransactionVolume;
2183        use datasynth_core::models::{CoAComplexity, IndustrySector};
2184
2185        let sector = match industry.to_lowercase().as_str() {
2186            "manufacturing" => IndustrySector::Manufacturing,
2187            "retail" => IndustrySector::Retail,
2188            "financial" | "financial_services" => IndustrySector::FinancialServices,
2189            "healthcare" => IndustrySector::Healthcare,
2190            "technology" | "tech" => IndustrySector::Technology,
2191            _ => IndustrySector::Manufacturing,
2192        };
2193
2194        // Create a preset with reasonable defaults
2195        create_preset(
2196            sector,
2197            1,  // company count
2198            12, // period months
2199            CoAComplexity::Medium,
2200            TransactionVolume::TenK,
2201        )
2202    }
2203
2204    /// Apply a config patch to a GeneratorConfig.
2205    fn apply_config_patch(
2206        mut config: GeneratorConfig,
2207        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2208    ) -> GeneratorConfig {
2209        use datasynth_fingerprint::synthesis::ConfigValue;
2210
2211        for (key, value) in patch.values() {
2212            match (key.as_str(), value) {
2213                // Transaction count is handled via TransactionVolume enum on companies
2214                // Log it but cannot directly set it (would need to modify company volumes)
2215                ("transactions.count", ConfigValue::Integer(n)) => {
2216                    info!(
2217                        "Fingerprint suggests {} transactions (apply via company volumes)",
2218                        n
2219                    );
2220                }
2221                ("global.period_months", ConfigValue::Integer(n)) => {
2222                    config.global.period_months = (*n).clamp(1, 120) as u32;
2223                }
2224                ("global.start_date", ConfigValue::String(s)) => {
2225                    config.global.start_date = s.clone();
2226                }
2227                ("global.seed", ConfigValue::Integer(n)) => {
2228                    config.global.seed = Some(*n as u64);
2229                }
2230                ("fraud.enabled", ConfigValue::Bool(b)) => {
2231                    config.fraud.enabled = *b;
2232                }
2233                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2234                    config.fraud.fraud_rate = *f;
2235                }
2236                ("data_quality.enabled", ConfigValue::Bool(b)) => {
2237                    config.data_quality.enabled = *b;
2238                }
2239                // Handle anomaly injection paths (mapped to fraud config)
2240                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2241                    config.fraud.enabled = *b;
2242                }
2243                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2244                    config.fraud.fraud_rate = *f;
2245                }
2246                _ => {
2247                    debug!("Ignoring unknown config patch key: {}", key);
2248                }
2249            }
2250        }
2251
2252        config
2253    }
2254
2255    /// Build a resource guard from the configuration.
2256    fn build_resource_guard(
2257        config: &GeneratorConfig,
2258        output_path: Option<PathBuf>,
2259    ) -> ResourceGuard {
2260        let mut builder = ResourceGuardBuilder::new();
2261
2262        // Configure memory limit if set
2263        if config.global.memory_limit_mb > 0 {
2264            builder = builder.memory_limit(config.global.memory_limit_mb);
2265        }
2266
2267        // Configure disk monitoring for output path
2268        if let Some(path) = output_path {
2269            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
2270        }
2271
2272        // Use conservative degradation settings for production safety
2273        builder = builder.conservative();
2274
2275        builder.build()
2276    }
2277
2278    /// Check resources (memory, disk, CPU) and return degradation level.
2279    ///
2280    /// Returns an error if hard limits are exceeded.
2281    /// Returns Ok(DegradationLevel) indicating current resource state.
2282    fn check_resources(&self) -> SynthResult<DegradationLevel> {
2283        self.resource_guard.check()
2284    }
2285
2286    /// Check resources with logging.
2287    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2288        let level = self.resource_guard.check()?;
2289
2290        if level != DegradationLevel::Normal {
2291            warn!(
2292                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2293                phase,
2294                level,
2295                self.resource_guard.current_memory_mb(),
2296                self.resource_guard.available_disk_mb()
2297            );
2298        }
2299
2300        Ok(level)
2301    }
2302
2303    /// Get current degradation actions based on resource state.
2304    fn get_degradation_actions(&self) -> DegradationActions {
2305        self.resource_guard.get_actions()
2306    }
2307
2308    /// Legacy method for backwards compatibility - now uses ResourceGuard.
2309    fn check_memory_limit(&self) -> SynthResult<()> {
2310        self.check_resources()?;
2311        Ok(())
2312    }
2313
2314    /// Run the complete generation workflow.
2315    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2316        info!("Starting enhanced generation workflow");
2317        info!(
2318            "Config: industry={:?}, period_months={}, companies={}",
2319            self.config.global.industry,
2320            self.config.global.period_months,
2321            self.config.companies.len()
2322        );
2323
2324        // Set decimal serialization mode (thread-local, affects JSON output).
2325        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
2326        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2327        datasynth_core::serde_decimal::set_numeric_native(is_native);
2328        struct NumericModeGuard;
2329        impl Drop for NumericModeGuard {
2330            fn drop(&mut self) {
2331                datasynth_core::serde_decimal::set_numeric_native(false);
2332            }
2333        }
2334        let _numeric_guard = if is_native {
2335            Some(NumericModeGuard)
2336        } else {
2337            None
2338        };
2339
2340        // Initial resource check before starting
2341        let initial_level = self.check_resources_with_log("initial")?;
2342        if initial_level == DegradationLevel::Emergency {
2343            return Err(SynthError::resource(
2344                "Insufficient resources to start generation",
2345            ));
2346        }
2347
2348        let mut stats = EnhancedGenerationStatistics {
2349            companies_count: self.config.companies.len(),
2350            period_months: self.config.global.period_months,
2351            ..Default::default()
2352        };
2353
2354        // Phase 1: Chart of Accounts
2355        let coa = self.phase_chart_of_accounts(&mut stats)?;
2356
2357        // Phase 2: Master Data
2358        self.phase_master_data(&mut stats)?;
2359
2360        // Emit master data to stream sink
2361        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2362        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2363        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2364
2365        // Phase 3: Document Flows + Subledger Linking
2366        let (mut document_flows, mut subledger, fa_journal_entries) =
2367            self.phase_document_flows(&mut stats)?;
2368
2369        // Emit document flows to stream sink
2370        self.emit_phase_items(
2371            "document_flows",
2372            "PurchaseOrder",
2373            &document_flows.purchase_orders,
2374        );
2375        self.emit_phase_items(
2376            "document_flows",
2377            "GoodsReceipt",
2378            &document_flows.goods_receipts,
2379        );
2380        self.emit_phase_items(
2381            "document_flows",
2382            "VendorInvoice",
2383            &document_flows.vendor_invoices,
2384        );
2385        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2386        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2387
2388        // Phase 3b: Opening Balances (before JE generation)
2389        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2390
2391        // Phase 3c: Convert opening balances to journal entries and prepend them.
2392        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
2393        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
2394        // balance map type.
2395        let opening_balance_jes: Vec<JournalEntry> = opening_balances
2396            .iter()
2397            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2398            .collect();
2399        if !opening_balance_jes.is_empty() {
2400            debug!(
2401                "Prepending {} opening balance JEs to entries",
2402                opening_balance_jes.len()
2403            );
2404        }
2405
2406        // Phase 4: Journal Entries
2407        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2408
2409        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
2410        // starts from the correct initial state.
2411        if !opening_balance_jes.is_empty() {
2412            let mut combined = opening_balance_jes;
2413            combined.extend(entries);
2414            entries = combined;
2415        }
2416
2417        // Phase 4c: Append FA acquisition journal entries to main entries
2418        if !fa_journal_entries.is_empty() {
2419            debug!(
2420                "Appending {} FA acquisition JEs to main entries",
2421                fa_journal_entries.len()
2422            );
2423            entries.extend(fa_journal_entries);
2424        }
2425
2426        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
2427        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2428
2429        // Get current degradation actions for optional phases
2430        let actions = self.get_degradation_actions();
2431
2432        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
2433        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2434
2435        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
2436        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
2437        if !sourcing.contracts.is_empty() {
2438            let mut linked_count = 0usize;
2439            // Collect (vendor_id, po_id) pairs from P2P chains
2440            let po_vendor_pairs: Vec<(String, String)> = document_flows
2441                .p2p_chains
2442                .iter()
2443                .map(|chain| {
2444                    (
2445                        chain.purchase_order.vendor_id.clone(),
2446                        chain.purchase_order.header.document_id.clone(),
2447                    )
2448                })
2449                .collect();
2450
2451            for chain in &mut document_flows.p2p_chains {
2452                if chain.purchase_order.contract_id.is_none() {
2453                    if let Some(contract) = sourcing
2454                        .contracts
2455                        .iter()
2456                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2457                    {
2458                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2459                        linked_count += 1;
2460                    }
2461                }
2462            }
2463
2464            // Populate reverse FK: purchase_order_ids on each contract
2465            for contract in &mut sourcing.contracts {
2466                let po_ids: Vec<String> = po_vendor_pairs
2467                    .iter()
2468                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2469                    .map(|(_, po_id)| po_id.clone())
2470                    .collect();
2471                if !po_ids.is_empty() {
2472                    contract.purchase_order_ids = po_ids;
2473                }
2474            }
2475
2476            if linked_count > 0 {
2477                debug!(
2478                    "Linked {} purchase orders to S2C contracts by vendor match",
2479                    linked_count
2480                );
2481            }
2482        }
2483
2484        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2485        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2486
2487        // Phase 5c: Append IC journal entries to main entries
2488        if !intercompany.seller_journal_entries.is_empty()
2489            || !intercompany.buyer_journal_entries.is_empty()
2490        {
2491            let ic_je_count = intercompany.seller_journal_entries.len()
2492                + intercompany.buyer_journal_entries.len();
2493            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2494            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2495            debug!(
2496                "Appended {} IC journal entries to main entries",
2497                ic_je_count
2498            );
2499        }
2500
2501        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2502        if !intercompany.elimination_entries.is_empty() {
2503            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2504                &intercompany.elimination_entries,
2505            );
2506            if !elim_jes.is_empty() {
2507                debug!(
2508                    "Appended {} elimination journal entries to main entries",
2509                    elim_jes.len()
2510                );
2511                // IC elimination net-zero assertion (v2.5 hardening)
2512                let elim_debit: rust_decimal::Decimal =
2513                    elim_jes.iter().map(|je| je.total_debit()).sum();
2514                let elim_credit: rust_decimal::Decimal =
2515                    elim_jes.iter().map(|je| je.total_credit()).sum();
2516                let elim_diff = (elim_debit - elim_credit).abs();
2517                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2518                if elim_diff > tolerance {
2519                    return Err(datasynth_core::error::SynthError::generation(format!(
2520                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2521                        elim_debit, elim_credit, elim_diff, tolerance
2522                    )));
2523                }
2524                debug!(
2525                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2526                    elim_debit, elim_credit, elim_diff
2527                );
2528                entries.extend(elim_jes);
2529            }
2530        }
2531
2532        // Phase 5e: Wire IC source documents into document flow snapshot
2533        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2534            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2535                document_flows
2536                    .customer_invoices
2537                    .extend(ic_docs.seller_invoices.iter().cloned());
2538                document_flows
2539                    .purchase_orders
2540                    .extend(ic_docs.buyer_orders.iter().cloned());
2541                document_flows
2542                    .goods_receipts
2543                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2544                document_flows
2545                    .vendor_invoices
2546                    .extend(ic_docs.buyer_invoices.iter().cloned());
2547                debug!(
2548                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2549                    ic_docs.seller_invoices.len(),
2550                    ic_docs.buyer_orders.len(),
2551                    ic_docs.buyer_goods_receipts.len(),
2552                    ic_docs.buyer_invoices.len(),
2553                );
2554            }
2555        }
2556
2557        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2558        let hr = self.phase_hr_data(&mut stats)?;
2559
2560        // Phase 6b: Generate JEs from payroll runs
2561        if !hr.payroll_runs.is_empty() {
2562            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2563            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2564            entries.extend(payroll_jes);
2565        }
2566
2567        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2568        if !hr.pension_journal_entries.is_empty() {
2569            debug!(
2570                "Generated {} JEs from pension plans",
2571                hr.pension_journal_entries.len()
2572            );
2573            entries.extend(hr.pension_journal_entries.iter().cloned());
2574        }
2575
2576        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2577        if !hr.stock_comp_journal_entries.is_empty() {
2578            debug!(
2579                "Generated {} JEs from stock-based compensation",
2580                hr.stock_comp_journal_entries.len()
2581            );
2582            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2583        }
2584
2585        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2586        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2587
2588        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2589        if !manufacturing_snap.production_orders.is_empty() {
2590            let currency = self
2591                .config
2592                .companies
2593                .first()
2594                .map(|c| c.currency.as_str())
2595                .unwrap_or("USD");
2596            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2597                &manufacturing_snap.production_orders,
2598                &manufacturing_snap.quality_inspections,
2599                currency,
2600            );
2601            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2602            entries.extend(mfg_jes);
2603        }
2604
2605        // Phase 7a-warranty: Generate warranty provisions per company
2606        if !manufacturing_snap.quality_inspections.is_empty() {
2607            let framework = match self.config.accounting_standards.framework {
2608                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2609                _ => "US_GAAP",
2610            };
2611            for company in &self.config.companies {
2612                let company_orders: Vec<_> = manufacturing_snap
2613                    .production_orders
2614                    .iter()
2615                    .filter(|o| o.company_code == company.code)
2616                    .cloned()
2617                    .collect();
2618                let company_inspections: Vec<_> = manufacturing_snap
2619                    .quality_inspections
2620                    .iter()
2621                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2622                    .cloned()
2623                    .collect();
2624                if company_inspections.is_empty() {
2625                    continue;
2626                }
2627                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2628                let warranty_result = warranty_gen.generate(
2629                    &company.code,
2630                    &company_orders,
2631                    &company_inspections,
2632                    &company.currency,
2633                    framework,
2634                );
2635                if !warranty_result.journal_entries.is_empty() {
2636                    debug!(
2637                        "Generated {} warranty provision JEs for {}",
2638                        warranty_result.journal_entries.len(),
2639                        company.code
2640                    );
2641                    entries.extend(warranty_result.journal_entries);
2642                }
2643            }
2644        }
2645
2646        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2647        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2648        {
2649            let cogs_currency = self
2650                .config
2651                .companies
2652                .first()
2653                .map(|c| c.currency.as_str())
2654                .unwrap_or("USD");
2655            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2656                &document_flows.deliveries,
2657                &manufacturing_snap.production_orders,
2658                cogs_currency,
2659            );
2660            if !cogs_jes.is_empty() {
2661                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2662                entries.extend(cogs_jes);
2663            }
2664        }
2665
2666        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2667        //
2668        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2669        // subledger inventory positions.  Here we reconcile them so that position balances
2670        // reflect the actual stock movements within the generation period.
2671        if !manufacturing_snap.inventory_movements.is_empty()
2672            && !subledger.inventory_positions.is_empty()
2673        {
2674            use datasynth_core::models::MovementType as MfgMovementType;
2675            let mut receipt_count = 0usize;
2676            let mut issue_count = 0usize;
2677            for movement in &manufacturing_snap.inventory_movements {
2678                // Find a matching position by material code and company
2679                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2680                    p.material_id == movement.material_code
2681                        && p.company_code == movement.entity_code
2682                }) {
2683                    match movement.movement_type {
2684                        MfgMovementType::GoodsReceipt => {
2685                            // Increase stock and update weighted-average cost
2686                            pos.add_quantity(
2687                                movement.quantity,
2688                                movement.value,
2689                                movement.movement_date,
2690                            );
2691                            receipt_count += 1;
2692                        }
2693                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2694                            // Decrease stock (best-effort; silently skip if insufficient)
2695                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2696                            issue_count += 1;
2697                        }
2698                        _ => {}
2699                    }
2700                }
2701            }
2702            debug!(
2703                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2704                manufacturing_snap.inventory_movements.len(),
2705                receipt_count,
2706                issue_count,
2707            );
2708        }
2709
2710        // Update final entry/line-item stats after all JE-generating phases
2711        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2712        if !entries.is_empty() {
2713            stats.total_entries = entries.len() as u64;
2714            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2715            debug!(
2716                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2717                stats.total_entries, stats.total_line_items
2718            );
2719        }
2720
2721        // Phase 7b: Apply internal controls to journal entries
2722        if self.config.internal_controls.enabled && !entries.is_empty() {
2723            info!("Phase 7b: Applying internal controls to journal entries");
2724            let control_config = ControlGeneratorConfig {
2725                exception_rate: self.config.internal_controls.exception_rate,
2726                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2727                enable_sox_marking: true,
2728                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2729                    self.config.internal_controls.sox_materiality_threshold,
2730                )
2731                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2732                ..Default::default()
2733            };
2734            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2735            for entry in &mut entries {
2736                control_gen.apply_controls(entry, &coa);
2737            }
2738            let with_controls = entries
2739                .iter()
2740                .filter(|e| !e.header.control_ids.is_empty())
2741                .count();
2742            info!(
2743                "Applied controls to {} entries ({} with control IDs assigned)",
2744                entries.len(),
2745                with_controls
2746            );
2747        }
2748
2749        // Phase 7c: Extract SoD violations from annotated journal entries.
2750        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2751        // Here we materialise those flags into standalone SodViolation records.
2752        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2753            .iter()
2754            .filter(|e| e.header.sod_violation)
2755            .filter_map(|e| {
2756                e.header.sod_conflict_type.map(|ct| {
2757                    use datasynth_core::models::{RiskLevel, SodViolation};
2758                    let severity = match ct {
2759                        datasynth_core::models::SodConflictType::PaymentReleaser
2760                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2761                            RiskLevel::Critical
2762                        }
2763                        datasynth_core::models::SodConflictType::PreparerApprover
2764                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2765                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2766                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2767                            RiskLevel::High
2768                        }
2769                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2770                            RiskLevel::Medium
2771                        }
2772                    };
2773                    let action = format!(
2774                        "SoD conflict {:?} on entry {} ({})",
2775                        ct, e.header.document_id, e.header.company_code
2776                    );
2777                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2778                })
2779            })
2780            .collect();
2781        if !sod_violations.is_empty() {
2782            info!(
2783                "Phase 7c: Extracted {} SoD violations from {} entries",
2784                sod_violations.len(),
2785                entries.len()
2786            );
2787        }
2788
2789        // Emit journal entries to stream sink (after all JE-generating phases)
2790        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2791
2792        // Phase 7d: Document-level fraud injection + propagation to derived JEs.
2793        //
2794        // This runs BEFORE line-level anomaly injection so that JEs tagged by
2795        // document-level fraud are exempt from subsequent line-level flag
2796        // overwrites, and so downstream consumers see a coherent picture.
2797        //
2798        // Gated by `fraud.document_fraud_rate` — `None` or `0.0` is a no-op.
2799        {
2800            let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2801            if self.config.fraud.enabled && doc_rate > 0.0 {
2802                use datasynth_core::fraud_propagation::{
2803                    inject_document_fraud, propagate_documents_to_entries,
2804                };
2805                use datasynth_core::utils::weighted_select;
2806                use datasynth_core::FraudType;
2807                use rand_chacha::rand_core::SeedableRng;
2808
2809                let dist = &self.config.fraud.fraud_type_distribution;
2810                let fraud_type_weights: [(FraudType, f64); 8] = [
2811                    (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2812                    (FraudType::FictitiousEntry, dist.fictitious_transaction),
2813                    (FraudType::RevenueManipulation, dist.revenue_manipulation),
2814                    (
2815                        FraudType::ImproperCapitalization,
2816                        dist.expense_capitalization,
2817                    ),
2818                    (FraudType::SplitTransaction, dist.split_transaction),
2819                    (FraudType::TimingAnomaly, dist.timing_anomaly),
2820                    (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2821                    (FraudType::DuplicatePayment, dist.duplicate_payment),
2822                ];
2823                let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2824                let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2825                    if weights_sum <= 0.0 {
2826                        FraudType::FictitiousEntry
2827                    } else {
2828                        *weighted_select(rng, &fraud_type_weights)
2829                    }
2830                };
2831
2832                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2833                let mut doc_tagged = 0usize;
2834                macro_rules! inject_into {
2835                    ($collection:expr) => {{
2836                        let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2837                            $collection.iter_mut().map(|d| &mut d.header).collect();
2838                        doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2839                    }};
2840                }
2841                inject_into!(document_flows.purchase_orders);
2842                inject_into!(document_flows.goods_receipts);
2843                inject_into!(document_flows.vendor_invoices);
2844                inject_into!(document_flows.payments);
2845                inject_into!(document_flows.sales_orders);
2846                inject_into!(document_flows.deliveries);
2847                inject_into!(document_flows.customer_invoices);
2848                if doc_tagged > 0 {
2849                    info!(
2850                        "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2851                    );
2852                }
2853
2854                if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2855                    let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2856                        Vec::new();
2857                    headers.extend(
2858                        document_flows
2859                            .purchase_orders
2860                            .iter()
2861                            .map(|d| d.header.clone()),
2862                    );
2863                    headers.extend(
2864                        document_flows
2865                            .goods_receipts
2866                            .iter()
2867                            .map(|d| d.header.clone()),
2868                    );
2869                    headers.extend(
2870                        document_flows
2871                            .vendor_invoices
2872                            .iter()
2873                            .map(|d| d.header.clone()),
2874                    );
2875                    headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2876                    headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2877                    headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2878                    headers.extend(
2879                        document_flows
2880                            .customer_invoices
2881                            .iter()
2882                            .map(|d| d.header.clone()),
2883                    );
2884                    let propagated = propagate_documents_to_entries(&headers, &mut entries);
2885                    if propagated > 0 {
2886                        info!(
2887                            "Propagated document-level fraud to {propagated} derived journal entries"
2888                        );
2889                    }
2890                }
2891            }
2892        }
2893
2894        // Phase 8: Anomaly Injection (after all JE-generating phases)
2895        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2896
2897        // Phase 8b: Apply behavioral biases to fraud entries that did NOT go
2898        // through the anomaly injector.
2899        //
2900        // Three paths set `is_fraud = true` without touching `is_anomaly`:
2901        //   - je_generator::determine_fraud (intrinsic fraud during JE generation)
2902        //   - fraud_propagation::propagate_documents_to_entries (doc-level cascade)
2903        //   - Any external mutation that sets is_fraud after the fact
2904        //
2905        // The anomaly injector already applies the same bias inline when it
2906        // tags an entry as fraud (and sets is_anomaly=true in the same step),
2907        // so gating this sweep on `!is_anomaly` avoids double-application.
2908        //
2909        // Without this sweep, fraud entries from these paths show 0 lift on
2910        // the canonical forensic signals (is_round_1000, is_off_hours,
2911        // is_weekend, is_post_close), which is exactly what the SDK-side
2912        // evaluator caught in v3.1 — fraud features had worse lift than
2913        // baseline. See DS-3.1 post-deploy feedback.
2914        {
2915            use datasynth_core::fraud_bias::{
2916                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2917            };
2918            use rand_chacha::rand_core::SeedableRng;
2919            let cfg = FraudBehavioralBiasConfig::default();
2920            if cfg.enabled {
2921                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2922                let mut swept = 0usize;
2923                for entry in entries.iter_mut() {
2924                    if entry.header.is_fraud && !entry.header.is_anomaly {
2925                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2926                        swept += 1;
2927                    }
2928                }
2929                if swept > 0 {
2930                    info!(
2931                        "Applied behavioral biases to {swept} non-anomaly fraud entries \
2932                         (doc-propagated + je_generator intrinsic fraud)"
2933                    );
2934                }
2935            }
2936        }
2937
2938        // Emit anomaly labels to stream sink
2939        self.emit_phase_items(
2940            "anomaly_injection",
2941            "LabeledAnomaly",
2942            &anomaly_labels.labels,
2943        );
2944
2945        // Propagate fraud labels from journal entries to source documents.
2946        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2947        // instead of tracing through document_references.json.
2948        //
2949        // Gated by `fraud.propagate_to_document` (default true) — disable when
2950        // downstream consumers want document fraud flags to reflect only
2951        // document-level injection, not line-level.
2952        if self.config.fraud.propagate_to_document {
2953            use std::collections::HashMap;
2954            // Build a map from document_id -> fraud_type; only fraudulent JEs contribute entries.
2955            //
2956            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2957            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2958            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2959            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2960            // we register BOTH the prefixed form (raw reference) AND the bare form
2961            // (post-colon portion) in the map. Also register the JE's document_id
2962            // UUID so documents that set `journal_entry_id` match via that path.
2963            //
2964            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2965            // looked up "foo", silently producing 0 propagations.
2966            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2967            for je in &entries {
2968                if je.header.is_fraud {
2969                    if let Some(ref fraud_type) = je.header.fraud_type {
2970                        if let Some(ref reference) = je.header.reference {
2971                            // Register the full reference ("GR:PO-2024-000001")
2972                            fraud_map.insert(reference.clone(), *fraud_type);
2973                            // Also register the bare document ID ("PO-2024-000001")
2974                            // by stripping the "PREFIX:" if present.
2975                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2976                                if !bare.is_empty() {
2977                                    fraud_map.insert(bare.to_string(), *fraud_type);
2978                                }
2979                            }
2980                        }
2981                        // Also tag via journal_entry_id on document headers
2982                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2983                    }
2984                }
2985            }
2986            if !fraud_map.is_empty() {
2987                let mut propagated = 0usize;
2988                // Use DocumentHeader::propagate_fraud method for each doc type
2989                macro_rules! propagate_to {
2990                    ($collection:expr) => {
2991                        for doc in &mut $collection {
2992                            if doc.header.propagate_fraud(&fraud_map) {
2993                                propagated += 1;
2994                            }
2995                        }
2996                    };
2997                }
2998                propagate_to!(document_flows.purchase_orders);
2999                propagate_to!(document_flows.goods_receipts);
3000                propagate_to!(document_flows.vendor_invoices);
3001                propagate_to!(document_flows.payments);
3002                propagate_to!(document_flows.sales_orders);
3003                propagate_to!(document_flows.deliveries);
3004                propagate_to!(document_flows.customer_invoices);
3005                if propagated > 0 {
3006                    info!(
3007                        "Propagated fraud labels to {} document flow records",
3008                        propagated
3009                    );
3010                }
3011            }
3012        }
3013
3014        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
3015        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
3016
3017        // Emit red flags to stream sink
3018        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
3019
3020        // Phase 26b: Collusion Ring Generation (after red flags)
3021        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
3022
3023        // Emit collusion rings to stream sink
3024        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
3025
3026        // Phase 8d: W8.1 — TB drift-correction pass.  When a TB anchor prior is
3027        // loaded (industry bundle with real per-account targets), emit balanced
3028        // "SA" adjustment JEs to nudge the synthetic balance sheet toward the
3029        // corpus-median shape before final balance validation runs.
3030        self.phase_tb_drift_correction(&mut entries)?;
3031
3032        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
3033        let balance_validation = self.phase_balance_validation(&entries)?;
3034
3035        // Phase 9a: COA coverage — every gl_account in JEs must exist in the
3036        // chart of accounts. Soft warning by default; hard fail when the
3037        // user passes --validate-coa-coverage / sets the strict flag.
3038        self.validate_coa_coverage(&entries, coa.as_ref())?;
3039
3040        // Phase 9b: GL-to-Subledger Reconciliation
3041        let subledger_reconciliation =
3042            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
3043
3044        // Phase 10: Data Quality Injection
3045        let (data_quality_stats, quality_issues) =
3046            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
3047
3048        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
3049        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
3050
3051        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
3052        {
3053            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
3054
3055            // Assert 1: Every non-anomaly JE must individually balance (debits = credits).
3056            // Anomaly-injected JEs are excluded since they are intentionally unbalanced (fraud).
3057            let mut unbalanced_clean = 0usize;
3058            for je in &entries {
3059                if je.header.is_fraud || je.header.is_anomaly {
3060                    continue;
3061                }
3062                let diff = (je.total_debit() - je.total_credit()).abs();
3063                if diff > tolerance {
3064                    unbalanced_clean += 1;
3065                    if unbalanced_clean <= 3 {
3066                        warn!(
3067                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
3068                            je.header.document_id,
3069                            je.total_debit(),
3070                            je.total_credit(),
3071                            diff
3072                        );
3073                    }
3074                }
3075            }
3076            if unbalanced_clean > 0 {
3077                return Err(datasynth_core::error::SynthError::generation(format!(
3078                    "{} non-anomaly JEs are unbalanced (debits != credits). \
3079                     First few logged above. Tolerance={}",
3080                    unbalanced_clean, tolerance
3081                )));
3082            }
3083            debug!(
3084                "Phase 10c: All {} non-anomaly JEs individually balanced",
3085                entries
3086                    .iter()
3087                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
3088                    .count()
3089            );
3090
3091            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
3092            let company_codes: Vec<String> = self
3093                .config
3094                .companies
3095                .iter()
3096                .map(|c| c.code.clone())
3097                .collect();
3098            for company_code in &company_codes {
3099                let mut assets = rust_decimal::Decimal::ZERO;
3100                let mut liab_equity = rust_decimal::Decimal::ZERO;
3101
3102                for entry in &entries {
3103                    if entry.header.company_code != *company_code {
3104                        continue;
3105                    }
3106                    for line in &entry.lines {
3107                        let acct = &line.gl_account;
3108                        let net = line.debit_amount - line.credit_amount;
3109                        // Asset accounts (1xxx): normal debit balance
3110                        if acct.starts_with('1') {
3111                            assets += net;
3112                        }
3113                        // Liability (2xxx) + Equity (3xxx): normal credit balance
3114                        else if acct.starts_with('2') || acct.starts_with('3') {
3115                            liab_equity -= net; // credit-normal, so negate debit-net
3116                        }
3117                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
3118                        // so they net to zero after closing entries
3119                    }
3120                }
3121
3122                let bs_diff = (assets - liab_equity).abs();
3123                if bs_diff > tolerance {
3124                    warn!(
3125                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3126                         revenue/expense closing entries may not fully offset",
3127                        company_code, assets, liab_equity, bs_diff
3128                    );
3129                    // Warn rather than error: multi-period datasets may have timing
3130                    // differences from accruals/deferrals that resolve in later periods.
3131                    // The per-JE debit/credit balance check (Assert 1) is the hard gate.
3132                } else {
3133                    debug!(
3134                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3135                        company_code, assets, liab_equity, bs_diff
3136                    );
3137                }
3138            }
3139
3140            info!("Phase 10c: All generation-time accounting assertions passed");
3141        }
3142
3143        // Phase 11: Audit Data
3144        let audit = self.phase_audit_data(&entries, &mut stats)?;
3145
3146        // Phase 12: Banking KYC/AML Data
3147        let mut banking = self.phase_banking_data(&mut stats)?;
3148
3149        // Phase 12.5: Bridge document-flow Payments → BankTransactions
3150        // Creates coherence between the accounting layer (payments, JEs) and the
3151        // banking layer (bank transactions). A vendor invoice payment now appears
3152        // on both sides with cross-references and fraud labels propagated.
3153        if self.phase_config.generate_banking
3154            && !document_flows.payments.is_empty()
3155            && !banking.accounts.is_empty()
3156        {
3157            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3158            if bridge_rate > 0.0 {
3159                let mut bridge =
3160                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3161                        self.seed,
3162                    );
3163                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3164                    &document_flows.payments,
3165                    &banking.customers,
3166                    &banking.accounts,
3167                    bridge_rate,
3168                );
3169                info!(
3170                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3171                    bridge_stats.bridged_count,
3172                    bridge_stats.transactions_emitted,
3173                    bridge_stats.fraud_propagated,
3174                );
3175                let bridged_count = bridged_txns.len();
3176                banking.transactions.extend(bridged_txns);
3177
3178                // Re-run velocity computation so bridged txns also get features
3179                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
3180                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3181                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
3182                        &mut banking.transactions,
3183                    );
3184                }
3185
3186                // Recompute suspicious count after bridging
3187                banking.suspicious_count = banking
3188                    .transactions
3189                    .iter()
3190                    .filter(|t| t.is_suspicious)
3191                    .count();
3192                stats.banking_transaction_count = banking.transactions.len();
3193                stats.banking_suspicious_count = banking.suspicious_count;
3194            }
3195        }
3196
3197        // Phase 13: Graph Export
3198        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3199
3200        // Phase 14: LLM Enrichment
3201        self.phase_llm_enrichment(&mut stats);
3202
3203        // Phase 15: Diffusion Enhancement
3204        self.phase_diffusion_enhancement(&entries, &mut stats);
3205
3206        // Phase 16: Causal Overlay
3207        self.phase_causal_overlay(&mut stats);
3208
3209        // Phase 17: Bank Reconciliation + Financial Statements
3210        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
3211        // provision data (from accounting_standards / tax snapshots) can be wired in.
3212        let mut financial_reporting = self.phase_financial_reporting(
3213            &document_flows,
3214            &entries,
3215            &coa,
3216            &hr,
3217            &audit,
3218            &mut stats,
3219        )?;
3220
3221        // BS coherence check: assets = liabilities + equity
3222        {
3223            use datasynth_core::models::StatementType;
3224            for stmt in &financial_reporting.consolidated_statements {
3225                if stmt.statement_type == StatementType::BalanceSheet {
3226                    let total_assets: rust_decimal::Decimal = stmt
3227                        .line_items
3228                        .iter()
3229                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
3230                        .map(|li| li.amount)
3231                        .sum();
3232                    let total_le: rust_decimal::Decimal = stmt
3233                        .line_items
3234                        .iter()
3235                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3236                        .map(|li| li.amount)
3237                        .sum();
3238                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3239                        warn!(
3240                            "BS equation imbalance: assets={}, L+E={}",
3241                            total_assets, total_le
3242                        );
3243                    }
3244                }
3245            }
3246        }
3247
3248        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
3249        let accounting_standards =
3250            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3251
3252        // Phase 18a: Merge ECL journal entries into main GL
3253        if !accounting_standards.ecl_journal_entries.is_empty() {
3254            debug!(
3255                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3256                accounting_standards.ecl_journal_entries.len()
3257            );
3258            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3259        }
3260
3261        // Phase 18a: Merge provision journal entries into main GL
3262        if !accounting_standards.provision_journal_entries.is_empty() {
3263            debug!(
3264                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3265                accounting_standards.provision_journal_entries.len()
3266            );
3267            entries.extend(
3268                accounting_standards
3269                    .provision_journal_entries
3270                    .iter()
3271                    .cloned(),
3272            );
3273        }
3274
3275        // Phase 18b: OCPM Events (after all process data is available)
3276        let mut ocpm = self.phase_ocpm_events(
3277            &document_flows,
3278            &sourcing,
3279            &hr,
3280            &manufacturing_snap,
3281            &banking,
3282            &audit,
3283            &financial_reporting,
3284            &mut stats,
3285        )?;
3286
3287        // Emit OCPM events to stream sink
3288        if let Some(ref event_log) = ocpm.event_log {
3289            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3290        }
3291
3292        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
3293        if let Some(ref event_log) = ocpm.event_log {
3294            // Build reverse index: document_ref → positions of the matching events in event_log.events
3295            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3296                std::collections::HashMap::new();
3297            for (idx, event) in event_log.events.iter().enumerate() {
3298                if let Some(ref doc_ref) = event.document_ref {
3299                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3300                }
3301            }
3302
3303            if !doc_index.is_empty() {
3304                let mut annotated = 0usize;
3305                for entry in &mut entries {
3306                    let doc_id_str = entry.header.document_id.to_string();
3307                    // Collect matching event indices from document_id and reference
3308                    let mut matched_indices: Vec<usize> = Vec::new();
3309                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3310                        matched_indices.extend(indices);
3311                    }
3312                    if let Some(ref reference) = entry.header.reference {
3313                        let bare_ref = reference
3314                            .find(':')
3315                            .map(|i| &reference[i + 1..])
3316                            .unwrap_or(reference.as_str());
3317                        if let Some(indices) = doc_index.get(bare_ref) {
3318                            for &idx in indices {
3319                                if !matched_indices.contains(&idx) {
3320                                    matched_indices.push(idx);
3321                                }
3322                            }
3323                        }
3324                    }
3325                    // Apply matches to JE header
3326                    if !matched_indices.is_empty() {
3327                        for &idx in &matched_indices {
3328                            let event = &event_log.events[idx];
3329                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3330                                entry.header.ocpm_event_ids.push(event.event_id);
3331                            }
3332                            for obj_ref in &event.object_refs {
3333                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3334                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
3335                                }
3336                            }
3337                            if entry.header.ocpm_case_id.is_none() {
3338                                entry.header.ocpm_case_id = event.case_id;
3339                            }
3340                        }
3341                        annotated += 1;
3342                    }
3343                }
3344                debug!(
3345                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3346                    annotated
3347                );
3348            }
3349        }
3350
3351        // Phase 18d: Synthesize OCPM events for orphan JEs (period-close,
3352        // IC eliminations, opening balances, standards-driven entries) so
3353        // every JournalEntry carries at least one `ocpm_event_ids` link.
3354        if let Some(ref mut event_log) = ocpm.event_log {
3355            let synthesized =
3356                datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3357            if synthesized > 0 {
3358                info!(
3359                    "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3360                );
3361            }
3362
3363            // Phase 18e: Mirror JE anomaly / fraud flags onto the linked OCEL
3364            // events and their owning CaseTrace. Without this, every exported
3365            // OCEL event has `is_anomaly = false` even when the underlying JE
3366            // was flagged.
3367            let anomaly_events =
3368                datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3369            if anomaly_events > 0 {
3370                info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3371            }
3372
3373            // Phase 18f: Inject process-variant imperfections (rework, skipped
3374            // steps, out-of-order events) so conformance checkers see
3375            // realistic variant counts and fitness < 1.0. Uses the P2P
3376            // process rates as the single source of truth.
3377            let p2p_cfg = &self.config.ocpm.p2p_process;
3378            let any_imperfection = p2p_cfg.rework_probability > 0.0
3379                || p2p_cfg.skip_step_probability > 0.0
3380                || p2p_cfg.out_of_order_probability > 0.0;
3381            if any_imperfection {
3382                use rand_chacha::rand_core::SeedableRng;
3383                let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3384                    rework_rate: p2p_cfg.rework_probability,
3385                    skip_rate: p2p_cfg.skip_step_probability,
3386                    out_of_order_rate: p2p_cfg.out_of_order_probability,
3387                };
3388                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3389                let stats =
3390                    datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3391                if stats.rework + stats.skipped + stats.out_of_order > 0 {
3392                    info!(
3393                        "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3394                        stats.rework, stats.skipped, stats.out_of_order
3395                    );
3396                }
3397            }
3398        }
3399
3400        // Phase 19: Sales Quotes, Management KPIs, Budgets
3401        let sales_kpi_budgets =
3402            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3403
3404        // Phase 22: Treasury Data Generation
3405        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
3406        // are included in the pre-tax income used by phase_tax_generation.
3407        let treasury =
3408            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3409
3410        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
3411        if !treasury.journal_entries.is_empty() {
3412            debug!(
3413                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3414                treasury.journal_entries.len()
3415            );
3416            entries.extend(treasury.journal_entries.iter().cloned());
3417        }
3418
3419        // Phase 20: Tax Generation
3420        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3421
3422        // Phase 20 JEs: Merge tax posting journal entries into main GL
3423        if !tax.tax_posting_journal_entries.is_empty() {
3424            debug!(
3425                "Merging {} tax posting JEs into GL",
3426                tax.tax_posting_journal_entries.len()
3427            );
3428            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3429        }
3430
3431        // Phase 20b: FINAL fraud behavioral bias sweep.
3432        //
3433        // Many phases AFTER Phase 8b (ECL / provisions / treasury / tax /
3434        // period close) extend `entries` with new journal entries that may
3435        // carry `is_fraud = true` (e.g. tax-provision entries derived from
3436        // already-fraudulent transactions). Those late additions miss the
3437        // Phase 8b sweep and ship without bias applied — which is exactly
3438        // why SDK-team production jobs kept reporting `off_hours 0× lift`
3439        // even after v3.1.1 closed the per-phase gap for early-added JEs.
3440        //
3441        // Running the sweep one more time here guarantees every is_fraud
3442        // entry — regardless of which phase added it — has bias applied.
3443        // `!is_anomaly` gates out anomaly-injector entries (which already
3444        // got biased inline); the sweep is otherwise idempotent-ish:
3445        // weekend / off_hours re-fire to another valid weekend / off-hour,
3446        // post_close is guarded by `!is_post_close`, and round-dollar
3447        // rescaling on an already-round amount is a no-op (ratio = 1).
3448        {
3449            use datasynth_core::fraud_bias::{
3450                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3451            };
3452            use rand_chacha::rand_core::SeedableRng;
3453            let cfg = FraudBehavioralBiasConfig::default();
3454            if cfg.enabled {
3455                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3456                let mut swept = 0usize;
3457                for entry in entries.iter_mut() {
3458                    if entry.header.is_fraud && !entry.header.is_anomaly {
3459                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3460                        swept += 1;
3461                    }
3462                }
3463                if swept > 0 {
3464                    info!(
3465                        "Phase 20b: final behavioral-bias sweep applied to {swept} \
3466                         non-anomaly fraud entries (covers late-added JEs from \
3467                         ECL / provisions / treasury / tax / period-close)"
3468                    );
3469                }
3470            }
3471        }
3472
3473        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
3474        // Build supplementary cash flow items from upstream JE data (depreciation,
3475        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
3476        {
3477            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3478
3479            let framework_str = {
3480                use datasynth_config::schema::AccountingFrameworkConfig;
3481                match self
3482                    .config
3483                    .accounting_standards
3484                    .framework
3485                    .unwrap_or_default()
3486                {
3487                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3488                        "IFRS"
3489                    }
3490                    _ => "US_GAAP",
3491                }
3492            };
3493
3494            // Sum depreciation debits (account 6000) from close JEs
3495            let depreciation_total: rust_decimal::Decimal = entries
3496                .iter()
3497                .filter(|je| je.header.document_type == "CL")
3498                .flat_map(|je| je.lines.iter())
3499                .filter(|l| l.gl_account.starts_with("6000"))
3500                .map(|l| l.debit_amount)
3501                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3502
3503            // Sum interest expense debits (account 7100)
3504            let interest_paid: rust_decimal::Decimal = entries
3505                .iter()
3506                .flat_map(|je| je.lines.iter())
3507                .filter(|l| l.gl_account.starts_with("7100"))
3508                .map(|l| l.debit_amount)
3509                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3510
3511            // Sum tax expense debits (account 8000)
3512            let tax_paid: rust_decimal::Decimal = entries
3513                .iter()
3514                .flat_map(|je| je.lines.iter())
3515                .filter(|l| l.gl_account.starts_with("8000"))
3516                .map(|l| l.debit_amount)
3517                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3518
3519            // Sum capex debits on fixed assets (account 1500)
3520            let capex: rust_decimal::Decimal = entries
3521                .iter()
3522                .flat_map(|je| je.lines.iter())
3523                .filter(|l| l.gl_account.starts_with("1500"))
3524                .map(|l| l.debit_amount)
3525                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3526
3527            // Dividends paid: sum all debits on dividends payable (account 2170); no JE-type filter
3528            let dividends_paid: rust_decimal::Decimal = entries
3529                .iter()
3530                .flat_map(|je| je.lines.iter())
3531                .filter(|l| l.gl_account == "2170")
3532                .map(|l| l.debit_amount)
3533                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3534
3535            let cf_data = CashFlowSourceData {
3536                depreciation_total,
3537                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
3538                delta_ar: rust_decimal::Decimal::ZERO,
3539                delta_ap: rust_decimal::Decimal::ZERO,
3540                delta_inventory: rust_decimal::Decimal::ZERO,
3541                capex,
3542                debt_issuance: rust_decimal::Decimal::ZERO,
3543                debt_repayment: rust_decimal::Decimal::ZERO,
3544                interest_paid,
3545                tax_paid,
3546                dividends_paid,
3547                framework: framework_str.to_string(),
3548            };
3549
3550            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3551            if !enhanced_cf_items.is_empty() {
3552                // Merge into ALL cash flow statements (standalone + consolidated)
3553                use datasynth_core::models::StatementType;
3554                let merge_count = enhanced_cf_items.len();
3555                for stmt in financial_reporting
3556                    .financial_statements
3557                    .iter_mut()
3558                    .chain(financial_reporting.consolidated_statements.iter_mut())
3559                    .chain(
3560                        financial_reporting
3561                            .standalone_statements
3562                            .values_mut()
3563                            .flat_map(|v| v.iter_mut()),
3564                    )
3565                {
3566                    if stmt.statement_type == StatementType::CashFlowStatement {
3567                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3568                    }
3569                }
3570                info!(
3571                    "Enhanced cash flow: {} supplementary items merged into CF statements",
3572                    merge_count
3573                );
3574            }
3575        }
3576
3577        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
3578        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
3579        self.generate_notes_to_financial_statements(
3580            &mut financial_reporting,
3581            &accounting_standards,
3582            &tax,
3583            &hr,
3584            &audit,
3585            &treasury,
3586        );
3587
3588        // Phase 20b-seg: Supplement segment reports from real JEs (v2.4)
3589        // When we have 2+ companies, derive segment data from actual journal entries
3590        // to complement or replace the FS-generator-based segments.
3591        if self.config.companies.len() >= 2 && !entries.is_empty() {
3592            let companies: Vec<(String, String)> = self
3593                .config
3594                .companies
3595                .iter()
3596                .map(|c| (c.code.clone(), c.name.clone()))
3597                .collect();
3598            let ic_elim: rust_decimal::Decimal =
3599                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3600            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3601                .unwrap_or(NaiveDate::MIN);
3602            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3603            let period_label = format!(
3604                "{}-{:02}",
3605                end_date.year(),
3606                (end_date - chrono::Days::new(1)).month()
3607            );
3608
3609            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3610            let (je_segments, je_recon) =
3611                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3612            if !je_segments.is_empty() {
3613                info!(
3614                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3615                    je_segments.len(),
3616                    ic_elim,
3617                );
3618                // Replace if existing segment_reports were empty; otherwise supplement
3619                if financial_reporting.segment_reports.is_empty() {
3620                    financial_reporting.segment_reports = je_segments;
3621                    financial_reporting.segment_reconciliations = vec![je_recon];
3622                } else {
3623                    financial_reporting.segment_reports.extend(je_segments);
3624                    financial_reporting.segment_reconciliations.push(je_recon);
3625                }
3626            }
3627        }
3628
3629        // Phase 21: ESG Data Generation
3630        let esg_snap =
3631            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3632
3633        // Phase 23: Project Accounting Data Generation
3634        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3635
3636        // Phase 24: Process Evolution + Organizational Events
3637        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3638
3639        // Phase 24b: Disruption Events
3640        let disruption_events = self.phase_disruption_events(&mut stats)?;
3641
3642        // Phase 27: Bi-Temporal Vendor Version Chains
3643        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3644
3645        // Phase 28: Entity Relationship Graph + Cross-Process Links
3646        let (entity_relationship_graph, cross_process_links) =
3647            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3648
3649        // Phase 29: Industry-specific GL accounts
3650        let industry_output = self.phase_industry_data(&mut stats);
3651
3652        // Phase: Compliance regulations (must run before hypergraph so it can be included)
3653        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3654
3655        // Phase: Neural enhancement (config-acknowledged-only in v4.0).
3656        //
3657        // The neural / hybrid diffusion path was a documented L2 stub
3658        // in v3.x; actual neural-network training requires ML
3659        // infrastructure (PyTorch / candle bindings, GPU access,
3660        // training loops) that was never wired through the
3661        // orchestrator. Rather than keep a silently-no-op block that
3662        // misleads users into thinking neural training happens, v4.0
3663        // acknowledges the config — exposing stats so downstream
3664        // tooling can see the request — but emits a clear warning
3665        // when a non-statistical backend is requested. The statistical
3666        // diffusion backend continues to run via
3667        // `phase_diffusion_enhancement`.
3668        //
3669        // Users who need real neural diffusion: track the roadmap item
3670        // in the v4.x backlog and consider contributing the backend
3671        // (the `DiffusionBackend` trait is the integration point).
3672        if self.config.diffusion.enabled
3673            && (self.config.diffusion.backend == "neural"
3674                || self.config.diffusion.backend == "hybrid")
3675        {
3676            let neural = &self.config.diffusion.neural;
3677            let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3678            stats.neural_hybrid_weight = Some(weight);
3679            stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3680            stats.neural_routed_column_count = Some(neural.neural_columns.len());
3681            warn!(
3682                "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3683                 the neural/hybrid training path is not yet shipped. Config \
3684                 is captured in stats (weight={weight:.2}, strategy={}, \
3685                 columns={}) but no neural training runs. Statistical \
3686                 diffusion (backend='statistical') continues to work.",
3687                self.config.diffusion.backend,
3688                neural.hybrid_strategy,
3689                neural.neural_columns.len(),
3690            );
3691        }
3692
3693        // Phase 19b: Hypergraph Export (after all data is available)
3694        self.phase_hypergraph_export(
3695            &coa,
3696            &entries,
3697            &document_flows,
3698            &sourcing,
3699            &hr,
3700            &manufacturing_snap,
3701            &banking,
3702            &audit,
3703            &financial_reporting,
3704            &ocpm,
3705            &compliance_regulations,
3706            &mut stats,
3707        )?;
3708
3709        // Phase 10c: Additional graph builders (approval, entity, banking)
3710        // These run after all data is available since they need banking/IC data.
3711        if self.phase_config.generate_graph_export {
3712            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3713        }
3714
3715        // Log informational messages for config sections not yet fully wired
3716        if self.config.streaming.enabled {
3717            info!("Note: streaming config is enabled but batch mode does not use it");
3718        }
3719        if self.config.vendor_network.enabled {
3720            debug!("Vendor network config available; relationship graph generation is partial");
3721        }
3722        if self.config.customer_segmentation.enabled {
3723            debug!("Customer segmentation config available; segment-aware generation is partial");
3724        }
3725
3726        // Log final resource statistics
3727        let resource_stats = self.resource_guard.stats();
3728        info!(
3729            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3730            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3731            resource_stats.disk.estimated_bytes_written,
3732            resource_stats.degradation_level
3733        );
3734
3735        // Flush any remaining stream sink data
3736        if let Some(ref sink) = self.phase_sink {
3737            if let Err(e) = sink.flush() {
3738                warn!("Stream sink flush failed: {e}");
3739            }
3740        }
3741
3742        // Build data lineage graph
3743        let lineage = self.build_lineage_graph();
3744
3745        // Evaluate quality gates if enabled in config
3746        let gate_result = if self.config.quality_gates.enabled {
3747            let profile_name = &self.config.quality_gates.profile;
3748            match datasynth_eval::gates::get_profile(profile_name) {
3749                Some(profile) => {
3750                    // Build an evaluation populated with actual generation metrics.
3751                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3752
3753                    // Populate balance sheet evaluation from balance validation results
3754                    if balance_validation.validated {
3755                        eval.coherence.balance =
3756                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3757                                equation_balanced: balance_validation.is_balanced,
3758                                max_imbalance: (balance_validation.total_debits
3759                                    - balance_validation.total_credits)
3760                                    .abs(),
3761                                periods_evaluated: 1,
3762                                periods_imbalanced: if balance_validation.is_balanced {
3763                                    0
3764                                } else {
3765                                    1
3766                                },
3767                                period_results: Vec::new(),
3768                                companies_evaluated: self.config.companies.len(),
3769                            });
3770                    }
3771
3772                    // Set coherence passes based on balance validation
3773                    eval.coherence.passes = balance_validation.is_balanced;
3774                    if !balance_validation.is_balanced {
3775                        eval.coherence
3776                            .failures
3777                            .push("Balance sheet equation not satisfied".to_string());
3778                    }
3779
3780                    // Set statistical score based on entry count (basic sanity)
3781                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3782                    eval.statistical.passes = !entries.is_empty();
3783
3784                    // Quality score is a fixed placeholder (data quality stats are not used)
3785                    eval.quality.overall_score = 0.9; // Default high for generated data
3786                    eval.quality.passes = true;
3787
3788                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3789                    info!(
3790                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3791                        profile_name, result.gates_passed, result.gates_total, result.summary
3792                    );
3793                    Some(result)
3794                }
3795                None => {
3796                    warn!(
3797                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3798                        profile_name
3799                    );
3800                    None
3801                }
3802            }
3803        } else {
3804            None
3805        };
3806
3807        // Generate internal controls if enabled
3808        let internal_controls = if self.config.internal_controls.enabled {
3809            InternalControl::standard_controls()
3810        } else {
3811            Vec::new()
3812        };
3813
3814        // v3.3.0: analytics-metadata phase. Runs AFTER all JE-adding
3815        // phases (including fraud-bias sweep at Phase 20b) so derived
3816        // outputs reflect final data.
3817        let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3818
3819        // v3.5.1: statistical validation over the final amount
3820        // distribution. Runs *after* all JE-adding phases so the report
3821        // reflects everything the user will see in the output. Returns
3822        // `None` unless `distributions.validation.enabled = true`.
3823        let statistical_validation = self.phase_statistical_validation(&entries)?;
3824
3825        // v4.1.3+: interconnectivity snapshot — tier assignments,
3826        // value-segment labels, industry-specific metadata. Runs after
3827        // master data is settled so it can index stable IDs.
3828        let interconnectivity = self.phase_interconnectivity();
3829
3830        // SP5.2 — snapshot the CoA semantic prior (if any) into the result so
3831        // output_writer can use it as a fallback index for account_description
3832        // resolution when the synthetic CoA index misses.
3833        let coa_semantic_prior = self
3834            .cached_priors
3835            .as_ref()
3836            .and_then(|p| p.coa_semantic.clone());
3837
3838        Ok(EnhancedGenerationResult {
3839            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3840            master_data: std::mem::take(&mut self.master_data),
3841            document_flows,
3842            subledger,
3843            ocpm,
3844            audit,
3845            banking,
3846            graph_export,
3847            sourcing,
3848            financial_reporting,
3849            hr,
3850            accounting_standards,
3851            manufacturing: manufacturing_snap,
3852            sales_kpi_budgets,
3853            tax,
3854            esg: esg_snap,
3855            treasury,
3856            project_accounting,
3857            process_evolution,
3858            organizational_events,
3859            disruption_events,
3860            intercompany,
3861            journal_entries: entries,
3862            anomaly_labels,
3863            balance_validation,
3864            data_quality_stats,
3865            quality_issues,
3866            statistics: stats,
3867            lineage: Some(lineage),
3868            gate_result,
3869            internal_controls,
3870            sod_violations,
3871            opening_balances,
3872            subledger_reconciliation,
3873            counterfactual_pairs,
3874            red_flags,
3875            collusion_rings,
3876            temporal_vendor_chains,
3877            entity_relationship_graph,
3878            cross_process_links,
3879            industry_output,
3880            coa_semantic_prior,
3881            compliance_regulations,
3882            analytics_metadata,
3883            statistical_validation,
3884            interconnectivity,
3885        })
3886    }
3887
3888    /// v4.1.3+: populate the interconnectivity snapshot from
3889    /// previously-inert schema sections. Empty when all sections are
3890    /// disabled.
3891    fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3892        use rand::{RngExt, SeedableRng};
3893        use rand_chacha::ChaCha8Rng;
3894
3895        let mut snap = InterconnectivitySnapshot::default();
3896        let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3897
3898        // --- Vendor network ---
3899        let vn = &self.config.vendor_network;
3900        if vn.enabled {
3901            let total = self.master_data.vendors.len();
3902            if total > 0 {
3903                let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3904                let remaining_after_t1 = total.saturating_sub(tier1_count);
3905                let depth = vn.depth.clamp(1, 3);
3906                let tier2_count = if depth >= 2 {
3907                    let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3908                    (tier1_count * avg).min(remaining_after_t1)
3909                } else {
3910                    0
3911                };
3912                let tier3_count = total
3913                    .saturating_sub(tier1_count)
3914                    .saturating_sub(tier2_count);
3915
3916                for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3917                    let tier = if idx < tier1_count {
3918                        1
3919                    } else if idx < tier1_count + tier2_count {
3920                        2
3921                    } else {
3922                        3
3923                    };
3924                    snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3925
3926                    // Cluster assignment via configured ratios.
3927                    let cl = &vn.clusters;
3928                    let roll: f64 = rng.random();
3929                    let cluster = if roll < cl.reliable_strategic {
3930                        "reliable_strategic"
3931                    } else if roll < cl.reliable_strategic + cl.standard_operational {
3932                        "standard_operational"
3933                    } else if roll
3934                        < cl.reliable_strategic + cl.standard_operational + cl.transactional
3935                    {
3936                        "transactional"
3937                    } else {
3938                        "problematic"
3939                    };
3940                    snap.vendor_clusters
3941                        .push((vendor.vendor_id.clone(), cluster.to_string()));
3942                }
3943                let _ = tier3_count; // retained for clarity; tier 3 bucket is the remainder
3944            }
3945        }
3946
3947        // --- Customer segmentation ---
3948        let cs = &self.config.customer_segmentation;
3949        if cs.enabled {
3950            let seg = &cs.value_segments;
3951            for customer in &self.master_data.customers {
3952                let roll: f64 = rng.random();
3953                let value_segment = if roll < seg.enterprise.customer_share {
3954                    "enterprise"
3955                } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3956                    "mid_market"
3957                } else if roll
3958                    < seg.enterprise.customer_share
3959                        + seg.mid_market.customer_share
3960                        + seg.smb.customer_share
3961                {
3962                    "smb"
3963                } else {
3964                    "consumer"
3965                };
3966                snap.customer_value_segments
3967                    .push((customer.customer_id.clone(), value_segment.to_string()));
3968
3969                let roll2: f64 = rng.random();
3970                let life = &cs.lifecycle;
3971                let lifecycle = if roll2 < life.prospect_rate {
3972                    "prospect"
3973                } else if roll2 < life.prospect_rate + life.new_rate {
3974                    "new"
3975                } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3976                    "growth"
3977                } else if roll2
3978                    < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3979                {
3980                    "mature"
3981                } else if roll2
3982                    < life.prospect_rate
3983                        + life.new_rate
3984                        + life.growth_rate
3985                        + life.mature_rate
3986                        + life.at_risk_rate
3987                {
3988                    "at_risk"
3989                } else if roll2
3990                    < life.prospect_rate
3991                        + life.new_rate
3992                        + life.growth_rate
3993                        + life.mature_rate
3994                        + life.at_risk_rate
3995                        + life.churned_rate
3996                {
3997                    "churned"
3998                } else {
3999                    "won_back"
4000                };
4001                snap.customer_lifecycle_stages
4002                    .push((customer.customer_id.clone(), lifecycle.to_string()));
4003            }
4004        }
4005
4006        // --- Industry-specific metadata (minimal) ---
4007        let is = &self.config.industry_specific;
4008        if is.enabled {
4009            snap.industry_metadata.push(format!(
4010                "industry_specific.enabled=true (industry={:?})",
4011                self.config.global.industry
4012            ));
4013        }
4014
4015        snap
4016    }
4017
4018    // ========================================================================
4019    // Generation Phase Methods
4020    // ========================================================================
4021
4022    /// Phase 1: Generate Chart of Accounts and update statistics.
4023    fn phase_chart_of_accounts(
4024        &mut self,
4025        stats: &mut EnhancedGenerationStatistics,
4026    ) -> SynthResult<Arc<ChartOfAccounts>> {
4027        info!("Phase 1: Generating Chart of Accounts");
4028        let coa = self.generate_coa()?;
4029        stats.accounts_count = coa.account_count();
4030        info!(
4031            "Chart of Accounts generated: {} accounts",
4032            stats.accounts_count
4033        );
4034        self.check_resources_with_log("post-coa")?;
4035        Ok(coa)
4036    }
4037
4038    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
4039    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
4040        if self.phase_config.generate_master_data {
4041            info!("Phase 2: Generating Master Data");
4042            self.generate_master_data()?;
4043            stats.vendor_count = self.master_data.vendors.len();
4044            stats.customer_count = self.master_data.customers.len();
4045            stats.material_count = self.master_data.materials.len();
4046            stats.asset_count = self.master_data.assets.len();
4047            stats.employee_count = self.master_data.employees.len();
4048            info!(
4049                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
4050                stats.vendor_count, stats.customer_count, stats.material_count,
4051                stats.asset_count, stats.employee_count
4052            );
4053            self.check_resources_with_log("post-master-data")?;
4054        } else {
4055            debug!("Phase 2: Skipped (master data generation disabled)");
4056        }
4057        Ok(())
4058    }
4059
4060    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
4061    fn phase_document_flows(
4062        &mut self,
4063        stats: &mut EnhancedGenerationStatistics,
4064    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
4065        let mut document_flows = DocumentFlowSnapshot::default();
4066        let mut subledger = SubledgerSnapshot::default();
4067        // Dunning JEs (interest + charges) accumulated here and merged into the
4068        // main FA-JE list below so they appear in the GL.
4069        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
4070
4071        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
4072            info!("Phase 3: Generating Document Flows");
4073            self.generate_document_flows(&mut document_flows)?;
4074            stats.p2p_chain_count = document_flows.p2p_chains.len();
4075            stats.o2c_chain_count = document_flows.o2c_chains.len();
4076            info!(
4077                "Document flows generated: {} P2P chains, {} O2C chains",
4078                stats.p2p_chain_count, stats.o2c_chain_count
4079            );
4080
4081            // Phase 3b: Link document flows to subledgers (for data coherence)
4082            debug!("Phase 3b: Linking document flows to subledgers");
4083            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
4084            stats.ap_invoice_count = subledger.ap_invoices.len();
4085            stats.ar_invoice_count = subledger.ar_invoices.len();
4086            debug!(
4087                "Subledgers linked: {} AP invoices, {} AR invoices",
4088                stats.ap_invoice_count, stats.ar_invoice_count
4089            );
4090
4091            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
4092            // Without this step the subledger is systematically overstated because
4093            // amount_remaining is set at invoice creation and never reduced by
4094            // the payments that were generated in the document-flow phase.
4095            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
4096            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
4097            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
4098            debug!("Payment settlements applied to AP and AR subledgers");
4099
4100            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
4101            // The as-of date is the last day of the configured period.
4102            if let Ok(start_date) =
4103                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4104            {
4105                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4106                    - chrono::Days::new(1);
4107                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4108                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
4109                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
4110                // derived from JE-level aggregation and will typically differ. This is a known
4111                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
4112                // generated independently. A future reconciliation phase should align them by
4113                // using subledger totals as the authoritative source for BS Receivables.
4114                for company in &self.config.companies {
4115                    let ar_report = ARAgingReport::from_invoices(
4116                        company.code.clone(),
4117                        &subledger.ar_invoices,
4118                        as_of_date,
4119                    );
4120                    subledger.ar_aging_reports.push(ar_report);
4121
4122                    let ap_report = APAgingReport::from_invoices(
4123                        company.code.clone(),
4124                        &subledger.ap_invoices,
4125                        as_of_date,
4126                    );
4127                    subledger.ap_aging_reports.push(ap_report);
4128                }
4129                debug!(
4130                    "AR/AP aging reports built: {} AR, {} AP",
4131                    subledger.ar_aging_reports.len(),
4132                    subledger.ap_aging_reports.len()
4133                );
4134
4135                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
4136                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4137                {
4138                    use datasynth_generators::DunningGenerator;
4139                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4140                    for company in &self.config.companies {
4141                        let currency = company.currency.as_str();
4142                        // Collect mutable references to AR invoices for this company
4143                        // (dunning generator updates dunning_info on invoices in-place).
4144                        let mut company_invoices: Vec<
4145                            datasynth_core::models::subledger::ar::ARInvoice,
4146                        > = subledger
4147                            .ar_invoices
4148                            .iter()
4149                            .filter(|inv| inv.company_code == company.code)
4150                            .cloned()
4151                            .collect();
4152
4153                        if company_invoices.is_empty() {
4154                            continue;
4155                        }
4156
4157                        let result = dunning_gen.execute_dunning_run(
4158                            &company.code,
4159                            as_of_date,
4160                            &mut company_invoices,
4161                            currency,
4162                        );
4163
4164                        // Write back updated dunning info to the main AR invoice list
4165                        for updated in &company_invoices {
4166                            if let Some(orig) = subledger
4167                                .ar_invoices
4168                                .iter_mut()
4169                                .find(|i| i.invoice_number == updated.invoice_number)
4170                            {
4171                                orig.dunning_info = updated.dunning_info.clone();
4172                            }
4173                        }
4174
4175                        subledger.dunning_runs.push(result.dunning_run);
4176                        subledger.dunning_letters.extend(result.letters);
4177                        // Dunning JEs (interest + charges) collected into local buffer.
4178                        dunning_journal_entries.extend(result.journal_entries);
4179                    }
4180                    debug!(
4181                        "Dunning runs complete: {} runs, {} letters",
4182                        subledger.dunning_runs.len(),
4183                        subledger.dunning_letters.len()
4184                    );
4185                }
4186            }
4187
4188            self.check_resources_with_log("post-document-flows")?;
4189        } else {
4190            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4191        }
4192
4193        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
4194        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4195        if !self.master_data.assets.is_empty() {
4196            debug!("Generating FA subledger records");
4197            let company_code = self
4198                .config
4199                .companies
4200                .first()
4201                .map(|c| c.code.as_str())
4202                .unwrap_or("1000");
4203            let currency = self
4204                .config
4205                .companies
4206                .first()
4207                .map(|c| c.currency.as_str())
4208                .unwrap_or("USD");
4209
4210            let mut fa_gen = datasynth_generators::FAGenerator::new(
4211                datasynth_generators::FAGeneratorConfig::default(),
4212                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4213            );
4214
4215            for asset in &self.master_data.assets {
4216                let (record, je) = fa_gen.generate_asset_acquisition(
4217                    company_code,
4218                    &format!("{:?}", asset.asset_class),
4219                    &asset.description,
4220                    asset.acquisition_date,
4221                    currency,
4222                    asset.cost_center.as_deref(),
4223                );
4224                subledger.fa_records.push(record);
4225                fa_journal_entries.push(je);
4226            }
4227
4228            stats.fa_subledger_count = subledger.fa_records.len();
4229            debug!(
4230                "FA subledger records generated: {} (with {} acquisition JEs)",
4231                stats.fa_subledger_count,
4232                fa_journal_entries.len()
4233            );
4234        }
4235
4236        // Generate Inventory subledger records from master data materials
4237        if !self.master_data.materials.is_empty() {
4238            debug!("Generating Inventory subledger records");
4239            let first_company = self.config.companies.first();
4240            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4241            let inv_currency = first_company
4242                .map(|c| c.currency.clone())
4243                .unwrap_or_else(|| "USD".to_string());
4244
4245            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4246                datasynth_generators::InventoryGeneratorConfig::default(),
4247                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4248                inv_currency.clone(),
4249            );
4250
4251            for (i, material) in self.master_data.materials.iter().enumerate() {
4252                let plant = format!("PLANT{:02}", (i % 3) + 1);
4253                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4254                let initial_qty = rust_decimal::Decimal::from(
4255                    material
4256                        .safety_stock
4257                        .to_string()
4258                        .parse::<i64>()
4259                        .unwrap_or(100),
4260                );
4261
4262                let position = inv_gen.generate_position(
4263                    company_code,
4264                    &plant,
4265                    &storage_loc,
4266                    &material.material_id,
4267                    &material.description,
4268                    initial_qty,
4269                    Some(material.standard_cost),
4270                    &inv_currency,
4271                );
4272                subledger.inventory_positions.push(position);
4273            }
4274
4275            stats.inventory_subledger_count = subledger.inventory_positions.len();
4276            debug!(
4277                "Inventory subledger records generated: {}",
4278                stats.inventory_subledger_count
4279            );
4280        }
4281
4282        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
4283        if !subledger.fa_records.is_empty() {
4284            if let Ok(start_date) =
4285                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4286            {
4287                let company_code = self
4288                    .config
4289                    .companies
4290                    .first()
4291                    .map(|c| c.code.as_str())
4292                    .unwrap_or("1000");
4293                let fiscal_year = start_date.year();
4294                let start_period = start_date.month();
4295                let end_period =
4296                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4297
4298                let depr_cfg = FaDepreciationScheduleConfig {
4299                    fiscal_year,
4300                    start_period,
4301                    end_period,
4302                    seed_offset: 800,
4303                };
4304                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4305                let runs = depr_gen.generate(company_code, &subledger.fa_records);
4306                let run_count = runs.len();
4307                subledger.depreciation_runs = runs;
4308                debug!(
4309                    "Depreciation runs generated: {} runs for {} periods",
4310                    run_count, self.config.global.period_months
4311                );
4312            }
4313        }
4314
4315        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
4316        if !subledger.inventory_positions.is_empty() {
4317            if let Ok(start_date) =
4318                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4319            {
4320                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4321                    - chrono::Days::new(1);
4322
4323                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4324                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4325
4326                for company in &self.config.companies {
4327                    let result = inv_val_gen.generate(
4328                        &company.code,
4329                        &subledger.inventory_positions,
4330                        as_of_date,
4331                    );
4332                    subledger.inventory_valuations.push(result);
4333                }
4334                debug!(
4335                    "Inventory valuations generated: {} company reports",
4336                    subledger.inventory_valuations.len()
4337                );
4338            }
4339        }
4340
4341        Ok((document_flows, subledger, fa_journal_entries))
4342    }
4343
4344    /// Phase 3c: Generate OCPM events from document flows.
4345    #[allow(clippy::too_many_arguments)]
4346    fn phase_ocpm_events(
4347        &mut self,
4348        document_flows: &DocumentFlowSnapshot,
4349        sourcing: &SourcingSnapshot,
4350        hr: &HrSnapshot,
4351        manufacturing: &ManufacturingSnapshot,
4352        banking: &BankingSnapshot,
4353        audit: &AuditSnapshot,
4354        financial_reporting: &FinancialReportingSnapshot,
4355        stats: &mut EnhancedGenerationStatistics,
4356    ) -> SynthResult<OcpmSnapshot> {
4357        let degradation = self.check_resources()?;
4358        if degradation >= DegradationLevel::Reduced {
4359            debug!(
4360                "Phase skipped due to resource pressure (degradation: {:?})",
4361                degradation
4362            );
4363            return Ok(OcpmSnapshot::default());
4364        }
4365        if self.phase_config.generate_ocpm_events {
4366            info!("Phase 3c: Generating OCPM Events");
4367            let ocpm_snapshot = self.generate_ocpm_events(
4368                document_flows,
4369                sourcing,
4370                hr,
4371                manufacturing,
4372                banking,
4373                audit,
4374                financial_reporting,
4375            )?;
4376            stats.ocpm_event_count = ocpm_snapshot.event_count;
4377            stats.ocpm_object_count = ocpm_snapshot.object_count;
4378            stats.ocpm_case_count = ocpm_snapshot.case_count;
4379            info!(
4380                "OCPM events generated: {} events, {} objects, {} cases",
4381                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4382            );
4383            self.check_resources_with_log("post-ocpm")?;
4384            Ok(ocpm_snapshot)
4385        } else {
4386            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4387            Ok(OcpmSnapshot::default())
4388        }
4389    }
4390
4391    /// Phase 4: Generate journal entries from document flows and standalone generation.
4392    fn phase_journal_entries(
4393        &mut self,
4394        coa: &Arc<ChartOfAccounts>,
4395        document_flows: &DocumentFlowSnapshot,
4396        _stats: &mut EnhancedGenerationStatistics,
4397    ) -> SynthResult<Vec<JournalEntry>> {
4398        let mut entries = Vec::new();
4399
4400        // Phase 4a: Generate JEs from document flows (for data coherence)
4401        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4402            debug!("Phase 4a: Generating JEs from document flows");
4403            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4404            debug!("Generated {} JEs from document flows", flow_entries.len());
4405            entries.extend(flow_entries);
4406        }
4407
4408        // Phase 4b: Generate standalone journal entries
4409        if self.phase_config.generate_journal_entries {
4410            info!("Phase 4: Generating Journal Entries");
4411            let je_entries = self.generate_journal_entries(coa)?;
4412            info!("Generated {} standalone journal entries", je_entries.len());
4413            entries.extend(je_entries);
4414        } else {
4415            debug!("Phase 4: Skipped (journal entry generation disabled)");
4416        }
4417
4418        // Phase 4c (shard mode): inject pre-built IC journal entries from
4419        // `ShardContext`. When running standalone (no group engine), this
4420        // is a no-op. See crate::shard_context::ShardContext for rationale.
4421        if let Some(ctx) = &self.shard_context {
4422            if !ctx.extra_journal_entries.is_empty() {
4423                debug!(
4424                    "Phase 4c: appending {} shard-mode IC journal entries",
4425                    ctx.extra_journal_entries.len()
4426                );
4427                entries.extend(ctx.extra_journal_entries.iter().cloned());
4428            }
4429        }
4430
4431        if !entries.is_empty() {
4432            // Note: stats.total_entries/total_line_items are set in generate()
4433            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
4434            self.check_resources_with_log("post-journal-entries")?;
4435        }
4436
4437        Ok(entries)
4438    }
4439
4440    /// Phase 5: Inject anomalies into journal entries.
4441    fn phase_anomaly_injection(
4442        &mut self,
4443        entries: &mut [JournalEntry],
4444        actions: &DegradationActions,
4445        stats: &mut EnhancedGenerationStatistics,
4446    ) -> SynthResult<AnomalyLabels> {
4447        if self.phase_config.inject_anomalies
4448            && !entries.is_empty()
4449            && !actions.skip_anomaly_injection
4450        {
4451            info!("Phase 5: Injecting Anomalies");
4452            let result = self.inject_anomalies(entries)?;
4453            stats.anomalies_injected = result.labels.len();
4454            info!("Injected {} anomalies", stats.anomalies_injected);
4455            self.check_resources_with_log("post-anomaly-injection")?;
4456            Ok(result)
4457        } else if actions.skip_anomaly_injection {
4458            warn!("Phase 5: Skipped due to resource degradation");
4459            Ok(AnomalyLabels::default())
4460        } else {
4461            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4462            Ok(AnomalyLabels::default())
4463        }
4464    }
4465
4466    /// Phase 8d (W8.1): TB drift-correction pass.
4467    ///
4468    /// Builds a `RunningBalanceTracker` over all JEs assembled so far, attaches
4469    /// the TB anchor prior (when available), and — if `drift_correction_needed()`
4470    /// fires for any company — emits one balanced "SA" adjustment JE per company
4471    /// to pull the synthetic balances toward the corpus-median targets.
4472    ///
4473    /// No-op when no TB anchor is loaded (backwards-compatible).
4474    fn phase_tb_drift_correction(&mut self, entries: &mut Vec<JournalEntry>) -> SynthResult<()> {
4475        // Only proceed when priors with a TB anchor are loaded.
4476        let tb_anchor = match &self.cached_priors {
4477            Some(priors) => match &priors.tb_anchor {
4478                Some(anchor) => anchor.clone(),
4479                None => return Ok(()),
4480            },
4481            None => return Ok(()),
4482        };
4483
4484        if !tb_anchor.has_data() {
4485            return Ok(());
4486        }
4487
4488        tracing::info!(
4489            target: "datasynth_runtime::tb_anchor",
4490            accounts = tb_anchor.per_account.len(),
4491            total_assets = tb_anchor.total_assets,
4492            "W8.1 — TB anchor loaded; running drift-correction pass"
4493        );
4494
4495        // Build a tracker over all current JEs.
4496        let tracker_config = BalanceTrackerConfig {
4497            validate_on_each_entry: false,
4498            track_history: false,
4499            fail_on_validation_error: false,
4500            ..Default::default()
4501        };
4502        let currency = self
4503            .config
4504            .companies
4505            .first()
4506            .map(|c| c.currency.clone())
4507            .unwrap_or_else(|| "USD".to_string());
4508
4509        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, currency);
4510        tracker.set_tb_anchor(tb_anchor.clone());
4511        let _ = tracker.apply_entries(entries);
4512
4513        // SP5.1 — Diagnostic: log the number of accounts being tracked vs in the
4514        // anchor, plus the top-5 most-drifted accounts for each company so we
4515        // can distinguish "no drift" from "drift below threshold" at a glance.
4516        for company in &self.config.companies {
4517            let code = &company.code;
4518            let drifts = tracker.account_drift(code);
4519            let mut sorted_drifts = drifts.clone();
4520            sorted_drifts.sort_by(|a, b| {
4521                b.1.abs()
4522                    .partial_cmp(&a.1.abs())
4523                    .unwrap_or(std::cmp::Ordering::Equal)
4524            });
4525            let aggregate_drift: f64 = drifts.iter().map(|(_, d)| d.abs()).sum();
4526            let correction_needed = tracker.drift_correction_needed(code);
4527            tracing::info!(
4528                target: "datasynth_runtime::tb_anchor",
4529                company = %code,
4530                anchor_accounts = tb_anchor.per_account.len(),
4531                tracked_accounts = drifts.len(),
4532                aggregate_drift = aggregate_drift,
4533                correction_needed = correction_needed,
4534                "W8.1 SP5.1 — per-company drift summary before correction"
4535            );
4536            for (acc, drift) in sorted_drifts.iter().take(5) {
4537                tracing::info!(
4538                    target: "datasynth_runtime::tb_anchor",
4539                    company = %code,
4540                    account = %acc,
4541                    drift = drift,
4542                    "W8.1 SP5.1 — top-5 drifted accounts"
4543                );
4544            }
4545        }
4546
4547        // Derive the posting date: use the last day of the simulation period.
4548        let period_end = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4549            .map(|d| d + chrono::Months::new(self.config.global.period_months))
4550            .unwrap_or_else(|_| chrono::Utc::now().naive_utc().date());
4551
4552        // Distinct seed offset so drift-correction draws are independent of other phases.
4553        use rand_chacha::rand_core::SeedableRng as _;
4554        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(0xD81F_C0F3));
4555
4556        let mut correction_count = 0usize;
4557        for company in &self.config.companies {
4558            let code = &company.code;
4559            if !tracker.drift_correction_needed(code) {
4560                tracing::debug!(
4561                    target: "datasynth_runtime::tb_anchor",
4562                    company = %code,
4563                    "W8.1 — drift_correction_needed returned false; skipping company"
4564                );
4565                continue;
4566            }
4567            if let Some(je) = tracker.build_drift_correction_je(code, period_end, &mut rng) {
4568                tracing::debug!(
4569                    target: "datasynth_runtime::tb_anchor",
4570                    company = %code,
4571                    lines = je.lines.len(),
4572                    debit = %je.total_debit(),
4573                    credit = %je.total_credit(),
4574                    "W8.1 — emitting drift-correction JE"
4575                );
4576                // Apply the correction to the tracker so the running state is current.
4577                let _ = tracker.apply_entry(&je);
4578                entries.push(je);
4579                correction_count += 1;
4580            }
4581        }
4582
4583        if correction_count > 0 {
4584            tracing::info!(
4585                target: "datasynth_runtime::tb_anchor",
4586                correction_count,
4587                "W8.1 — drift-correction pass emitted {} JE(s)",
4588                correction_count
4589            );
4590        } else {
4591            tracing::debug!(
4592                target: "datasynth_runtime::tb_anchor",
4593                "W8.1 — drift-correction pass: no corrections needed"
4594            );
4595        }
4596
4597        Ok(())
4598    }
4599
4600    /// Phase 6: Validate balance sheet equation on journal entries.
4601    fn phase_balance_validation(
4602        &mut self,
4603        entries: &[JournalEntry],
4604    ) -> SynthResult<BalanceValidationResult> {
4605        if self.phase_config.validate_balances && !entries.is_empty() {
4606            debug!("Phase 6: Validating Balances");
4607            let balance_validation = self.validate_journal_entries(entries)?;
4608            if balance_validation.is_balanced {
4609                debug!("Balance validation passed");
4610            } else {
4611                warn!(
4612                    "Balance validation found {} errors",
4613                    balance_validation.validation_errors.len()
4614                );
4615            }
4616            Ok(balance_validation)
4617        } else {
4618            Ok(BalanceValidationResult::default())
4619        }
4620    }
4621
4622    /// Validate that every `gl_account` referenced in `entries` exists in the
4623    /// chart of accounts.
4624    ///
4625    /// Always emits a warn-level log when the COA is missing accounts; in
4626    /// strict mode (`phase_config.validate_coa_coverage_strict`) returns
4627    /// `SynthError::generation` so the caller can fail fast.
4628    fn validate_coa_coverage(
4629        &self,
4630        entries: &[JournalEntry],
4631        coa: &ChartOfAccounts,
4632    ) -> SynthResult<()> {
4633        if entries.is_empty() {
4634            return Ok(());
4635        }
4636        let coa_set: std::collections::HashSet<&str> = coa
4637            .accounts
4638            .iter()
4639            .map(|a| a.account_number.as_str())
4640            .collect();
4641        let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4642        for je in entries {
4643            for line in je.lines.iter() {
4644                if !coa_set.contains(line.gl_account.as_str()) {
4645                    missing.insert(line.gl_account.clone());
4646                }
4647            }
4648        }
4649        if missing.is_empty() {
4650            debug!("COA coverage validation passed");
4651            return Ok(());
4652        }
4653        let msg = format!(
4654            "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4655            missing.len(),
4656            missing.iter().take(10).collect::<Vec<_>>()
4657        );
4658        if self.phase_config.validate_coa_coverage_strict {
4659            Err(SynthError::generation(msg))
4660        } else {
4661            warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4662            Ok(())
4663        }
4664    }
4665
4666    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
4667    fn phase_data_quality_injection(
4668        &mut self,
4669        entries: &mut [JournalEntry],
4670        actions: &DegradationActions,
4671        stats: &mut EnhancedGenerationStatistics,
4672    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4673        if self.phase_config.inject_data_quality
4674            && !entries.is_empty()
4675            && !actions.skip_data_quality
4676        {
4677            info!("Phase 7: Injecting Data Quality Variations");
4678            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4679            stats.data_quality_issues = dq_stats.records_with_issues;
4680            info!("Injected {} data quality issues", stats.data_quality_issues);
4681            self.check_resources_with_log("post-data-quality")?;
4682            Ok((dq_stats, quality_issues))
4683        } else if actions.skip_data_quality {
4684            warn!("Phase 7: Skipped due to resource degradation");
4685            // v4.4.1: report the denominator (entries seen) even when
4686            // injection is skipped, so downstream consumers can tell
4687            // "skipped, 0/N" apart from "ran but found nothing".
4688            Ok((stats_with_denominator(entries.len()), Vec::new()))
4689        } else {
4690            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4691            Ok((stats_with_denominator(entries.len()), Vec::new()))
4692        }
4693    }
4694
4695    /// Phase 10b: Generate period-close journal entries.
4696    ///
4697    /// Generates:
4698    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
4699    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
4700    ///    for the configured period.
4701    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
4702    /// 3. Income statement closing JE per company: transfer net income after tax to retained
4703    ///    earnings via the Income Summary (3600) clearing account.
4704    fn phase_period_close(
4705        &mut self,
4706        entries: &mut Vec<JournalEntry>,
4707        subledger: &SubledgerSnapshot,
4708        stats: &mut EnhancedGenerationStatistics,
4709    ) -> SynthResult<()> {
4710        if !self.phase_config.generate_period_close || entries.is_empty() {
4711            debug!("Phase 10b: Skipped (period close disabled or no entries)");
4712            return Ok(());
4713        }
4714
4715        info!("Phase 10b: Generating period-close journal entries");
4716
4717        use datasynth_core::accounts::{
4718            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4719        };
4720        use rust_decimal::Decimal;
4721
4722        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4723            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4724        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4725        // Posting date for close entries is the last day of the period
4726        let close_date = end_date - chrono::Days::new(1);
4727
4728        // Statutory tax rate (21% — configurable rates come in later tiers)
4729        let tax_rate = Decimal::new(21, 2); // 0.21
4730
4731        // Collect company codes from config
4732        let company_codes: Vec<String> = self
4733            .config
4734            .companies
4735            .iter()
4736            .map(|c| c.code.clone())
4737            .collect();
4738
4739        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
4740        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4741        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4742
4743        // --- Depreciation JEs (per asset) ---
4744        // Compute period depreciation for each active fixed asset using straight-line method.
4745        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
4746        let period_months = self.config.global.period_months;
4747        for asset in &subledger.fa_records {
4748            // Skip assets that are inactive / fully depreciated / non-depreciable
4749            use datasynth_core::models::subledger::fa::AssetStatus;
4750            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4751                continue;
4752            }
4753            let useful_life_months = asset.useful_life_months();
4754            if useful_life_months == 0 {
4755                // Land or CIP — not depreciated
4756                continue;
4757            }
4758            let salvage_value = asset.salvage_value();
4759            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4760            if depreciable_base == Decimal::ZERO {
4761                continue;
4762            }
4763            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4764                * Decimal::from(period_months))
4765            .round_dp(2);
4766            if period_depr <= Decimal::ZERO {
4767                continue;
4768            }
4769
4770            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4771            depr_header.document_type = "CL".to_string();
4772            depr_header.header_text = Some(format!(
4773                "Depreciation - {} {}",
4774                asset.asset_number, asset.description
4775            ));
4776            depr_header.created_by = "CLOSE_ENGINE".to_string();
4777            depr_header.source = TransactionSource::Automated;
4778            depr_header.business_process = Some(BusinessProcess::R2R);
4779
4780            let doc_id = depr_header.document_id;
4781            let mut depr_je = JournalEntry::new(depr_header);
4782
4783            // DR Depreciation Expense (6000)
4784            depr_je.add_line(JournalEntryLine::debit(
4785                doc_id,
4786                1,
4787                expense_accounts::DEPRECIATION.to_string(),
4788                period_depr,
4789            ));
4790            // CR Accumulated Depreciation (1510)
4791            depr_je.add_line(JournalEntryLine::credit(
4792                doc_id,
4793                2,
4794                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4795                period_depr,
4796            ));
4797
4798            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4799            close_jes.push(depr_je);
4800        }
4801
4802        if !subledger.fa_records.is_empty() {
4803            debug!(
4804                "Generated {} depreciation JEs from {} FA records",
4805                close_jes.len(),
4806                subledger.fa_records.len()
4807            );
4808        }
4809
4810        // --- Accrual entries (standard period-end accruals per company) ---
4811        // Generate standard accrued expense entries (utilities, rent, interest) using
4812        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
4813        {
4814            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4815            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4816            // v3.4.3: snap reversal dates to business days. No-op when
4817            // temporal_patterns.business_days is disabled.
4818            if let Some(ctx) = &self.temporal_context {
4819                accrual_gen.set_temporal_context(Arc::clone(ctx));
4820            }
4821
4822            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
4823            let accrual_items: &[(&str, &str, &str)] = &[
4824                ("Accrued Utilities", "6200", "2100"),
4825                ("Accrued Rent", "6300", "2100"),
4826                ("Accrued Interest", "6100", "2150"),
4827            ];
4828
4829            for company_code in &company_codes {
4830                // Estimate company revenue from existing JEs
4831                let company_revenue: Decimal = entries
4832                    .iter()
4833                    .filter(|e| e.header.company_code == *company_code)
4834                    .flat_map(|e| e.lines.iter())
4835                    .filter(|l| l.gl_account.starts_with('4'))
4836                    .map(|l| l.credit_amount - l.debit_amount)
4837                    .fold(Decimal::ZERO, |acc, v| acc + v);
4838
4839                if company_revenue <= Decimal::ZERO {
4840                    continue;
4841                }
4842
4843                // Use 0.5% of period revenue per accrual item as a proxy
4844                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4845                if accrual_base <= Decimal::ZERO {
4846                    continue;
4847                }
4848
4849                for (description, expense_acct, liability_acct) in accrual_items {
4850                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4851                        company_code,
4852                        description,
4853                        accrual_base,
4854                        expense_acct,
4855                        liability_acct,
4856                        close_date,
4857                        None,
4858                    );
4859                    close_jes.push(accrual_je);
4860                    if let Some(rev_je) = reversal_je {
4861                        close_jes.push(rev_je);
4862                    }
4863                }
4864            }
4865
4866            debug!(
4867                "Generated accrual entries for {} companies",
4868                company_codes.len()
4869            );
4870        }
4871
4872        for company_code in &company_codes {
4873            // Calculate net income for this company from existing JEs:
4874            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
4875            // Revenue (4xxx): credit-normal, so net = credits - debits
4876            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
4877            let mut total_revenue = Decimal::ZERO;
4878            let mut total_expenses = Decimal::ZERO;
4879
4880            for entry in entries.iter() {
4881                if entry.header.company_code != *company_code {
4882                    continue;
4883                }
4884                for line in &entry.lines {
4885                    let category = AccountCategory::from_account(&line.gl_account);
4886                    match category {
4887                        AccountCategory::Revenue => {
4888                            // Revenue is credit-normal: net revenue = credits - debits
4889                            total_revenue += line.credit_amount - line.debit_amount;
4890                        }
4891                        AccountCategory::Cogs
4892                        | AccountCategory::OperatingExpense
4893                        | AccountCategory::OtherIncomeExpense
4894                        | AccountCategory::Tax => {
4895                            // Expenses are debit-normal: net expense = debits - credits
4896                            total_expenses += line.debit_amount - line.credit_amount;
4897                        }
4898                        _ => {}
4899                    }
4900                }
4901            }
4902
4903            let pre_tax_income = total_revenue - total_expenses;
4904
4905            // Skip if no income statement activity
4906            if pre_tax_income == Decimal::ZERO {
4907                debug!(
4908                    "Company {}: no pre-tax income, skipping period close",
4909                    company_code
4910                );
4911                continue;
4912            }
4913
4914            // --- Tax provision / DTA JE ---
4915            if pre_tax_income > Decimal::ZERO {
4916                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4917                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4918
4919                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4920                tax_header.document_type = "CL".to_string();
4921                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4922                tax_header.created_by = "CLOSE_ENGINE".to_string();
4923                tax_header.source = TransactionSource::Automated;
4924                tax_header.business_process = Some(BusinessProcess::R2R);
4925
4926                let doc_id = tax_header.document_id;
4927                let mut tax_je = JournalEntry::new(tax_header);
4928
4929                // DR Tax Expense (8000)
4930                tax_je.add_line(JournalEntryLine::debit(
4931                    doc_id,
4932                    1,
4933                    tax_accounts::TAX_EXPENSE.to_string(),
4934                    tax_amount,
4935                ));
4936                // CR Income Tax Payable (2130)
4937                tax_je.add_line(JournalEntryLine::credit(
4938                    doc_id,
4939                    2,
4940                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4941                    tax_amount,
4942                ));
4943
4944                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4945                close_jes.push(tax_je);
4946            } else {
4947                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
4948                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
4949                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4950                if dta_amount > Decimal::ZERO {
4951                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4952                    dta_header.document_type = "CL".to_string();
4953                    dta_header.header_text =
4954                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
4955                    dta_header.created_by = "CLOSE_ENGINE".to_string();
4956                    dta_header.source = TransactionSource::Automated;
4957                    dta_header.business_process = Some(BusinessProcess::R2R);
4958
4959                    let doc_id = dta_header.document_id;
4960                    let mut dta_je = JournalEntry::new(dta_header);
4961
4962                    // DR Deferred Tax Asset (1600)
4963                    dta_je.add_line(JournalEntryLine::debit(
4964                        doc_id,
4965                        1,
4966                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4967                        dta_amount,
4968                    ));
4969                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
4970                    // reflecting the benefit of the future deductible temporary difference.
4971                    dta_je.add_line(JournalEntryLine::credit(
4972                        doc_id,
4973                        2,
4974                        tax_accounts::TAX_EXPENSE.to_string(),
4975                        dta_amount,
4976                    ));
4977
4978                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4979                    close_jes.push(dta_je);
4980                    debug!(
4981                        "Company {}: loss year — recognised DTA of {}",
4982                        company_code, dta_amount
4983                    );
4984                }
4985            }
4986
4987            // --- Dividend JEs (v2.4) ---
4988            // If the entity is profitable after tax, declare a 10% dividend payout.
4989            // This runs AFTER tax provision so the dividend is based on post-tax income
4990            // but BEFORE the retained earnings close so the RE transfer reflects the
4991            // reduced balance.
4992            let tax_provision = if pre_tax_income > Decimal::ZERO {
4993                (pre_tax_income * tax_rate).round_dp(2)
4994            } else {
4995                Decimal::ZERO
4996            };
4997            let net_income = pre_tax_income - tax_provision;
4998
4999            if net_income > Decimal::ZERO {
5000                use datasynth_generators::DividendGenerator;
5001                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
5002                let mut div_gen = DividendGenerator::new(self.seed + 460);
5003                let currency_str = self
5004                    .config
5005                    .companies
5006                    .iter()
5007                    .find(|c| c.code == *company_code)
5008                    .map(|c| c.currency.as_str())
5009                    .unwrap_or("USD");
5010                let div_result = div_gen.generate(
5011                    company_code,
5012                    close_date,
5013                    Decimal::new(1, 0), // $1 per share placeholder
5014                    dividend_amount,
5015                    currency_str,
5016                );
5017                let div_je_count = div_result.journal_entries.len();
5018                close_jes.extend(div_result.journal_entries);
5019                debug!(
5020                    "Company {}: declared dividend of {} ({} JEs)",
5021                    company_code, dividend_amount, div_je_count
5022                );
5023            }
5024
5025            // --- Income statement closing JE ---
5026            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
5027            // For a loss year the DTA JE above already recognises the deferred benefit; here we
5028            // close the pre-tax loss into Retained Earnings as-is.
5029            if net_income != Decimal::ZERO {
5030                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
5031                close_header.document_type = "CL".to_string();
5032                close_header.header_text =
5033                    Some(format!("Income statement close - {}", company_code));
5034                close_header.created_by = "CLOSE_ENGINE".to_string();
5035                close_header.source = TransactionSource::Automated;
5036                close_header.business_process = Some(BusinessProcess::R2R);
5037
5038                let doc_id = close_header.document_id;
5039                let mut close_je = JournalEntry::new(close_header);
5040
5041                let abs_net_income = net_income.abs();
5042
5043                if net_income > Decimal::ZERO {
5044                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
5045                    close_je.add_line(JournalEntryLine::debit(
5046                        doc_id,
5047                        1,
5048                        equity_accounts::INCOME_SUMMARY.to_string(),
5049                        abs_net_income,
5050                    ));
5051                    close_je.add_line(JournalEntryLine::credit(
5052                        doc_id,
5053                        2,
5054                        equity_accounts::RETAINED_EARNINGS.to_string(),
5055                        abs_net_income,
5056                    ));
5057                } else {
5058                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
5059                    close_je.add_line(JournalEntryLine::debit(
5060                        doc_id,
5061                        1,
5062                        equity_accounts::RETAINED_EARNINGS.to_string(),
5063                        abs_net_income,
5064                    ));
5065                    close_je.add_line(JournalEntryLine::credit(
5066                        doc_id,
5067                        2,
5068                        equity_accounts::INCOME_SUMMARY.to_string(),
5069                        abs_net_income,
5070                    ));
5071                }
5072
5073                debug_assert!(
5074                    close_je.is_balanced(),
5075                    "Income statement closing JE must be balanced"
5076                );
5077                close_jes.push(close_je);
5078            }
5079        }
5080
5081        let close_count = close_jes.len();
5082        if close_count > 0 {
5083            info!("Generated {} period-close journal entries", close_count);
5084            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
5085            entries.extend(close_jes);
5086            stats.period_close_je_count = close_count;
5087
5088            // Update total entry/line-item stats
5089            stats.total_entries = entries.len() as u64;
5090            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
5091        } else {
5092            debug!("No period-close entries generated (no income statement activity)");
5093        }
5094
5095        Ok(())
5096    }
5097
5098    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
5099    fn phase_audit_data(
5100        &mut self,
5101        entries: &[JournalEntry],
5102        stats: &mut EnhancedGenerationStatistics,
5103    ) -> SynthResult<AuditSnapshot> {
5104        if self.phase_config.generate_audit {
5105            info!("Phase 8: Generating Audit Data");
5106            let audit_snapshot = self.generate_audit_data(entries)?;
5107            stats.audit_engagement_count = audit_snapshot.engagements.len();
5108            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
5109            stats.audit_evidence_count = audit_snapshot.evidence.len();
5110            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
5111            stats.audit_finding_count = audit_snapshot.findings.len();
5112            stats.audit_judgment_count = audit_snapshot.judgments.len();
5113            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
5114            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
5115            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
5116            stats.audit_sample_count = audit_snapshot.samples.len();
5117            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
5118            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
5119            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
5120            stats.audit_related_party_count = audit_snapshot.related_parties.len();
5121            stats.audit_related_party_transaction_count =
5122                audit_snapshot.related_party_transactions.len();
5123            info!(
5124                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
5125                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
5126                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
5127                 {} RP transactions",
5128                stats.audit_engagement_count,
5129                stats.audit_workpaper_count,
5130                stats.audit_evidence_count,
5131                stats.audit_risk_count,
5132                stats.audit_finding_count,
5133                stats.audit_judgment_count,
5134                stats.audit_confirmation_count,
5135                stats.audit_procedure_step_count,
5136                stats.audit_sample_count,
5137                stats.audit_analytical_result_count,
5138                stats.audit_ia_function_count,
5139                stats.audit_ia_report_count,
5140                stats.audit_related_party_count,
5141                stats.audit_related_party_transaction_count,
5142            );
5143            self.check_resources_with_log("post-audit")?;
5144            Ok(audit_snapshot)
5145        } else {
5146            debug!("Phase 8: Skipped (audit generation disabled)");
5147            Ok(AuditSnapshot::default())
5148        }
5149    }
5150
5151    /// Phase 9: Generate banking KYC/AML data.
5152    fn phase_banking_data(
5153        &mut self,
5154        stats: &mut EnhancedGenerationStatistics,
5155    ) -> SynthResult<BankingSnapshot> {
5156        if self.phase_config.generate_banking {
5157            info!("Phase 9: Generating Banking KYC/AML Data");
5158            let banking_snapshot = self.generate_banking_data()?;
5159            stats.banking_customer_count = banking_snapshot.customers.len();
5160            stats.banking_account_count = banking_snapshot.accounts.len();
5161            stats.banking_transaction_count = banking_snapshot.transactions.len();
5162            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
5163            info!(
5164                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
5165                stats.banking_customer_count, stats.banking_account_count,
5166                stats.banking_transaction_count, stats.banking_suspicious_count
5167            );
5168            self.check_resources_with_log("post-banking")?;
5169            Ok(banking_snapshot)
5170        } else {
5171            debug!("Phase 9: Skipped (banking generation disabled)");
5172            Ok(BankingSnapshot::default())
5173        }
5174    }
5175
5176    /// Phase 10: Export accounting network graphs for ML training.
5177    fn phase_graph_export(
5178        &mut self,
5179        entries: &[JournalEntry],
5180        coa: &Arc<ChartOfAccounts>,
5181        stats: &mut EnhancedGenerationStatistics,
5182    ) -> SynthResult<GraphExportSnapshot> {
5183        if self.phase_config.generate_graph_export && !entries.is_empty() {
5184            info!("Phase 10: Exporting Accounting Network Graphs");
5185            match self.export_graphs(entries, coa, stats) {
5186                Ok(snapshot) => {
5187                    info!(
5188                        "Graph export complete: {} graphs ({} nodes, {} edges)",
5189                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
5190                    );
5191                    Ok(snapshot)
5192                }
5193                Err(e) => {
5194                    warn!("Phase 10: Graph export failed: {}", e);
5195                    Ok(GraphExportSnapshot::default())
5196                }
5197            }
5198        } else {
5199            debug!("Phase 10: Skipped (graph export disabled or no entries)");
5200            Ok(GraphExportSnapshot::default())
5201        }
5202    }
5203
5204    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
5205    #[allow(clippy::too_many_arguments)]
5206    fn phase_hypergraph_export(
5207        &self,
5208        coa: &Arc<ChartOfAccounts>,
5209        entries: &[JournalEntry],
5210        document_flows: &DocumentFlowSnapshot,
5211        sourcing: &SourcingSnapshot,
5212        hr: &HrSnapshot,
5213        manufacturing: &ManufacturingSnapshot,
5214        banking: &BankingSnapshot,
5215        audit: &AuditSnapshot,
5216        financial_reporting: &FinancialReportingSnapshot,
5217        ocpm: &OcpmSnapshot,
5218        compliance: &ComplianceRegulationsSnapshot,
5219        stats: &mut EnhancedGenerationStatistics,
5220    ) -> SynthResult<()> {
5221        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
5222            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
5223            match self.export_hypergraph(
5224                coa,
5225                entries,
5226                document_flows,
5227                sourcing,
5228                hr,
5229                manufacturing,
5230                banking,
5231                audit,
5232                financial_reporting,
5233                ocpm,
5234                compliance,
5235                stats,
5236            ) {
5237                Ok(info) => {
5238                    info!(
5239                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
5240                        info.node_count, info.edge_count, info.hyperedge_count
5241                    );
5242                }
5243                Err(e) => {
5244                    warn!("Phase 10b: Hypergraph export failed: {}", e);
5245                }
5246            }
5247        } else {
5248            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5249        }
5250        Ok(())
5251    }
5252
5253    /// Phase 11: LLM Enrichment.
5254    ///
5255    /// Uses an LLM provider (mock by default) to enrich vendor names with
5256    /// realistic, context-aware names. This phase is non-blocking: failures
5257    /// log a warning but do not stop the generation pipeline.
5258    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5259        if !self.config.llm.enabled {
5260            debug!("Phase 11: Skipped (LLM enrichment disabled)");
5261            return;
5262        }
5263
5264        info!("Phase 11: Starting LLM Enrichment");
5265        let start = std::time::Instant::now();
5266
5267        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5268            // Select provider: use HttpLlmProvider when a non-mock provider is configured
5269            // and the corresponding API key environment variable is present.
5270            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5271                let schema_provider = &self.config.llm.provider;
5272                let api_key_env = match schema_provider.as_str() {
5273                    "openai" => Some("OPENAI_API_KEY"),
5274                    "anthropic" => Some("ANTHROPIC_API_KEY"),
5275                    "custom" => Some("LLM_API_KEY"),
5276                    _ => None,
5277                };
5278                if let Some(key_env) = api_key_env {
5279                    if std::env::var(key_env).is_ok() {
5280                        let llm_config = datasynth_core::llm::LlmConfig {
5281                            model: self.config.llm.model.clone(),
5282                            api_key_env: key_env.to_string(),
5283                            ..datasynth_core::llm::LlmConfig::default()
5284                        };
5285                        match HttpLlmProvider::new(llm_config) {
5286                            Ok(p) => Arc::new(p),
5287                            Err(e) => {
5288                                warn!(
5289                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
5290                                    e
5291                                );
5292                                Arc::new(MockLlmProvider::new(self.seed))
5293                            }
5294                        }
5295                    } else {
5296                        Arc::new(MockLlmProvider::new(self.seed))
5297                    }
5298                } else {
5299                    Arc::new(MockLlmProvider::new(self.seed))
5300                }
5301            };
5302            // v4.1.1+: multi-category enrichment. Vendors remain the
5303            // default path; customers and materials opt in via
5304            // `llm.enrich_customers` / `llm.enrich_materials` flags.
5305            let industry = format!("{:?}", self.config.global.industry);
5306
5307            let vendor_enricher =
5308                datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5309            let max_vendors = self
5310                .config
5311                .llm
5312                .max_vendor_enrichments
5313                .min(self.master_data.vendors.len());
5314            let mut vendors_enriched = 0usize;
5315            for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5316                match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5317                    Ok(name) => {
5318                        vendor.name = name;
5319                        vendors_enriched += 1;
5320                    }
5321                    Err(e) => warn!(
5322                        "LLM vendor enrichment failed for {}: {}",
5323                        vendor.vendor_id, e
5324                    ),
5325                }
5326            }
5327
5328            let mut customers_enriched = 0usize;
5329            if self.config.llm.enrich_customers {
5330                let customer_enricher =
5331                    datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5332                        &provider,
5333                    ));
5334                let max_customers = self
5335                    .config
5336                    .llm
5337                    .max_customer_enrichments
5338                    .min(self.master_data.customers.len());
5339                for customer in self.master_data.customers.iter_mut().take(max_customers) {
5340                    match customer_enricher.enrich_customer_name(
5341                        &industry,
5342                        "general",
5343                        &customer.country,
5344                    ) {
5345                        Ok(name) => {
5346                            customer.name = name;
5347                            customers_enriched += 1;
5348                        }
5349                        Err(e) => warn!(
5350                            "LLM customer enrichment failed for {}: {}",
5351                            customer.customer_id, e
5352                        ),
5353                    }
5354                }
5355            }
5356
5357            let mut materials_enriched = 0usize;
5358            if self.config.llm.enrich_materials {
5359                let material_enricher =
5360                    datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5361                        &provider,
5362                    ));
5363                let max_materials = self
5364                    .config
5365                    .llm
5366                    .max_material_enrichments
5367                    .min(self.master_data.materials.len());
5368                for material in self.master_data.materials.iter_mut().take(max_materials) {
5369                    let material_type = format!("{:?}", material.material_type);
5370                    match material_enricher.enrich_material_description(&material_type, &industry) {
5371                        Ok(desc) => {
5372                            material.description = desc;
5373                            materials_enriched += 1;
5374                        }
5375                        Err(e) => warn!(
5376                            "LLM material enrichment failed for {}: {}",
5377                            material.material_id, e
5378                        ),
5379                    }
5380                }
5381            }
5382
5383            (vendors_enriched, customers_enriched, materials_enriched)
5384        }));
5385
5386        match result {
5387            Ok((v, c, m)) => {
5388                stats.llm_vendors_enriched = v;
5389                stats.llm_customers_enriched = c;
5390                stats.llm_materials_enriched = m;
5391                let elapsed = start.elapsed();
5392                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5393                info!(
5394                    "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5395                    v, c, m, stats.llm_enrichment_ms
5396                );
5397            }
5398            Err(_) => {
5399                let elapsed = start.elapsed();
5400                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5401                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5402            }
5403        }
5404    }
5405
5406    /// Phase 12: Diffusion Enhancement.
5407    ///
5408    /// Generates a sample set matching distribution properties from the
5409    /// generated data. v4.4.0+ honours `config.diffusion.backend`:
5410    /// - `"statistical"` (default) — moment-matching backend, always fast.
5411    /// - `"neural"` / `"hybrid"` — candle-based score network. Requires
5412    ///   the `neural` Cargo feature; falls back to statistical when the
5413    ///   feature isn't compiled in, with a loud warning.
5414    ///
5415    /// This phase is non-blocking: failures log a warning but do not
5416    /// stop the pipeline.
5417    fn phase_diffusion_enhancement(
5418        &self,
5419        #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5420        stats: &mut EnhancedGenerationStatistics,
5421    ) {
5422        if !self.config.diffusion.enabled {
5423            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5424            return;
5425        }
5426
5427        info!("Phase 12: Starting Diffusion Enhancement");
5428        let start = std::time::Instant::now();
5429
5430        let backend_choice = self.config.diffusion.backend.as_str();
5431        let use_neural = matches!(backend_choice, "neural" | "hybrid");
5432
5433        if use_neural {
5434            #[cfg(feature = "neural")]
5435            {
5436                match self.run_neural_diffusion_phase(entries) {
5437                    Ok(sample_count) => {
5438                        stats.diffusion_samples_generated = sample_count;
5439                        let elapsed = start.elapsed();
5440                        stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5441                        info!(
5442                            "Phase 12 complete ({}): {} samples in {}ms",
5443                            backend_choice, sample_count, stats.diffusion_enhancement_ms
5444                        );
5445                        return;
5446                    }
5447                    Err(e) => {
5448                        warn!(
5449                            "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5450                        );
5451                        // Fall through to statistical path below.
5452                    }
5453                }
5454            }
5455            #[cfg(not(feature = "neural"))]
5456            {
5457                warn!(
5458                    "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5459                     not compiled in — falling back to statistical. Rebuild with \
5460                     `--features neural` (or `neural-cuda` for GPU) to enable.",
5461                    backend_choice
5462                );
5463            }
5464        } else if !matches!(backend_choice, "statistical" | "") {
5465            warn!(
5466                "Phase 12: unknown backend '{}', falling back to statistical",
5467                backend_choice
5468            );
5469        }
5470
5471        // Statistical path (default + fallback).
5472        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5473            let means = vec![5000.0, 3.0, 2.0];
5474            let stds = vec![2000.0, 1.5, 1.0];
5475
5476            let diffusion_config = DiffusionConfig {
5477                n_steps: self.config.diffusion.n_steps,
5478                seed: self.seed,
5479                ..Default::default()
5480            };
5481
5482            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5483            let n_samples = self.config.diffusion.sample_size;
5484            let n_features = 3;
5485            backend.generate(n_samples, n_features, self.seed).len()
5486        }));
5487
5488        match result {
5489            Ok(sample_count) => {
5490                stats.diffusion_samples_generated = sample_count;
5491                let elapsed = start.elapsed();
5492                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5493                info!(
5494                    "Phase 12 complete (statistical): {} samples in {}ms",
5495                    sample_count, stats.diffusion_enhancement_ms
5496                );
5497            }
5498            Err(_) => {
5499                let elapsed = start.elapsed();
5500                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5501                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5502            }
5503        }
5504    }
5505
5506    /// Neural-backend execution — either load a pre-trained checkpoint
5507    /// (when `config.diffusion.neural.checkpoint_path` is set) or train
5508    /// from the first batch of JE amounts. Returns the sample count
5509    /// produced; any error bubbles up to the statistical fallback.
5510    #[cfg(feature = "neural")]
5511    fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
5512        use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
5513
5514        if entries.is_empty() {
5515            return Err(SynthError::generation(
5516                "neural diffusion: no journal entries available as training data",
5517            ));
5518        }
5519
5520        let training_data: Vec<Vec<f64>> = entries
5521            .iter()
5522            .take(5000)
5523            .map(|je| {
5524                let total_amount: f64 = je
5525                    .lines
5526                    .iter()
5527                    .filter(|l| l.debit_amount > rust_decimal::Decimal::ZERO)
5528                    .map(|l| {
5529                        use rust_decimal::prelude::ToPrimitive;
5530                        l.debit_amount.to_f64().unwrap_or(0.0)
5531                    })
5532                    .sum();
5533                let line_count = je.lines.len() as f64;
5534                // Use the approval-workflow depth as the third feature
5535                // (proxy for complexity / risk). `None` → 1.
5536                let approval_level = je
5537                    .header
5538                    .approval_workflow
5539                    .as_ref()
5540                    .map(|w| w.required_levels as f64)
5541                    .unwrap_or(1.0);
5542                vec![total_amount, line_count, approval_level]
5543            })
5544            .collect();
5545
5546        let n_features = training_data.first().map(|r| r.len()).unwrap_or(3);
5547
5548        let cfg = &self.config.diffusion;
5549        let neural_cfg = &cfg.neural;
5550
5551        let backend: NeuralDiffusionBackend = if let Some(ckpt_path) =
5552            neural_cfg.checkpoint_path.as_ref()
5553        {
5554            let path = std::path::Path::new(ckpt_path);
5555            info!(
5556                "  Neural diffusion: loading checkpoint from {}",
5557                path.display()
5558            );
5559            NeuralDiffusionBackend::load(path)
5560                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
5561        } else {
5562            use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
5563            info!(
5564                "  Neural diffusion: training score network on {} rows × {} features, \
5565                     {} epochs, hidden_dims={:?}",
5566                training_data.len(),
5567                n_features,
5568                neural_cfg.training_epochs,
5569                neural_cfg.hidden_dims
5570            );
5571            let training_config = NeuralTrainingConfig {
5572                n_steps: cfg.n_steps,
5573                schedule: cfg.schedule.clone(),
5574                hidden_dims: neural_cfg.hidden_dims.clone(),
5575                timestep_embed_dim: neural_cfg.timestep_embed_dim,
5576                learning_rate: neural_cfg.learning_rate,
5577                epochs: neural_cfg.training_epochs,
5578                batch_size: neural_cfg.batch_size,
5579            };
5580            let (backend, report) =
5581                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
5582                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
5583            info!(
5584                "  Neural diffusion: training done — {} epochs, final_loss={:.4}",
5585                report.epochs_completed, report.final_loss
5586            );
5587            backend
5588        };
5589
5590        let samples = backend.generate(cfg.sample_size, n_features, self.seed);
5591        Ok(samples.len())
5592    }
5593
5594    /// Phase 13: Causal Overlay.
5595    ///
5596    /// Builds a structural causal model from a built-in template (e.g.,
5597    /// fraud_detection) and generates causal samples. Optionally validates
5598    /// that the output respects the causal structure. This phase is
5599    /// non-blocking: failures log a warning but do not stop the pipeline.
5600    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5601        if !self.config.causal.enabled {
5602            debug!("Phase 13: Skipped (causal generation disabled)");
5603            return;
5604        }
5605
5606        info!("Phase 13: Starting Causal Overlay");
5607        let start = std::time::Instant::now();
5608
5609        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5610            // Select template based on config
5611            let graph = match self.config.causal.template.as_str() {
5612                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5613                _ => CausalGraph::fraud_detection_template(),
5614            };
5615
5616            let scm = StructuralCausalModel::new(graph.clone())
5617                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5618
5619            let n_samples = self.config.causal.sample_size;
5620            let samples = scm
5621                .generate(n_samples, self.seed)
5622                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5623
5624            // Optionally validate causal structure
5625            let validation_passed = if self.config.causal.validate {
5626                let report = CausalValidator::validate_causal_structure(&samples, &graph);
5627                if report.valid {
5628                    info!(
5629                        "Causal validation passed: all {} checks OK",
5630                        report.checks.len()
5631                    );
5632                } else {
5633                    warn!(
5634                        "Causal validation: {} violations detected: {:?}",
5635                        report.violations.len(),
5636                        report.violations
5637                    );
5638                }
5639                Some(report.valid)
5640            } else {
5641                None
5642            };
5643
5644            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5645        }));
5646
5647        match result {
5648            Ok(Ok((sample_count, validation_passed))) => {
5649                stats.causal_samples_generated = sample_count;
5650                stats.causal_validation_passed = validation_passed;
5651                let elapsed = start.elapsed();
5652                stats.causal_generation_ms = elapsed.as_millis() as u64;
5653                info!(
5654                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5655                    sample_count, stats.causal_generation_ms, validation_passed,
5656                );
5657            }
5658            Ok(Err(e)) => {
5659                let elapsed = start.elapsed();
5660                stats.causal_generation_ms = elapsed.as_millis() as u64;
5661                warn!("Phase 13: Causal generation failed: {}", e);
5662            }
5663            Err(_) => {
5664                let elapsed = start.elapsed();
5665                stats.causal_generation_ms = elapsed.as_millis() as u64;
5666                warn!("Phase 13: Causal generation failed (panic caught), continuing");
5667            }
5668        }
5669    }
5670
5671    /// Phase 14: Generate S2C sourcing data.
5672    fn phase_sourcing_data(
5673        &mut self,
5674        stats: &mut EnhancedGenerationStatistics,
5675    ) -> SynthResult<SourcingSnapshot> {
5676        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5677            debug!("Phase 14: Skipped (sourcing generation disabled)");
5678            return Ok(SourcingSnapshot::default());
5679        }
5680        let degradation = self.check_resources()?;
5681        if degradation >= DegradationLevel::Reduced {
5682            debug!(
5683                "Phase skipped due to resource pressure (degradation: {:?})",
5684                degradation
5685            );
5686            return Ok(SourcingSnapshot::default());
5687        }
5688
5689        info!("Phase 14: Generating S2C Sourcing Data");
5690        let seed = self.seed;
5691
5692        // Gather vendor data from master data
5693        let vendor_ids: Vec<String> = self
5694            .master_data
5695            .vendors
5696            .iter()
5697            .map(|v| v.vendor_id.clone())
5698            .collect();
5699        if vendor_ids.is_empty() {
5700            debug!("Phase 14: Skipped (no vendors available)");
5701            return Ok(SourcingSnapshot::default());
5702        }
5703
5704        let categories: Vec<(String, String)> = vec![
5705            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5706            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5707            ("CAT-IT".to_string(), "IT Equipment".to_string()),
5708            ("CAT-SVC".to_string(), "Professional Services".to_string()),
5709            ("CAT-LOG".to_string(), "Logistics".to_string()),
5710        ];
5711        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5712            .iter()
5713            .map(|(id, name)| {
5714                (
5715                    id.clone(),
5716                    name.clone(),
5717                    rust_decimal::Decimal::from(100_000),
5718                )
5719            })
5720            .collect();
5721
5722        let company_code = self
5723            .config
5724            .companies
5725            .first()
5726            .map(|c| c.code.as_str())
5727            .unwrap_or("1000");
5728        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5729            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5730        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5731        let fiscal_year = start_date.year() as u16;
5732        let owner_ids: Vec<String> = self
5733            .master_data
5734            .employees
5735            .iter()
5736            .take(5)
5737            .map(|e| e.employee_id.clone())
5738            .collect();
5739        let owner_id = owner_ids
5740            .first()
5741            .map(std::string::String::as_str)
5742            .unwrap_or("BUYER-001");
5743
5744        // Step 1: Spend Analysis
5745        let mut spend_gen = SpendAnalysisGenerator::new(seed);
5746        let spend_analyses =
5747            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5748
5749        // Step 2: Sourcing Projects
5750        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5751        let sourcing_projects = if owner_ids.is_empty() {
5752            Vec::new()
5753        } else {
5754            project_gen.generate(
5755                company_code,
5756                &categories_with_spend,
5757                &owner_ids,
5758                start_date,
5759                self.config.global.period_months,
5760            )
5761        };
5762        stats.sourcing_project_count = sourcing_projects.len();
5763
5764        // Step 3: Qualifications
5765        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5766        let mut qual_gen = QualificationGenerator::new(seed + 2);
5767        let qualifications = qual_gen.generate(
5768            company_code,
5769            &qual_vendor_ids,
5770            sourcing_projects.first().map(|p| p.project_id.as_str()),
5771            owner_id,
5772            start_date,
5773        );
5774
5775        // Step 4: RFx Events
5776        let mut rfx_gen = RfxGenerator::new(seed + 3);
5777        let rfx_events: Vec<RfxEvent> = sourcing_projects
5778            .iter()
5779            .map(|proj| {
5780                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5781                rfx_gen.generate(
5782                    company_code,
5783                    &proj.project_id,
5784                    &proj.category_id,
5785                    &qualified_vids,
5786                    owner_id,
5787                    start_date,
5788                    50000.0,
5789                )
5790            })
5791            .collect();
5792        stats.rfx_event_count = rfx_events.len();
5793
5794        // Step 5: Bids
5795        let mut bid_gen = BidGenerator::new(seed + 4);
5796        let mut all_bids = Vec::new();
5797        for rfx in &rfx_events {
5798            let bidder_count = vendor_ids.len().clamp(2, 5);
5799            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5800            let bids = bid_gen.generate(rfx, &responding, start_date);
5801            all_bids.extend(bids);
5802        }
5803        stats.bid_count = all_bids.len();
5804
5805        // Step 6: Bid Evaluations
5806        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5807        let bid_evaluations: Vec<BidEvaluation> = rfx_events
5808            .iter()
5809            .map(|rfx| {
5810                let rfx_bids: Vec<SupplierBid> = all_bids
5811                    .iter()
5812                    .filter(|b| b.rfx_id == rfx.rfx_id)
5813                    .cloned()
5814                    .collect();
5815                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5816            })
5817            .collect();
5818
5819        // Step 7: Contracts from winning bids
5820        let mut contract_gen = ContractGenerator::new(seed + 6);
5821        let contracts: Vec<ProcurementContract> = bid_evaluations
5822            .iter()
5823            .zip(rfx_events.iter())
5824            .filter_map(|(eval, rfx)| {
5825                eval.ranked_bids.first().and_then(|winner| {
5826                    all_bids
5827                        .iter()
5828                        .find(|b| b.bid_id == winner.bid_id)
5829                        .map(|winning_bid| {
5830                            contract_gen.generate_from_bid(
5831                                winning_bid,
5832                                Some(&rfx.sourcing_project_id),
5833                                &rfx.category_id,
5834                                owner_id,
5835                                start_date,
5836                            )
5837                        })
5838                })
5839            })
5840            .collect();
5841        stats.contract_count = contracts.len();
5842
5843        // Step 8: Catalog Items
5844        let mut catalog_gen = CatalogGenerator::new(seed + 7);
5845        let catalog_items = catalog_gen.generate(&contracts);
5846        stats.catalog_item_count = catalog_items.len();
5847
5848        // Step 9: Scorecards
5849        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5850        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5851            .iter()
5852            .fold(
5853                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5854                |mut acc, c| {
5855                    acc.entry(c.vendor_id.clone()).or_default().push(c);
5856                    acc
5857                },
5858            )
5859            .into_iter()
5860            .collect();
5861        let scorecards = scorecard_gen.generate(
5862            company_code,
5863            &vendor_contracts,
5864            start_date,
5865            end_date,
5866            owner_id,
5867        );
5868        stats.scorecard_count = scorecards.len();
5869
5870        // Back-populate cross-references on sourcing projects (Task 35)
5871        // Link each project to its RFx events, contracts, and spend analyses
5872        let mut sourcing_projects = sourcing_projects;
5873        for project in &mut sourcing_projects {
5874            // Link RFx events generated for this project
5875            project.rfx_ids = rfx_events
5876                .iter()
5877                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5878                .map(|rfx| rfx.rfx_id.clone())
5879                .collect();
5880
5881            // Link contract awarded from this project's RFx
5882            project.contract_id = contracts
5883                .iter()
5884                .find(|c| {
5885                    c.sourcing_project_id
5886                        .as_deref()
5887                        .is_some_and(|sp| sp == project.project_id)
5888                })
5889                .map(|c| c.contract_id.clone());
5890
5891            // Link spend analysis for matching category (use category_id as the reference)
5892            project.spend_analysis_id = spend_analyses
5893                .iter()
5894                .find(|sa| sa.category_id == project.category_id)
5895                .map(|sa| sa.category_id.clone());
5896        }
5897
5898        info!(
5899            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5900            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5901            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5902        );
5903        self.check_resources_with_log("post-sourcing")?;
5904
5905        Ok(SourcingSnapshot {
5906            spend_analyses,
5907            sourcing_projects,
5908            qualifications,
5909            rfx_events,
5910            bids: all_bids,
5911            bid_evaluations,
5912            contracts,
5913            catalog_items,
5914            scorecards,
5915        })
5916    }
5917
5918    /// Build a [`GroupStructure`] from the current company configuration.
5919    ///
5920    /// The first company in the configuration is treated as the ultimate parent.
5921    /// All remaining companies become wholly-owned (100 %) subsidiaries with
5922    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
5923    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5924        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5925
5926        let parent_code = self
5927            .config
5928            .companies
5929            .first()
5930            .map(|c| c.code.clone())
5931            .unwrap_or_else(|| "PARENT".to_string());
5932
5933        let mut group = GroupStructure::new(parent_code);
5934
5935        for company in self.config.companies.iter().skip(1) {
5936            let sub =
5937                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5938            group.add_subsidiary(sub);
5939        }
5940
5941        group
5942    }
5943
5944    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
5945    fn phase_intercompany(
5946        &mut self,
5947        journal_entries: &[JournalEntry],
5948        stats: &mut EnhancedGenerationStatistics,
5949    ) -> SynthResult<IntercompanySnapshot> {
5950        // Skip if intercompany is disabled in config
5951        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5952            debug!("Phase 14b: Skipped (intercompany generation disabled)");
5953            return Ok(IntercompanySnapshot::default());
5954        }
5955
5956        // Intercompany requires at least 2 companies
5957        if self.config.companies.len() < 2 {
5958            debug!(
5959                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5960                self.config.companies.len()
5961            );
5962            return Ok(IntercompanySnapshot::default());
5963        }
5964
5965        info!("Phase 14b: Generating Intercompany Transactions");
5966
5967        // Build the group structure early — used by ISA 600 component auditor scope
5968        // and consolidated financial statement generators downstream.
5969        let group_structure = self.build_group_structure();
5970        debug!(
5971            "Group structure built: parent={}, subsidiaries={}",
5972            group_structure.parent_entity,
5973            group_structure.subsidiaries.len()
5974        );
5975
5976        let seed = self.seed;
5977        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5978            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5979        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5980
5981        // Build ownership structure from company configs
5982        // First company is treated as the parent, remaining are subsidiaries
5983        let parent_code = self.config.companies[0].code.clone();
5984        let mut ownership_structure =
5985            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5986
5987        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5988            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5989                format!("REL{:03}", i + 1),
5990                parent_code.clone(),
5991                company.code.clone(),
5992                rust_decimal::Decimal::from(100), // Default 100% ownership
5993                start_date,
5994            );
5995            ownership_structure.add_relationship(relationship);
5996        }
5997
5998        // Convert config transfer pricing method to core model enum
5999        let tp_method = match self.config.intercompany.transfer_pricing_method {
6000            datasynth_config::schema::TransferPricingMethod::CostPlus => {
6001                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
6002            }
6003            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
6004                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
6005            }
6006            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
6007                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
6008            }
6009            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
6010                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
6011            }
6012            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
6013                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
6014            }
6015        };
6016
6017        // Build IC generator config from schema config
6018        let ic_currency = self
6019            .config
6020            .companies
6021            .first()
6022            .map(|c| c.currency.clone())
6023            .unwrap_or_else(|| "USD".to_string());
6024        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
6025            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
6026            transfer_pricing_method: tp_method,
6027            markup_percent: rust_decimal::Decimal::from_f64_retain(
6028                self.config.intercompany.markup_percent,
6029            )
6030            .unwrap_or(rust_decimal::Decimal::from(5)),
6031            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
6032            default_currency: ic_currency,
6033            ..Default::default()
6034        };
6035
6036        // Create IC generator
6037        let mut ic_generator = datasynth_generators::ICGenerator::new(
6038            ic_gen_config,
6039            ownership_structure.clone(),
6040            seed + 50,
6041        );
6042
6043        // Generate IC transactions for the period
6044        // Use ~3 transactions per day as a reasonable default
6045        let transactions_per_day = 3;
6046        let matched_pairs = ic_generator.generate_transactions_for_period(
6047            start_date,
6048            end_date,
6049            transactions_per_day,
6050        );
6051
6052        // Generate IC source P2P/O2C documents
6053        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
6054        debug!(
6055            "Generated {} IC seller invoices, {} IC buyer POs",
6056            ic_doc_chains.seller_invoices.len(),
6057            ic_doc_chains.buyer_orders.len()
6058        );
6059
6060        // Generate journal entries from matched pairs
6061        let mut seller_entries = Vec::new();
6062        let mut buyer_entries = Vec::new();
6063        let fiscal_year = start_date.year();
6064
6065        for pair in &matched_pairs {
6066            let fiscal_period = pair.posting_date.month();
6067            let (seller_je, buyer_je) =
6068                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
6069            seller_entries.push(seller_je);
6070            buyer_entries.push(buyer_je);
6071        }
6072
6073        // Run matching engine
6074        let matching_config = datasynth_generators::ICMatchingConfig {
6075            base_currency: self
6076                .config
6077                .companies
6078                .first()
6079                .map(|c| c.currency.clone())
6080                .unwrap_or_else(|| "USD".to_string()),
6081            ..Default::default()
6082        };
6083        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
6084        matching_engine.load_matched_pairs(&matched_pairs);
6085        let matching_result = matching_engine.run_matching(end_date);
6086
6087        // Generate elimination entries if configured
6088        let mut elimination_entries = Vec::new();
6089        if self.config.intercompany.generate_eliminations {
6090            let elim_config = datasynth_generators::EliminationConfig {
6091                consolidation_entity: "GROUP".to_string(),
6092                base_currency: self
6093                    .config
6094                    .companies
6095                    .first()
6096                    .map(|c| c.currency.clone())
6097                    .unwrap_or_else(|| "USD".to_string()),
6098                ..Default::default()
6099            };
6100
6101            let mut elim_generator =
6102                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
6103
6104            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
6105            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
6106                matching_result
6107                    .matched_balances
6108                    .iter()
6109                    .chain(matching_result.unmatched_balances.iter())
6110                    .cloned()
6111                    .collect();
6112
6113            // Build investment and equity maps from the group structure so that the
6114            // elimination generator can produce equity-investment elimination entries
6115            // (parent's investment in subsidiary vs. subsidiary's equity capital).
6116            //
6117            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
6118            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
6119            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
6120            //
6121            // Net assets are derived from the journal entries using account-range heuristics:
6122            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
6123            // no JE data is available (IC phase runs early in the generation pipeline).
6124            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
6125                std::collections::HashMap::new();
6126            let mut equity_amounts: std::collections::HashMap<
6127                String,
6128                std::collections::HashMap<String, rust_decimal::Decimal>,
6129            > = std::collections::HashMap::new();
6130            {
6131                use rust_decimal::Decimal;
6132                let hundred = Decimal::from(100u32);
6133                let ten_pct = Decimal::new(10, 2); // 0.10
6134                let thirty_pct = Decimal::new(30, 2); // 0.30
6135                let sixty_pct = Decimal::new(60, 2); // 0.60
6136                let parent_code = &group_structure.parent_entity;
6137                for sub in &group_structure.subsidiaries {
6138                    let net_assets = {
6139                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6140                        if na > Decimal::ZERO {
6141                            na
6142                        } else {
6143                            Decimal::from(1_000_000u64)
6144                        }
6145                    };
6146                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
6147                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
6148                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
6149
6150                    // Split subsidiary equity into conventional components:
6151                    // 10 % share capital / 30 % APIC / 60 % retained earnings
6152                    let mut eq_map = std::collections::HashMap::new();
6153                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
6154                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
6155                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
6156                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
6157                }
6158            }
6159
6160            let journal = elim_generator.generate_eliminations(
6161                &fiscal_period,
6162                end_date,
6163                &all_balances,
6164                &matched_pairs,
6165                &investment_amounts,
6166                &equity_amounts,
6167            );
6168
6169            elimination_entries = journal.entries.clone();
6170        }
6171
6172        let matched_pair_count = matched_pairs.len();
6173        let elimination_entry_count = elimination_entries.len();
6174        let match_rate = matching_result.match_rate;
6175
6176        stats.ic_matched_pair_count = matched_pair_count;
6177        stats.ic_elimination_count = elimination_entry_count;
6178        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
6179
6180        info!(
6181            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
6182            matched_pair_count,
6183            stats.ic_transaction_count,
6184            seller_entries.len(),
6185            buyer_entries.len(),
6186            elimination_entry_count,
6187            match_rate * 100.0
6188        );
6189        self.check_resources_with_log("post-intercompany")?;
6190
6191        // ----------------------------------------------------------------
6192        // NCI measurements: derive from group structure ownership percentages
6193        // ----------------------------------------------------------------
6194        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
6195            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
6196            use rust_decimal::Decimal;
6197
6198            let eight_pct = Decimal::new(8, 2); // 0.08
6199
6200            group_structure
6201                .subsidiaries
6202                .iter()
6203                .filter(|sub| {
6204                    sub.nci_percentage > Decimal::ZERO
6205                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
6206                })
6207                .map(|sub| {
6208                    // Compute net assets from actual journal entries for this subsidiary.
6209                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
6210                    // IC phase runs before the main JE batch has been populated).
6211                    let net_assets_from_jes =
6212                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6213
6214                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
6215                        net_assets_from_jes.round_dp(2)
6216                    } else {
6217                        // Fallback: use a plausible base amount
6218                        Decimal::from(1_000_000u64)
6219                    };
6220
6221                    // Net income approximated as 8% of net assets
6222                    let net_income = (net_assets * eight_pct).round_dp(2);
6223
6224                    NciMeasurement::compute(
6225                        sub.entity_code.clone(),
6226                        sub.nci_percentage,
6227                        net_assets,
6228                        net_income,
6229                    )
6230                })
6231                .collect()
6232        };
6233
6234        if !nci_measurements.is_empty() {
6235            info!(
6236                "NCI measurements: {} subsidiaries with non-controlling interests",
6237                nci_measurements.len()
6238            );
6239        }
6240
6241        Ok(IntercompanySnapshot {
6242            group_structure: Some(group_structure),
6243            matched_pairs,
6244            seller_journal_entries: seller_entries,
6245            buyer_journal_entries: buyer_entries,
6246            elimination_entries,
6247            nci_measurements,
6248            ic_document_chains: Some(ic_doc_chains),
6249            matched_pair_count,
6250            elimination_entry_count,
6251            match_rate,
6252        })
6253    }
6254
6255    /// Phase 15: Generate bank reconciliations and financial statements.
6256    fn phase_financial_reporting(
6257        &mut self,
6258        document_flows: &DocumentFlowSnapshot,
6259        journal_entries: &[JournalEntry],
6260        coa: &Arc<ChartOfAccounts>,
6261        _hr: &HrSnapshot,
6262        _audit: &AuditSnapshot,
6263        stats: &mut EnhancedGenerationStatistics,
6264    ) -> SynthResult<FinancialReportingSnapshot> {
6265        let fs_enabled = self.phase_config.generate_financial_statements
6266            || self.config.financial_reporting.enabled;
6267        let br_enabled = self.phase_config.generate_bank_reconciliation;
6268
6269        if !fs_enabled && !br_enabled {
6270            debug!("Phase 15: Skipped (financial reporting disabled)");
6271            return Ok(FinancialReportingSnapshot::default());
6272        }
6273
6274        info!("Phase 15: Generating Financial Reporting Data");
6275
6276        let seed = self.seed;
6277        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6278            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6279
6280        let mut financial_statements = Vec::new();
6281        let mut bank_reconciliations = Vec::new();
6282        let mut trial_balances = Vec::new();
6283        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6284        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6285            Vec::new();
6286        // Standalone statements keyed by entity code
6287        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6288            std::collections::HashMap::new();
6289        // Consolidated statements (one per period)
6290        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6291        // Consolidation schedules (one per period)
6292        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6293
6294        // Generate financial statements from JE-derived trial balances.
6295        //
6296        // When journal entries are available, we use cumulative trial balances for
6297        // balance sheet accounts and current-period trial balances for income
6298        // statement accounts. We also track prior-period trial balances so the
6299        // generator can produce comparative amounts, and we build a proper
6300        // cash flow statement from working capital changes rather than random data.
6301        if fs_enabled {
6302            let has_journal_entries = !journal_entries.is_empty();
6303
6304            // Use FinancialStatementGenerator for balance sheet and income statement,
6305            // but build cash flow ourselves from TB data when JEs are available.
6306            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6307            // Separate generator for consolidated statements (different seed offset)
6308            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6309
6310            // Collect elimination JEs once (reused across periods)
6311            let elimination_entries: Vec<&JournalEntry> = journal_entries
6312                .iter()
6313                .filter(|je| je.header.is_elimination)
6314                .collect();
6315
6316            // Generate one set of statements per period, per entity
6317            for period in 0..self.config.global.period_months {
6318                let period_start = start_date + chrono::Months::new(period);
6319                let period_end =
6320                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6321                let fiscal_year = period_end.year() as u16;
6322                let fiscal_period = period_end.month() as u8;
6323                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6324
6325                // Build per-entity trial balances for this period (non-elimination JEs)
6326                // We accumulate them for the consolidation step.
6327                let mut entity_tb_map: std::collections::HashMap<
6328                    String,
6329                    std::collections::HashMap<String, rust_decimal::Decimal>,
6330                > = std::collections::HashMap::new();
6331
6332                // --- Standalone: one set of statements per company ---
6333                // v5.33: resolve once per period (outside the company loop). In single-shard / standalone
6334                // mode this is the primary country's framework; in group
6335                // mode each shard runs against its own entity (one company)
6336                // so the primary-country lookup is the entity's. Either way
6337                // the string drives framework-aware TB classification (Defect
6338                // A fix — German SKR / French PCG accounts no longer routed
6339                // through a US-only prefix table).
6340                let framework_str = self.resolve_framework_str();
6341                for (company_idx, company) in self.config.companies.iter().enumerate() {
6342                    let company_code = company.code.as_str();
6343                    let currency = company.currency.as_str();
6344                    // Use a unique seed offset per company to keep statements deterministic
6345                    // and distinct across companies
6346                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6347                    let mut company_fs_gen =
6348                        FinancialStatementGenerator::new(seed + company_seed_offset);
6349
6350                    if has_journal_entries {
6351                        let tb_entries = Self::build_cumulative_trial_balance(
6352                            journal_entries,
6353                            coa,
6354                            company_code,
6355                            start_date,
6356                            period_end,
6357                            fiscal_year,
6358                            fiscal_period,
6359                            framework_str,
6360                        );
6361
6362                        // Accumulate per-entity category balances for consolidation
6363                        let entity_cat_map =
6364                            entity_tb_map.entry(company_code.to_string()).or_default();
6365                        for tb_entry in &tb_entries {
6366                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
6367                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6368                        }
6369
6370                        let stmts = company_fs_gen.generate(
6371                            company_code,
6372                            currency,
6373                            &tb_entries,
6374                            period_start,
6375                            period_end,
6376                            fiscal_year,
6377                            fiscal_period,
6378                            None,
6379                            "SYS-AUTOCLOSE",
6380                        );
6381
6382                        let mut entity_stmts = Vec::new();
6383                        for stmt in stmts {
6384                            if stmt.statement_type == StatementType::CashFlowStatement {
6385                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6386                                let cf_items = Self::build_cash_flow_from_trial_balances(
6387                                    &tb_entries,
6388                                    None,
6389                                    net_income,
6390                                );
6391                                entity_stmts.push(FinancialStatement {
6392                                    cash_flow_items: cf_items,
6393                                    ..stmt
6394                                });
6395                            } else {
6396                                entity_stmts.push(stmt);
6397                            }
6398                        }
6399
6400                        // Add to the flat financial_statements list (used by KPI/budget)
6401                        financial_statements.extend(entity_stmts.clone());
6402
6403                        // Store standalone per-entity
6404                        standalone_statements
6405                            .entry(company_code.to_string())
6406                            .or_default()
6407                            .extend(entity_stmts);
6408
6409                        // Only store trial balance for the first company in the period
6410                        // to avoid duplicates in the trial_balances list
6411                        if company_idx == 0 {
6412                            trial_balances.push(PeriodTrialBalance {
6413                                fiscal_year,
6414                                fiscal_period,
6415                                period_start,
6416                                period_end,
6417                                entries: tb_entries,
6418                                framework: framework_str.to_string(),
6419                            });
6420                        }
6421                    } else {
6422                        // Fallback: no JEs available
6423                        let tb_entries = Self::build_trial_balance_from_entries(
6424                            journal_entries,
6425                            coa,
6426                            company_code,
6427                            fiscal_year,
6428                            fiscal_period,
6429                            framework_str,
6430                        );
6431
6432                        let stmts = company_fs_gen.generate(
6433                            company_code,
6434                            currency,
6435                            &tb_entries,
6436                            period_start,
6437                            period_end,
6438                            fiscal_year,
6439                            fiscal_period,
6440                            None,
6441                            "SYS-AUTOCLOSE",
6442                        );
6443                        financial_statements.extend(stmts.clone());
6444                        standalone_statements
6445                            .entry(company_code.to_string())
6446                            .or_default()
6447                            .extend(stmts);
6448
6449                        if company_idx == 0 && !tb_entries.is_empty() {
6450                            trial_balances.push(PeriodTrialBalance {
6451                                fiscal_year,
6452                                fiscal_period,
6453                                period_start,
6454                                period_end,
6455                                entries: tb_entries,
6456                                framework: framework_str.to_string(),
6457                            });
6458                        }
6459                    }
6460                }
6461
6462                // --- Consolidated: aggregate all entities + apply eliminations ---
6463                // Use the primary (first) company's currency for the consolidated statement
6464                let group_currency = self
6465                    .config
6466                    .companies
6467                    .first()
6468                    .map(|c| c.currency.as_str())
6469                    .unwrap_or("USD");
6470
6471                // Build owned elimination entries for this period
6472                let period_eliminations: Vec<JournalEntry> = elimination_entries
6473                    .iter()
6474                    .filter(|je| {
6475                        je.header.fiscal_year == fiscal_year
6476                            && je.header.fiscal_period == fiscal_period
6477                    })
6478                    .map(|je| (*je).clone())
6479                    .collect();
6480
6481                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6482                    &entity_tb_map,
6483                    &period_eliminations,
6484                    &period_label,
6485                );
6486
6487                // Build a pseudo trial balance from consolidated line items for the
6488                // FinancialStatementGenerator to use (only for cash flow direction).
6489                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6490                    .line_items
6491                    .iter()
6492                    .map(|li| {
6493                        let net = li.post_elimination_total;
6494                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6495                            (net, rust_decimal::Decimal::ZERO)
6496                        } else {
6497                            (rust_decimal::Decimal::ZERO, -net)
6498                        };
6499                        datasynth_generators::TrialBalanceEntry {
6500                            account_code: li.account_category.clone(),
6501                            account_name: li.account_category.clone(),
6502                            category: li.account_category.clone(),
6503                            debit_balance: debit,
6504                            credit_balance: credit,
6505                        }
6506                    })
6507                    .collect();
6508
6509                let mut cons_stmts = cons_gen.generate(
6510                    "GROUP",
6511                    group_currency,
6512                    &cons_tb,
6513                    period_start,
6514                    period_end,
6515                    fiscal_year,
6516                    fiscal_period,
6517                    None,
6518                    "SYS-AUTOCLOSE",
6519                );
6520
6521                // Split consolidated line items by statement type.
6522                // The consolidation generator returns BS items first, then IS items,
6523                // identified by their CONS- prefix and category.
6524                let bs_categories: &[&str] = &[
6525                    "CASH",
6526                    "RECEIVABLES",
6527                    "INVENTORY",
6528                    "FIXEDASSETS",
6529                    "PAYABLES",
6530                    "ACCRUEDLIABILITIES",
6531                    "LONGTERMDEBT",
6532                    "EQUITY",
6533                ];
6534                let (bs_items, is_items): (Vec<_>, Vec<_>) =
6535                    cons_line_items.into_iter().partition(|li| {
6536                        let upper = li.label.to_uppercase();
6537                        bs_categories.iter().any(|c| upper == *c)
6538                    });
6539
6540                for stmt in &mut cons_stmts {
6541                    stmt.is_consolidated = true;
6542                    match stmt.statement_type {
6543                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6544                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6545                        _ => {} // CF and equity change statements keep generator output
6546                    }
6547                }
6548
6549                consolidated_statements.extend(cons_stmts);
6550                consolidation_schedules.push(schedule);
6551            }
6552
6553            // Backward compat: if only 1 company, use existing code path logic
6554            // (prior_cumulative_tb for comparative amounts). Already handled above;
6555            // the prior_ref is omitted to keep this change minimal.
6556            let _ = &mut fs_gen; // suppress unused warning
6557
6558            stats.financial_statement_count = financial_statements.len();
6559            info!(
6560                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6561                stats.financial_statement_count,
6562                consolidated_statements.len(),
6563                has_journal_entries
6564            );
6565
6566            // ----------------------------------------------------------------
6567            // IFRS 8 / ASC 280: Operating Segment Reporting
6568            // ----------------------------------------------------------------
6569            // Build entity seeds from the company configuration.
6570            let entity_seeds: Vec<SegmentSeed> = self
6571                .config
6572                .companies
6573                .iter()
6574                .map(|c| SegmentSeed {
6575                    code: c.code.clone(),
6576                    name: c.name.clone(),
6577                    currency: c.currency.clone(),
6578                })
6579                .collect();
6580
6581            let mut seg_gen = SegmentGenerator::new(seed + 30);
6582
6583            // Generate one set of segment reports per period.
6584            // We extract consolidated revenue / profit / assets from the consolidated
6585            // financial statements produced above, falling back to simple sums when
6586            // no consolidated statements were generated (single-entity path).
6587            for period in 0..self.config.global.period_months {
6588                let period_end =
6589                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6590                let fiscal_year = period_end.year() as u16;
6591                let fiscal_period = period_end.month() as u8;
6592                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6593
6594                use datasynth_core::models::StatementType;
6595
6596                // Try to find consolidated income statement for this period
6597                let cons_is = consolidated_statements.iter().find(|s| {
6598                    s.fiscal_year == fiscal_year
6599                        && s.fiscal_period == fiscal_period
6600                        && s.statement_type == StatementType::IncomeStatement
6601                });
6602                let cons_bs = consolidated_statements.iter().find(|s| {
6603                    s.fiscal_year == fiscal_year
6604                        && s.fiscal_period == fiscal_period
6605                        && s.statement_type == StatementType::BalanceSheet
6606                });
6607
6608                // If consolidated statements are not available, fall back to the flat list
6609                let is_stmt = cons_is.or_else(|| {
6610                    financial_statements.iter().find(|s| {
6611                        s.fiscal_year == fiscal_year
6612                            && s.fiscal_period == fiscal_period
6613                            && s.statement_type == StatementType::IncomeStatement
6614                    })
6615                });
6616                let bs_stmt = cons_bs.or_else(|| {
6617                    financial_statements.iter().find(|s| {
6618                        s.fiscal_year == fiscal_year
6619                            && s.fiscal_period == fiscal_period
6620                            && s.statement_type == StatementType::BalanceSheet
6621                    })
6622                });
6623
6624                let consolidated_revenue = is_stmt
6625                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6626                    .map(|li| -li.amount) // revenue is stored as negative in IS
6627                    .unwrap_or(rust_decimal::Decimal::ZERO);
6628
6629                let consolidated_profit = is_stmt
6630                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6631                    .map(|li| li.amount)
6632                    .unwrap_or(rust_decimal::Decimal::ZERO);
6633
6634                let consolidated_assets = bs_stmt
6635                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6636                    .map(|li| li.amount)
6637                    .unwrap_or(rust_decimal::Decimal::ZERO);
6638
6639                // Skip periods where we have no financial data
6640                if consolidated_revenue == rust_decimal::Decimal::ZERO
6641                    && consolidated_assets == rust_decimal::Decimal::ZERO
6642                {
6643                    continue;
6644                }
6645
6646                let group_code = self
6647                    .config
6648                    .companies
6649                    .first()
6650                    .map(|c| c.code.as_str())
6651                    .unwrap_or("GROUP");
6652
6653                // Compute period depreciation from JEs with document type "CL" hitting account
6654                // 6000 (depreciation expense).  These are generated by phase_period_close.
6655                let total_depr: rust_decimal::Decimal = journal_entries
6656                    .iter()
6657                    .filter(|je| je.header.document_type == "CL")
6658                    .flat_map(|je| je.lines.iter())
6659                    .filter(|l| l.gl_account.starts_with("6000"))
6660                    .map(|l| l.debit_amount)
6661                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6662                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6663                    Some(total_depr)
6664                } else {
6665                    None
6666                };
6667
6668                let (segs, recon) = seg_gen.generate(
6669                    group_code,
6670                    &period_label,
6671                    consolidated_revenue,
6672                    consolidated_profit,
6673                    consolidated_assets,
6674                    &entity_seeds,
6675                    depr_param,
6676                );
6677                segment_reports.extend(segs);
6678                segment_reconciliations.push(recon);
6679            }
6680
6681            info!(
6682                "Segment reports generated: {} segments, {} reconciliations",
6683                segment_reports.len(),
6684                segment_reconciliations.len()
6685            );
6686        }
6687
6688        // Generate bank reconciliations from payment data
6689        if br_enabled && !document_flows.payments.is_empty() {
6690            let employee_ids: Vec<String> = self
6691                .master_data
6692                .employees
6693                .iter()
6694                .map(|e| e.employee_id.clone())
6695                .collect();
6696            let mut br_gen =
6697                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6698
6699            // Group payments by company code and period
6700            for company in &self.config.companies {
6701                let company_payments: Vec<PaymentReference> = document_flows
6702                    .payments
6703                    .iter()
6704                    .filter(|p| p.header.company_code == company.code)
6705                    .map(|p| PaymentReference {
6706                        id: p.header.document_id.clone(),
6707                        amount: if p.is_vendor { p.amount } else { -p.amount },
6708                        date: p.header.document_date,
6709                        reference: p
6710                            .check_number
6711                            .clone()
6712                            .or_else(|| p.wire_reference.clone())
6713                            .unwrap_or_else(|| p.header.document_id.clone()),
6714                    })
6715                    .collect();
6716
6717                if company_payments.is_empty() {
6718                    continue;
6719                }
6720
6721                let bank_account_id = format!("{}-MAIN", company.code);
6722
6723                // Generate one reconciliation per period
6724                for period in 0..self.config.global.period_months {
6725                    let period_start = start_date + chrono::Months::new(period);
6726                    let period_end =
6727                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6728
6729                    let period_payments: Vec<PaymentReference> = company_payments
6730                        .iter()
6731                        .filter(|p| p.date >= period_start && p.date <= period_end)
6732                        .cloned()
6733                        .collect();
6734
6735                    let recon = br_gen.generate(
6736                        &company.code,
6737                        &bank_account_id,
6738                        period_start,
6739                        period_end,
6740                        &company.currency,
6741                        &period_payments,
6742                    );
6743                    bank_reconciliations.push(recon);
6744                }
6745            }
6746            info!(
6747                "Bank reconciliations generated: {} reconciliations",
6748                bank_reconciliations.len()
6749            );
6750        }
6751
6752        stats.bank_reconciliation_count = bank_reconciliations.len();
6753        self.check_resources_with_log("post-financial-reporting")?;
6754
6755        if !trial_balances.is_empty() {
6756            info!(
6757                "Period-close trial balances captured: {} periods",
6758                trial_balances.len()
6759            );
6760        }
6761
6762        // Notes to financial statements are generated in a separate post-processing step
6763        // (generate_notes_to_financial_statements) called after accounting_standards and tax
6764        // phases have completed, so that deferred tax and provision data can be wired in.
6765        let notes_to_financial_statements = Vec::new();
6766
6767        Ok(FinancialReportingSnapshot {
6768            financial_statements,
6769            standalone_statements,
6770            consolidated_statements,
6771            consolidation_schedules,
6772            bank_reconciliations,
6773            trial_balances,
6774            segment_reports,
6775            segment_reconciliations,
6776            notes_to_financial_statements,
6777        })
6778    }
6779
6780    /// Populate notes to financial statements using fully-resolved snapshots.
6781    ///
6782    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
6783    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
6784    /// can be wired into the notes context.  The method mutates
6785    /// `financial_reporting.notes_to_financial_statements` in-place.
6786    fn generate_notes_to_financial_statements(
6787        &self,
6788        financial_reporting: &mut FinancialReportingSnapshot,
6789        accounting_standards: &AccountingStandardsSnapshot,
6790        tax: &TaxSnapshot,
6791        hr: &HrSnapshot,
6792        audit: &AuditSnapshot,
6793        treasury: &TreasurySnapshot,
6794    ) {
6795        use datasynth_config::schema::AccountingFrameworkConfig;
6796        use datasynth_core::models::StatementType;
6797        use datasynth_generators::period_close::notes_generator::{
6798            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6799        };
6800
6801        let seed = self.seed;
6802        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6803        {
6804            Ok(d) => d,
6805            Err(_) => return,
6806        };
6807
6808        let mut notes_gen = NotesGenerator::new(seed + 4235);
6809
6810        for company in &self.config.companies {
6811            let last_period_end = start_date
6812                + chrono::Months::new(self.config.global.period_months)
6813                - chrono::Days::new(1);
6814            let fiscal_year = last_period_end.year() as u16;
6815
6816            // Extract relevant amounts from the already-generated financial statements
6817            let entity_is = financial_reporting
6818                .standalone_statements
6819                .get(&company.code)
6820                .and_then(|stmts| {
6821                    stmts.iter().find(|s| {
6822                        s.fiscal_year == fiscal_year
6823                            && s.statement_type == StatementType::IncomeStatement
6824                    })
6825                });
6826            let entity_bs = financial_reporting
6827                .standalone_statements
6828                .get(&company.code)
6829                .and_then(|stmts| {
6830                    stmts.iter().find(|s| {
6831                        s.fiscal_year == fiscal_year
6832                            && s.statement_type == StatementType::BalanceSheet
6833                    })
6834                });
6835
6836            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
6837            let revenue_amount = entity_is
6838                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6839                .map(|li| li.amount);
6840            let ppe_gross = entity_bs
6841                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6842                .map(|li| li.amount);
6843
6844            let framework = match self
6845                .config
6846                .accounting_standards
6847                .framework
6848                .unwrap_or_default()
6849            {
6850                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6851                    "IFRS".to_string()
6852                }
6853                _ => "US GAAP".to_string(),
6854            };
6855
6856            // ---- Deferred tax (IAS 12 / ASC 740) ----
6857            // Sum closing DTA and DTL from rollforward entries for this entity.
6858            let (entity_dta, entity_dtl) = {
6859                let mut dta = rust_decimal::Decimal::ZERO;
6860                let mut dtl = rust_decimal::Decimal::ZERO;
6861                for rf in &tax.deferred_tax.rollforwards {
6862                    if rf.entity_code == company.code {
6863                        dta += rf.closing_dta;
6864                        dtl += rf.closing_dtl;
6865                    }
6866                }
6867                (
6868                    if dta > rust_decimal::Decimal::ZERO {
6869                        Some(dta)
6870                    } else {
6871                        None
6872                    },
6873                    if dtl > rust_decimal::Decimal::ZERO {
6874                        Some(dtl)
6875                    } else {
6876                        None
6877                    },
6878                )
6879            };
6880
6881            // ---- Provisions (IAS 37 / ASC 450) ----
6882            // Filter provisions to this entity; sum best_estimate amounts.
6883            let entity_provisions: Vec<_> = accounting_standards
6884                .provisions
6885                .iter()
6886                .filter(|p| p.entity_code == company.code)
6887                .collect();
6888            let provision_count = entity_provisions.len();
6889            let total_provisions = if provision_count > 0 {
6890                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6891            } else {
6892                None
6893            };
6894
6895            // ---- Pension data from HR snapshot ----
6896            let entity_pension_plan_count = hr
6897                .pension_plans
6898                .iter()
6899                .filter(|p| p.entity_code == company.code)
6900                .count();
6901            let entity_total_dbo: Option<rust_decimal::Decimal> = {
6902                let sum: rust_decimal::Decimal = hr
6903                    .pension_disclosures
6904                    .iter()
6905                    .filter(|d| {
6906                        hr.pension_plans
6907                            .iter()
6908                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6909                    })
6910                    .map(|d| d.net_pension_liability)
6911                    .sum();
6912                let plan_assets_sum: rust_decimal::Decimal = hr
6913                    .pension_plan_assets
6914                    .iter()
6915                    .filter(|a| {
6916                        hr.pension_plans
6917                            .iter()
6918                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6919                    })
6920                    .map(|a| a.fair_value_closing)
6921                    .sum();
6922                if entity_pension_plan_count > 0 {
6923                    Some(sum + plan_assets_sum)
6924                } else {
6925                    None
6926                }
6927            };
6928            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6929                let sum: rust_decimal::Decimal = hr
6930                    .pension_plan_assets
6931                    .iter()
6932                    .filter(|a| {
6933                        hr.pension_plans
6934                            .iter()
6935                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6936                    })
6937                    .map(|a| a.fair_value_closing)
6938                    .sum();
6939                if entity_pension_plan_count > 0 {
6940                    Some(sum)
6941                } else {
6942                    None
6943                }
6944            };
6945
6946            // ---- Audit data: related parties + subsequent events ----
6947            // Audit snapshot covers all entities; use total counts (common case = single entity).
6948            let rp_count = audit.related_party_transactions.len();
6949            let se_count = audit.subsequent_events.len();
6950            let adjusting_count = audit
6951                .subsequent_events
6952                .iter()
6953                .filter(|e| {
6954                    matches!(
6955                        e.classification,
6956                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6957                    )
6958                })
6959                .count();
6960
6961            let ctx = NotesGeneratorContext {
6962                entity_code: company.code.clone(),
6963                framework,
6964                period: format!("FY{}", fiscal_year),
6965                period_end: last_period_end,
6966                currency: company.currency.clone(),
6967                revenue_amount,
6968                total_ppe_gross: ppe_gross,
6969                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6970                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
6971                deferred_tax_asset: entity_dta,
6972                deferred_tax_liability: entity_dtl,
6973                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
6974                provision_count,
6975                total_provisions,
6976                // Pension data from HR snapshot
6977                pension_plan_count: entity_pension_plan_count,
6978                total_dbo: entity_total_dbo,
6979                total_plan_assets: entity_total_plan_assets,
6980                // Audit data
6981                related_party_transaction_count: rp_count,
6982                subsequent_event_count: se_count,
6983                adjusting_event_count: adjusting_count,
6984                ..NotesGeneratorContext::default()
6985            };
6986
6987            let entity_notes = notes_gen.generate(&ctx);
6988            let standard_note_count = entity_notes.len() as u32;
6989            info!(
6990                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6991                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6992            );
6993            financial_reporting
6994                .notes_to_financial_statements
6995                .extend(entity_notes);
6996
6997            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
6998            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6999                .debt_instruments
7000                .iter()
7001                .filter(|d| d.entity_id == company.code)
7002                .map(|d| {
7003                    (
7004                        format!("{:?}", d.instrument_type),
7005                        d.principal,
7006                        d.maturity_date.to_string(),
7007                    )
7008                })
7009                .collect();
7010
7011            let hedge_count = treasury.hedge_relationships.len();
7012            let effective_hedges = treasury
7013                .hedge_relationships
7014                .iter()
7015                .filter(|h| h.is_effective)
7016                .count();
7017            let total_notional: rust_decimal::Decimal = treasury
7018                .hedging_instruments
7019                .iter()
7020                .map(|h| h.notional_amount)
7021                .sum();
7022            let total_fair_value: rust_decimal::Decimal = treasury
7023                .hedging_instruments
7024                .iter()
7025                .map(|h| h.fair_value)
7026                .sum();
7027
7028            // Join provision_movements with provisions to get entity/type info
7029            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
7030                .provisions
7031                .iter()
7032                .filter(|p| p.entity_code == company.code)
7033                .map(|p| p.id.as_str())
7034                .collect();
7035            let provision_movements: Vec<(
7036                String,
7037                rust_decimal::Decimal,
7038                rust_decimal::Decimal,
7039                rust_decimal::Decimal,
7040            )> = accounting_standards
7041                .provision_movements
7042                .iter()
7043                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
7044                .map(|m| {
7045                    let prov_type = accounting_standards
7046                        .provisions
7047                        .iter()
7048                        .find(|p| p.id == m.provision_id)
7049                        .map(|p| format!("{:?}", p.provision_type))
7050                        .unwrap_or_else(|| "Unknown".to_string());
7051                    (prov_type, m.opening, m.additions, m.closing)
7052                })
7053                .collect();
7054
7055            let enhanced_ctx = EnhancedNotesContext {
7056                entity_code: company.code.clone(),
7057                period: format!("FY{}", fiscal_year),
7058                currency: company.currency.clone(),
7059                // Inventory breakdown: best-effort using zero (would need balance tracker)
7060                finished_goods_value: rust_decimal::Decimal::ZERO,
7061                wip_value: rust_decimal::Decimal::ZERO,
7062                raw_materials_value: rust_decimal::Decimal::ZERO,
7063                debt_instruments,
7064                hedge_count,
7065                effective_hedges,
7066                total_notional,
7067                total_fair_value,
7068                provision_movements,
7069            };
7070
7071            let enhanced_notes =
7072                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
7073            if !enhanced_notes.is_empty() {
7074                info!(
7075                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
7076                    company.code,
7077                    enhanced_notes.len(),
7078                    enhanced_ctx.debt_instruments.len(),
7079                    hedge_count,
7080                    enhanced_ctx.provision_movements.len(),
7081                );
7082                financial_reporting
7083                    .notes_to_financial_statements
7084                    .extend(enhanced_notes);
7085            }
7086        }
7087    }
7088
7089    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
7090    ///
7091    /// This ensures the trial balance is coherent with the JEs: every debit and credit
7092    /// posted in the journal entries flows through to the trial balance, using the real
7093    /// GL account numbers from the CoA.
7094    fn build_trial_balance_from_entries(
7095        journal_entries: &[JournalEntry],
7096        coa: &ChartOfAccounts,
7097        company_code: &str,
7098        fiscal_year: u16,
7099        fiscal_period: u8,
7100        framework: &str,
7101    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7102        use rust_decimal::Decimal;
7103
7104        // Accumulate total debits and credits per GL account
7105        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
7106        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
7107
7108        for je in journal_entries {
7109            // Filter to matching company, fiscal year, and period
7110            if je.header.company_code != company_code
7111                || je.header.fiscal_year != fiscal_year
7112                || je.header.fiscal_period != fiscal_period
7113            {
7114                continue;
7115            }
7116
7117            for line in &je.lines {
7118                let acct = &line.gl_account;
7119                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
7120                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
7121            }
7122        }
7123
7124        // Build a TrialBalanceEntry for each account that had activity
7125        let mut all_accounts: Vec<&String> = account_debits
7126            .keys()
7127            .chain(account_credits.keys())
7128            .collect::<std::collections::HashSet<_>>()
7129            .into_iter()
7130            .collect();
7131        all_accounts.sort();
7132
7133        let mut entries = Vec::new();
7134
7135        for acct_number in all_accounts {
7136            let debit = account_debits
7137                .get(acct_number)
7138                .copied()
7139                .unwrap_or(Decimal::ZERO);
7140            let credit = account_credits
7141                .get(acct_number)
7142                .copied()
7143                .unwrap_or(Decimal::ZERO);
7144
7145            if debit.is_zero() && credit.is_zero() {
7146                continue;
7147            }
7148
7149            // Look up account name from CoA, fall back to "Account {code}"
7150            let account_name = coa
7151                .get_account(acct_number)
7152                .map(|gl| gl.short_description.clone())
7153                .unwrap_or_else(|| format!("Account {acct_number}"));
7154
7155            // Map account code prefix to the category strings expected by
7156            // FinancialStatementGenerator (Cash, Receivables, Inventory,
7157            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
7158            // OperatingExpenses).
7159            let category = Self::category_from_account_code(acct_number, framework);
7160
7161            entries.push(datasynth_generators::TrialBalanceEntry {
7162                account_code: acct_number.clone(),
7163                account_name,
7164                category,
7165                debit_balance: debit,
7166                credit_balance: credit,
7167            });
7168        }
7169
7170        entries
7171    }
7172
7173    /// Build a cumulative trial balance by aggregating all JEs from the start up to
7174    /// (and including) the given period end date.
7175    ///
7176    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
7177    /// while income statement accounts (revenue, expenses) show only the current period.
7178    /// The two are merged into a single Vec for the FinancialStatementGenerator.
7179    #[allow(clippy::too_many_arguments)]
7180    fn build_cumulative_trial_balance(
7181        journal_entries: &[JournalEntry],
7182        coa: &ChartOfAccounts,
7183        company_code: &str,
7184        start_date: NaiveDate,
7185        period_end: NaiveDate,
7186        fiscal_year: u16,
7187        fiscal_period: u8,
7188        framework: &str,
7189    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7190        use rust_decimal::Decimal;
7191
7192        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
7193        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
7194        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
7195
7196        // Accumulate debits/credits for income statement accounts (current period only)
7197        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
7198        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
7199
7200        for je in journal_entries {
7201            if je.header.company_code != company_code {
7202                continue;
7203            }
7204
7205            for line in &je.lines {
7206                let acct = &line.gl_account;
7207                // Framework-aware BS bucketing — fixes the Defect A
7208                // mis-classification where US-style prefix tables routed
7209                // SKR/PCG balance-sheet accounts through the P&L bucket
7210                // (or vice versa), giving the resulting TB an asymmetric
7211                // time window with no integrity invariant left to test.
7212                let is_bs_account = Self::is_balance_sheet_account(acct, framework);
7213
7214                if is_bs_account {
7215                    // Balance sheet: accumulate from start through period_end
7216                    if je.header.document_date <= period_end
7217                        && je.header.document_date >= start_date
7218                    {
7219                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7220                            line.debit_amount;
7221                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7222                            line.credit_amount;
7223                    }
7224                } else {
7225                    // Income statement: current period only
7226                    if je.header.fiscal_year == fiscal_year
7227                        && je.header.fiscal_period == fiscal_period
7228                    {
7229                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7230                            line.debit_amount;
7231                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7232                            line.credit_amount;
7233                    }
7234                }
7235            }
7236        }
7237
7238        // Merge all accounts
7239        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
7240        all_accounts.extend(bs_debits.keys().cloned());
7241        all_accounts.extend(bs_credits.keys().cloned());
7242        all_accounts.extend(is_debits.keys().cloned());
7243        all_accounts.extend(is_credits.keys().cloned());
7244
7245        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
7246        sorted_accounts.sort();
7247
7248        let mut entries = Vec::new();
7249
7250        for acct_number in &sorted_accounts {
7251            let category = Self::category_from_account_code(acct_number, framework);
7252            let is_bs_account = Self::is_balance_sheet_account(acct_number, framework);
7253
7254            let (debit, credit) = if is_bs_account {
7255                (
7256                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7257                    bs_credits
7258                        .get(acct_number)
7259                        .copied()
7260                        .unwrap_or(Decimal::ZERO),
7261                )
7262            } else {
7263                (
7264                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7265                    is_credits
7266                        .get(acct_number)
7267                        .copied()
7268                        .unwrap_or(Decimal::ZERO),
7269                )
7270            };
7271
7272            if debit.is_zero() && credit.is_zero() {
7273                continue;
7274            }
7275
7276            let account_name = coa
7277                .get_account(acct_number)
7278                .map(|gl| gl.short_description.clone())
7279                .unwrap_or_else(|| format!("Account {acct_number}"));
7280
7281            entries.push(datasynth_generators::TrialBalanceEntry {
7282                account_code: acct_number.clone(),
7283                account_name,
7284                category,
7285                debit_balance: debit,
7286                credit_balance: credit,
7287            });
7288        }
7289
7290        entries
7291    }
7292
7293    /// Build a JE-derived cash flow statement using the indirect method.
7294    ///
7295    /// Compares current and prior cumulative trial balances to derive working capital
7296    /// changes, producing a coherent cash flow statement tied to actual journal entries.
7297    fn build_cash_flow_from_trial_balances(
7298        current_tb: &[datasynth_generators::TrialBalanceEntry],
7299        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7300        net_income: rust_decimal::Decimal,
7301    ) -> Vec<CashFlowItem> {
7302        use rust_decimal::Decimal;
7303
7304        // Helper: aggregate a TB by category and return net (debit - credit)
7305        let aggregate =
7306            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7307                let mut map: HashMap<String, Decimal> = HashMap::new();
7308                for entry in tb {
7309                    let net = entry.debit_balance - entry.credit_balance;
7310                    *map.entry(entry.category.clone()).or_default() += net;
7311                }
7312                map
7313            };
7314
7315        let current = aggregate(current_tb);
7316        let prior = prior_tb.map(aggregate);
7317
7318        // Get balance for a category, defaulting to zero
7319        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7320            *map.get(key).unwrap_or(&Decimal::ZERO)
7321        };
7322
7323        // Compute change: current - prior (or current if no prior)
7324        let change = |key: &str| -> Decimal {
7325            let curr = get(&current, key);
7326            match &prior {
7327                Some(p) => curr - get(p, key),
7328                None => curr,
7329            }
7330        };
7331
7332        // Operating activities (indirect method)
7333        // Depreciation add-back: approximate from FixedAssets decrease
7334        let fixed_asset_change = change("FixedAssets");
7335        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7336            -fixed_asset_change
7337        } else {
7338            Decimal::ZERO
7339        };
7340
7341        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
7342        let ar_change = change("Receivables");
7343        let inventory_change = change("Inventory");
7344        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
7345        let ap_change = change("Payables");
7346        let accrued_change = change("AccruedLiabilities");
7347
7348        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7349            + (-ap_change)
7350            + (-accrued_change);
7351
7352        // Investing activities
7353        let capex = if fixed_asset_change > Decimal::ZERO {
7354            -fixed_asset_change
7355        } else {
7356            Decimal::ZERO
7357        };
7358        let investing_cf = capex;
7359
7360        // Financing activities
7361        let debt_change = -change("LongTermDebt");
7362        let equity_change = -change("Equity");
7363        let financing_cf = debt_change + equity_change;
7364
7365        let net_change = operating_cf + investing_cf + financing_cf;
7366
7367        vec![
7368            CashFlowItem {
7369                item_code: "CF-NI".to_string(),
7370                label: "Net Income".to_string(),
7371                category: CashFlowCategory::Operating,
7372                amount: net_income,
7373                amount_prior: None,
7374                sort_order: 1,
7375                is_total: false,
7376            },
7377            CashFlowItem {
7378                item_code: "CF-DEP".to_string(),
7379                label: "Depreciation & Amortization".to_string(),
7380                category: CashFlowCategory::Operating,
7381                amount: depreciation_addback,
7382                amount_prior: None,
7383                sort_order: 2,
7384                is_total: false,
7385            },
7386            CashFlowItem {
7387                item_code: "CF-AR".to_string(),
7388                label: "Change in Accounts Receivable".to_string(),
7389                category: CashFlowCategory::Operating,
7390                amount: -ar_change,
7391                amount_prior: None,
7392                sort_order: 3,
7393                is_total: false,
7394            },
7395            CashFlowItem {
7396                item_code: "CF-AP".to_string(),
7397                label: "Change in Accounts Payable".to_string(),
7398                category: CashFlowCategory::Operating,
7399                amount: -ap_change,
7400                amount_prior: None,
7401                sort_order: 4,
7402                is_total: false,
7403            },
7404            CashFlowItem {
7405                item_code: "CF-INV".to_string(),
7406                label: "Change in Inventory".to_string(),
7407                category: CashFlowCategory::Operating,
7408                amount: -inventory_change,
7409                amount_prior: None,
7410                sort_order: 5,
7411                is_total: false,
7412            },
7413            CashFlowItem {
7414                item_code: "CF-OP".to_string(),
7415                label: "Net Cash from Operating Activities".to_string(),
7416                category: CashFlowCategory::Operating,
7417                amount: operating_cf,
7418                amount_prior: None,
7419                sort_order: 6,
7420                is_total: true,
7421            },
7422            CashFlowItem {
7423                item_code: "CF-CAPEX".to_string(),
7424                label: "Capital Expenditures".to_string(),
7425                category: CashFlowCategory::Investing,
7426                amount: capex,
7427                amount_prior: None,
7428                sort_order: 7,
7429                is_total: false,
7430            },
7431            CashFlowItem {
7432                item_code: "CF-INV-T".to_string(),
7433                label: "Net Cash from Investing Activities".to_string(),
7434                category: CashFlowCategory::Investing,
7435                amount: investing_cf,
7436                amount_prior: None,
7437                sort_order: 8,
7438                is_total: true,
7439            },
7440            CashFlowItem {
7441                item_code: "CF-DEBT".to_string(),
7442                label: "Net Borrowings / (Repayments)".to_string(),
7443                category: CashFlowCategory::Financing,
7444                amount: debt_change,
7445                amount_prior: None,
7446                sort_order: 9,
7447                is_total: false,
7448            },
7449            CashFlowItem {
7450                item_code: "CF-EQ".to_string(),
7451                label: "Equity Changes".to_string(),
7452                category: CashFlowCategory::Financing,
7453                amount: equity_change,
7454                amount_prior: None,
7455                sort_order: 10,
7456                is_total: false,
7457            },
7458            CashFlowItem {
7459                item_code: "CF-FIN-T".to_string(),
7460                label: "Net Cash from Financing Activities".to_string(),
7461                category: CashFlowCategory::Financing,
7462                amount: financing_cf,
7463                amount_prior: None,
7464                sort_order: 11,
7465                is_total: true,
7466            },
7467            CashFlowItem {
7468                item_code: "CF-NET".to_string(),
7469                label: "Net Change in Cash".to_string(),
7470                category: CashFlowCategory::Operating,
7471                amount: net_change,
7472                amount_prior: None,
7473                sort_order: 12,
7474                is_total: true,
7475            },
7476        ]
7477    }
7478
7479    /// Calculate net income from a set of trial balance entries.
7480    ///
7481    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
7482    fn calculate_net_income_from_tb(
7483        tb: &[datasynth_generators::TrialBalanceEntry],
7484    ) -> rust_decimal::Decimal {
7485        use rust_decimal::Decimal;
7486
7487        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7488        for entry in tb {
7489            let net = entry.debit_balance - entry.credit_balance;
7490            *aggregated.entry(entry.category.clone()).or_default() += net;
7491        }
7492
7493        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7494        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7495        let opex = *aggregated
7496            .get("OperatingExpenses")
7497            .unwrap_or(&Decimal::ZERO);
7498        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7499        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7500
7501        // revenue is negative (credit-normal), expenses are positive (debit-normal)
7502        // other_income is typically negative (credit), other_expenses is typically positive
7503        let operating_income = revenue - cogs - opex - other_expenses - other_income;
7504        let tax_rate = Decimal::new(25, 2); // 0.25
7505        let tax = operating_income * tax_rate;
7506        operating_income - tax
7507    }
7508
7509    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
7510    ///
7511    /// Uses the first two digits of the account code to classify into the categories
7512    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
7513    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
7514    /// OperatingExpenses, OtherIncome, OtherExpenses.
7515    /// Map an account code to the orchestrator's 13-bucket category string
7516    /// (`"Cash"` / `"Receivables"` / `"Inventory"` / `"FixedAssets"` /
7517    /// `"Payables"` / `"AccruedLiabilities"` / `"LongTermDebt"` /
7518    /// `"Equity"` / `"Revenue"` / `"CostOfSales"` / `"OperatingExpenses"`
7519    /// / `"OtherIncome"` / `"OtherExpenses"`).
7520    ///
7521    /// `framework` controls which numbering convention is applied:
7522    ///
7523    /// - `"us_gaap"` / `"ifrs"` / `"dual_reporting"` — US-style 4-digit
7524    ///   chart (1xxx assets, 2xxx liabilities, 3xxx equity, 4xxx revenue,
7525    ///   5xxx COGS, 6xxx OpEx, 7xxx other income, 8xxx other expense).
7526    /// - `"french_gaap"` — French PCG (1 = capital/liabilities, 2 = fixed
7527    ///   assets, 3 = inventory, 4 = third parties, 5 = cash, 6 = expenses,
7528    ///   7 = revenue).
7529    /// - `"german_gaap"` / `"hgb"` — German SKR04 (0 = fixed assets,
7530    ///   1 = current assets, 2 = equity, 3 = liabilities, 4 = revenue,
7531    ///   5 = COGS, 6 = OpEx, 7 = financial, 8 = tax/extraordinary).
7532    ///
7533    /// Unknown frameworks fall back to US-style.
7534    fn category_from_account_code(code: &str, framework: &str) -> String {
7535        match framework {
7536            "german_gaap" | "GermanGaap" | "hgb" => Self::skr_category(code),
7537            "french_gaap" | "FrenchGaap" => Self::pcg_category(code),
7538            _ => Self::us_gaap_category(code),
7539        }
7540        .to_string()
7541    }
7542
7543    fn us_gaap_category(code: &str) -> &'static str {
7544        let prefix: String = code.chars().take(2).collect();
7545        match prefix.as_str() {
7546            "10" => "Cash",
7547            "11" => "Receivables",
7548            "12" | "13" | "14" => "Inventory",
7549            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
7550            "20" => "Payables",
7551            "21" | "22" | "23" | "24" => "AccruedLiabilities",
7552            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
7553            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
7554            "40" | "41" | "42" | "43" | "44" => "Revenue",
7555            "50" | "51" | "52" => "CostOfSales",
7556            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
7557                "OperatingExpenses"
7558            }
7559            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
7560            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
7561            _ => "OperatingExpenses",
7562        }
7563    }
7564
7565    /// SKR04 (German GAAP) prefix → orchestrator category.
7566    ///
7567    /// 0 = fixed assets, 1 = current assets (10-12 cash, 13-14 receivables,
7568    /// 15-19 inventory), 2 = equity, 3 = liabilities (3-31 payables,
7569    /// 32-37 accrued, 38-39 long-term debt), 4 = revenue, 5 = COGS,
7570    /// 6 = OpEx, 7 = financial income, 8 = tax/extraordinary expense.
7571    fn skr_category(code: &str) -> &'static str {
7572        let first = code.chars().next().and_then(|c| c.to_digit(10));
7573        let prefix: String = code.chars().take(2).collect();
7574        match first {
7575            Some(0) => "FixedAssets",
7576            Some(1) => match prefix.as_str() {
7577                "10" | "11" | "12" => "Cash",
7578                "13" | "14" => "Receivables",
7579                _ => "Inventory",
7580            },
7581            Some(2) => "Equity",
7582            Some(3) => match prefix.as_str() {
7583                "30" | "31" => "Payables",
7584                "32" | "33" | "34" | "35" | "36" | "37" => "AccruedLiabilities",
7585                _ => "LongTermDebt",
7586            },
7587            Some(4) => "Revenue",
7588            Some(5) => "CostOfSales",
7589            Some(6) => "OperatingExpenses",
7590            Some(7) => "OtherIncome",
7591            Some(8) => "OtherExpenses",
7592            _ => "OperatingExpenses",
7593        }
7594    }
7595
7596    /// French PCG prefix → orchestrator category.
7597    ///
7598    /// 10-14 = equity, 15-19 = liabilities (provisions, debts),
7599    /// 2 = fixed assets, 3 = inventory, 40 = payables, 41 = receivables,
7600    /// 42-49 = liabilities (personnel, tax, group), 5 = cash, 6 = expenses,
7601    /// 7 = revenue.
7602    fn pcg_category(code: &str) -> &'static str {
7603        let first = code.chars().next().and_then(|c| c.to_digit(10));
7604        let second = code.chars().nth(1).and_then(|c| c.to_digit(10));
7605        match first {
7606            Some(1) => match second {
7607                Some(0..=4) => "Equity",
7608                Some(5) => "AccruedLiabilities",
7609                _ => "LongTermDebt",
7610            },
7611            Some(2) => "FixedAssets",
7612            Some(3) => "Inventory",
7613            Some(4) => match second {
7614                Some(0) => "Payables",
7615                Some(1) => "Receivables",
7616                _ => "AccruedLiabilities",
7617            },
7618            Some(5) => "Cash",
7619            Some(6) => "OperatingExpenses",
7620            Some(7) => "Revenue",
7621            Some(8) | Some(9) => "OperatingExpenses",
7622            _ => "OperatingExpenses",
7623        }
7624    }
7625
7626    /// Test whether an account code maps to a balance-sheet line under
7627    /// the given framework. Drives the cumulative-vs-period bucketing in
7628    /// [`Self::build_cumulative_trial_balance`].
7629    ///
7630    /// Delegates to the framework-aware classifier in
7631    /// `datasynth-core::framework_accounts` so SKR (German) and PCG
7632    /// (French) codes are recognised, not silently routed through a
7633    /// US-style prefix table.
7634    fn is_balance_sheet_account(code: &str, framework: &str) -> bool {
7635        // `AccountType` here is the `balance::AccountType` imported at
7636        // the top of the file; `FrameworkAccounts::classify_account_type`
7637        // returns the same enum, so no cross-namespace mapping is needed.
7638        let fa = datasynth_core::framework_accounts::FrameworkAccounts::for_framework(framework);
7639        matches!(
7640            fa.classify_account_type(code),
7641            AccountType::Asset
7642                | AccountType::ContraAsset
7643                | AccountType::Liability
7644                | AccountType::ContraLiability
7645                | AccountType::Equity
7646                | AccountType::ContraEquity
7647        )
7648    }
7649
7650    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
7651    fn phase_hr_data(
7652        &mut self,
7653        stats: &mut EnhancedGenerationStatistics,
7654    ) -> SynthResult<HrSnapshot> {
7655        if !self.phase_config.generate_hr {
7656            debug!("Phase 16: Skipped (HR generation disabled)");
7657            return Ok(HrSnapshot::default());
7658        }
7659
7660        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7661
7662        let seed = self.seed;
7663        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7664            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7665        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7666        let company_code = self
7667            .config
7668            .companies
7669            .first()
7670            .map(|c| c.code.as_str())
7671            .unwrap_or("1000");
7672        let currency = self
7673            .config
7674            .companies
7675            .first()
7676            .map(|c| c.currency.as_str())
7677            .unwrap_or("USD");
7678
7679        let employee_ids: Vec<String> = self
7680            .master_data
7681            .employees
7682            .iter()
7683            .map(|e| e.employee_id.clone())
7684            .collect();
7685
7686        if employee_ids.is_empty() {
7687            debug!("Phase 16: Skipped (no employees available)");
7688            return Ok(HrSnapshot::default());
7689        }
7690
7691        // Extract cost-center pool from master data employees for cross-reference
7692        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
7693        let cost_center_ids: Vec<String> = self
7694            .master_data
7695            .employees
7696            .iter()
7697            .filter_map(|e| e.cost_center.clone())
7698            .collect::<std::collections::HashSet<_>>()
7699            .into_iter()
7700            .collect();
7701
7702        let mut snapshot = HrSnapshot::default();
7703
7704        // Generate payroll runs (one per month)
7705        if self.config.hr.payroll.enabled {
7706            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7707                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7708
7709            // Look up country pack for payroll deductions and labels
7710            let payroll_pack = self.primary_pack();
7711
7712            // Store the pack on the generator so generate() resolves
7713            // localized deduction rates and labels from it.
7714            payroll_gen.set_country_pack(payroll_pack.clone());
7715
7716            let employees_with_salary: Vec<(
7717                String,
7718                rust_decimal::Decimal,
7719                Option<String>,
7720                Option<String>,
7721            )> = self
7722                .master_data
7723                .employees
7724                .iter()
7725                .map(|e| {
7726                    // Use the employee's actual annual base salary.
7727                    // Fall back to $60,000 / yr if somehow zero.
7728                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7729                        e.base_salary
7730                    } else {
7731                        rust_decimal::Decimal::from(60_000)
7732                    };
7733                    (
7734                        e.employee_id.clone(),
7735                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
7736                        e.cost_center.clone(),
7737                        e.department_id.clone(),
7738                    )
7739                })
7740                .collect();
7741
7742            // Use generate_with_changes when employee change history is available
7743            // so that salary adjustments, transfers, etc. are reflected in payroll.
7744            let change_history = &self.master_data.employee_change_history;
7745            let has_changes = !change_history.is_empty();
7746            if has_changes {
7747                debug!(
7748                    "Payroll will incorporate {} employee change events",
7749                    change_history.len()
7750                );
7751            }
7752
7753            for month in 0..self.config.global.period_months {
7754                let period_start = start_date + chrono::Months::new(month);
7755                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7756                let (run, items) = if has_changes {
7757                    payroll_gen.generate_with_changes(
7758                        company_code,
7759                        &employees_with_salary,
7760                        period_start,
7761                        period_end,
7762                        currency,
7763                        change_history,
7764                    )
7765                } else {
7766                    payroll_gen.generate(
7767                        company_code,
7768                        &employees_with_salary,
7769                        period_start,
7770                        period_end,
7771                        currency,
7772                    )
7773                };
7774                snapshot.payroll_runs.push(run);
7775                snapshot.payroll_run_count += 1;
7776                snapshot.payroll_line_item_count += items.len();
7777                snapshot.payroll_line_items.extend(items);
7778            }
7779        }
7780
7781        // Generate time entries
7782        if self.config.hr.time_attendance.enabled {
7783            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7784                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7785            // v3.4.2: when a temporal context is configured, time entries
7786            // respect holidays (not just weekends) and submitted_at lag
7787            // snaps to business days.
7788            if let Some(ctx) = &self.temporal_context {
7789                time_gen.set_temporal_context(Arc::clone(ctx));
7790            }
7791            let entries = time_gen.generate(
7792                &employee_ids,
7793                start_date,
7794                end_date,
7795                &self.config.hr.time_attendance,
7796            );
7797            snapshot.time_entry_count = entries.len();
7798            snapshot.time_entries = entries;
7799        }
7800
7801        // Generate expense reports
7802        if self.config.hr.expenses.enabled {
7803            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7804                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7805            expense_gen.set_country_pack(self.primary_pack().clone());
7806            // v3.4.2: snap submission / approval / paid / line-item dates
7807            // to business days when temporal_context is present.
7808            if let Some(ctx) = &self.temporal_context {
7809                expense_gen.set_temporal_context(Arc::clone(ctx));
7810            }
7811            let company_currency = self
7812                .config
7813                .companies
7814                .first()
7815                .map(|c| c.currency.as_str())
7816                .unwrap_or("USD");
7817            let reports = expense_gen.generate_with_currency(
7818                &employee_ids,
7819                start_date,
7820                end_date,
7821                &self.config.hr.expenses,
7822                company_currency,
7823            );
7824            snapshot.expense_report_count = reports.len();
7825            snapshot.expense_reports = reports;
7826        }
7827
7828        // Generate benefit enrollments (gated on payroll, since benefits require employees)
7829        if self.config.hr.payroll.enabled {
7830            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7831            let employee_pairs: Vec<(String, String)> = self
7832                .master_data
7833                .employees
7834                .iter()
7835                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7836                .collect();
7837            let enrollments =
7838                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7839            snapshot.benefit_enrollment_count = enrollments.len();
7840            snapshot.benefit_enrollments = enrollments;
7841        }
7842
7843        // Generate defined benefit pension plans (IAS 19 / ASC 715)
7844        if self.phase_config.generate_hr {
7845            let entity_name = self
7846                .config
7847                .companies
7848                .first()
7849                .map(|c| c.name.as_str())
7850                .unwrap_or("Entity");
7851            let period_months = self.config.global.period_months;
7852            let period_label = {
7853                let y = start_date.year();
7854                let m = start_date.month();
7855                if period_months >= 12 {
7856                    format!("FY{y}")
7857                } else {
7858                    format!("{y}-{m:02}")
7859                }
7860            };
7861            let reporting_date =
7862                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7863
7864            // Compute average annual salary from actual payroll data when available.
7865            // PayrollRun.total_gross covers all employees for one pay period; we sum
7866            // across all runs and divide by employee_count to get per-employee total,
7867            // then annualise for sub-annual periods.
7868            let avg_salary: Option<rust_decimal::Decimal> = {
7869                let employee_count = employee_ids.len();
7870                if self.config.hr.payroll.enabled
7871                    && employee_count > 0
7872                    && !snapshot.payroll_runs.is_empty()
7873                {
7874                    // Sum total gross pay across all payroll runs for this company
7875                    let total_gross: rust_decimal::Decimal = snapshot
7876                        .payroll_runs
7877                        .iter()
7878                        .filter(|r| r.company_code == company_code)
7879                        .map(|r| r.total_gross)
7880                        .sum();
7881                    if total_gross > rust_decimal::Decimal::ZERO {
7882                        // Annualise: total_gross covers `period_months` months of pay
7883                        let annual_total = if period_months > 0 && period_months < 12 {
7884                            total_gross * rust_decimal::Decimal::from(12u32)
7885                                / rust_decimal::Decimal::from(period_months)
7886                        } else {
7887                            total_gross
7888                        };
7889                        Some(
7890                            (annual_total / rust_decimal::Decimal::from(employee_count))
7891                                .round_dp(2),
7892                        )
7893                    } else {
7894                        None
7895                    }
7896                } else {
7897                    None
7898                }
7899            };
7900
7901            let mut pension_gen =
7902                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7903            let pension_snap = pension_gen.generate(
7904                company_code,
7905                entity_name,
7906                &period_label,
7907                reporting_date,
7908                employee_ids.len(),
7909                currency,
7910                avg_salary,
7911                period_months,
7912            );
7913            snapshot.pension_plan_count = pension_snap.plans.len();
7914            snapshot.pension_plans = pension_snap.plans;
7915            snapshot.pension_obligations = pension_snap.obligations;
7916            snapshot.pension_plan_assets = pension_snap.plan_assets;
7917            snapshot.pension_disclosures = pension_snap.disclosures;
7918            // Pension JEs are returned here so they can be added to entries
7919            // in the caller (stored temporarily on snapshot for transfer).
7920            // We embed them in the hr snapshot for simplicity; the orchestrator
7921            // will extract and extend `entries`.
7922            snapshot.pension_journal_entries = pension_snap.journal_entries;
7923        }
7924
7925        // Generate stock-based compensation (ASC 718 / IFRS 2)
7926        if self.phase_config.generate_hr && !employee_ids.is_empty() {
7927            let period_months = self.config.global.period_months;
7928            let period_label = {
7929                let y = start_date.year();
7930                let m = start_date.month();
7931                if period_months >= 12 {
7932                    format!("FY{y}")
7933                } else {
7934                    format!("{y}-{m:02}")
7935                }
7936            };
7937            let reporting_date =
7938                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7939
7940            let mut stock_comp_gen =
7941                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7942            let stock_snap = stock_comp_gen.generate(
7943                company_code,
7944                &employee_ids,
7945                start_date,
7946                &period_label,
7947                reporting_date,
7948                currency,
7949            );
7950            snapshot.stock_grant_count = stock_snap.grants.len();
7951            snapshot.stock_grants = stock_snap.grants;
7952            snapshot.stock_comp_expenses = stock_snap.expenses;
7953            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7954        }
7955
7956        stats.payroll_run_count = snapshot.payroll_run_count;
7957        stats.time_entry_count = snapshot.time_entry_count;
7958        stats.expense_report_count = snapshot.expense_report_count;
7959        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7960        stats.pension_plan_count = snapshot.pension_plan_count;
7961        stats.stock_grant_count = snapshot.stock_grant_count;
7962
7963        info!(
7964            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7965            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7966            snapshot.time_entry_count, snapshot.expense_report_count,
7967            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7968            snapshot.stock_grant_count
7969        );
7970        self.check_resources_with_log("post-hr")?;
7971
7972        Ok(snapshot)
7973    }
7974
7975    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
7976    fn phase_accounting_standards(
7977        &mut self,
7978        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7979        journal_entries: &[JournalEntry],
7980        stats: &mut EnhancedGenerationStatistics,
7981    ) -> SynthResult<AccountingStandardsSnapshot> {
7982        if !self.phase_config.generate_accounting_standards {
7983            debug!("Phase 17: Skipped (accounting standards generation disabled)");
7984            return Ok(AccountingStandardsSnapshot::default());
7985        }
7986        info!("Phase 17: Generating Accounting Standards Data");
7987
7988        let seed = self.seed;
7989        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7990            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7991        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7992        let company_code = self
7993            .config
7994            .companies
7995            .first()
7996            .map(|c| c.code.as_str())
7997            .unwrap_or("1000");
7998        let currency = self
7999            .config
8000            .companies
8001            .first()
8002            .map(|c| c.currency.as_str())
8003            .unwrap_or("USD");
8004
8005        // Convert config framework to standards framework.
8006        // If the user explicitly set a framework in the YAML config, use that.
8007        // Otherwise, fall back to the country pack's accounting.framework field,
8008        // and if that is also absent or unrecognised, default to US GAAP.
8009        let framework = match self.config.accounting_standards.framework {
8010            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
8011                datasynth_standards::framework::AccountingFramework::UsGaap
8012            }
8013            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
8014                datasynth_standards::framework::AccountingFramework::Ifrs
8015            }
8016            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
8017                datasynth_standards::framework::AccountingFramework::DualReporting
8018            }
8019            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
8020                datasynth_standards::framework::AccountingFramework::FrenchGaap
8021            }
8022            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
8023                datasynth_standards::framework::AccountingFramework::GermanGaap
8024            }
8025            None => {
8026                // Derive framework from the primary company's country pack
8027                let pack = self.primary_pack();
8028                let pack_fw = pack.accounting.framework.as_str();
8029                match pack_fw {
8030                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
8031                    "dual_reporting" => {
8032                        datasynth_standards::framework::AccountingFramework::DualReporting
8033                    }
8034                    "french_gaap" => {
8035                        datasynth_standards::framework::AccountingFramework::FrenchGaap
8036                    }
8037                    "german_gaap" | "hgb" => {
8038                        datasynth_standards::framework::AccountingFramework::GermanGaap
8039                    }
8040                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
8041                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
8042                }
8043            }
8044        };
8045
8046        let mut snapshot = AccountingStandardsSnapshot::default();
8047
8048        // Revenue recognition
8049        if self.config.accounting_standards.revenue_recognition.enabled {
8050            let customer_ids: Vec<String> = self
8051                .master_data
8052                .customers
8053                .iter()
8054                .map(|c| c.customer_id.clone())
8055                .collect();
8056
8057            if !customer_ids.is_empty() {
8058                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
8059                let contracts = rev_gen.generate(
8060                    company_code,
8061                    &customer_ids,
8062                    start_date,
8063                    end_date,
8064                    currency,
8065                    &self.config.accounting_standards.revenue_recognition,
8066                    framework,
8067                );
8068                snapshot.revenue_contract_count = contracts.len();
8069                snapshot.contracts = contracts;
8070            }
8071        }
8072
8073        // Impairment testing
8074        if self.config.accounting_standards.impairment.enabled {
8075            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
8076                .master_data
8077                .assets
8078                .iter()
8079                .map(|a| {
8080                    (
8081                        a.asset_id.clone(),
8082                        a.description.clone(),
8083                        a.acquisition_cost,
8084                    )
8085                })
8086                .collect();
8087
8088            if !asset_data.is_empty() {
8089                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
8090                let tests = imp_gen.generate(
8091                    company_code,
8092                    &asset_data,
8093                    end_date,
8094                    &self.config.accounting_standards.impairment,
8095                    framework,
8096                );
8097                snapshot.impairment_test_count = tests.len();
8098                snapshot.impairment_tests = tests;
8099            }
8100        }
8101
8102        // Business combinations (IFRS 3 / ASC 805)
8103        if self
8104            .config
8105            .accounting_standards
8106            .business_combinations
8107            .enabled
8108        {
8109            let bc_config = &self.config.accounting_standards.business_combinations;
8110            let framework_str = match framework {
8111                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8112                _ => "US_GAAP",
8113            };
8114            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
8115            let bc_snap = bc_gen.generate(
8116                company_code,
8117                currency,
8118                start_date,
8119                end_date,
8120                bc_config.acquisition_count,
8121                framework_str,
8122            );
8123            snapshot.business_combination_count = bc_snap.combinations.len();
8124            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
8125            snapshot.business_combinations = bc_snap.combinations;
8126        }
8127
8128        // Expected Credit Loss (IFRS 9 / ASC 326)
8129        if self
8130            .config
8131            .accounting_standards
8132            .expected_credit_loss
8133            .enabled
8134        {
8135            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
8136            let framework_str = match framework {
8137                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
8138                _ => "ASC_326",
8139            };
8140
8141            // Use AR aging data from the subledger snapshot if available;
8142            // otherwise generate synthetic bucket exposures.
8143            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8144
8145            let mut ecl_gen = EclGenerator::new(seed + 43);
8146
8147            // Collect combined bucket totals across all company AR aging reports.
8148            let bucket_exposures: Vec<(
8149                datasynth_core::models::subledger::ar::AgingBucket,
8150                rust_decimal::Decimal,
8151            )> = if ar_aging_reports.is_empty() {
8152                // No AR aging data — synthesise plausible bucket exposures.
8153                use datasynth_core::models::subledger::ar::AgingBucket;
8154                vec![
8155                    (
8156                        AgingBucket::Current,
8157                        rust_decimal::Decimal::from(500_000_u32),
8158                    ),
8159                    (
8160                        AgingBucket::Days1To30,
8161                        rust_decimal::Decimal::from(120_000_u32),
8162                    ),
8163                    (
8164                        AgingBucket::Days31To60,
8165                        rust_decimal::Decimal::from(45_000_u32),
8166                    ),
8167                    (
8168                        AgingBucket::Days61To90,
8169                        rust_decimal::Decimal::from(15_000_u32),
8170                    ),
8171                    (
8172                        AgingBucket::Over90Days,
8173                        rust_decimal::Decimal::from(8_000_u32),
8174                    ),
8175                ]
8176            } else {
8177                use datasynth_core::models::subledger::ar::AgingBucket;
8178                // Sum bucket totals from all reports.
8179                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
8180                    std::collections::HashMap::new();
8181                for report in ar_aging_reports {
8182                    for (bucket, amount) in &report.bucket_totals {
8183                        *totals.entry(*bucket).or_default() += amount;
8184                    }
8185                }
8186                AgingBucket::all()
8187                    .into_iter()
8188                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
8189                    .collect()
8190            };
8191
8192            let ecl_snap = ecl_gen.generate(
8193                company_code,
8194                end_date,
8195                &bucket_exposures,
8196                ecl_config,
8197                &period_label,
8198                framework_str,
8199            );
8200
8201            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
8202            snapshot.ecl_models = ecl_snap.ecl_models;
8203            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
8204            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
8205        }
8206
8207        // Provisions and contingencies (IAS 37 / ASC 450)
8208        {
8209            let framework_str = match framework {
8210                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8211                _ => "US_GAAP",
8212            };
8213
8214            // Compute actual revenue from the journal entries generated so far.
8215            // The `journal_entries` slice passed to this phase contains all GL entries
8216            // up to and including Period Close. Fall back to a minimum of 100_000 to
8217            // avoid degenerate zero-based provision amounts on first-period datasets.
8218            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
8219                .max(rust_decimal::Decimal::from(100_000_u32));
8220
8221            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8222
8223            let mut prov_gen = ProvisionGenerator::new(seed + 44);
8224            let prov_snap = prov_gen.generate(
8225                company_code,
8226                currency,
8227                revenue_proxy,
8228                end_date,
8229                &period_label,
8230                framework_str,
8231                None, // prior_opening: no carry-forward data in single-period runs
8232            );
8233
8234            snapshot.provision_count = prov_snap.provisions.len();
8235            snapshot.provisions = prov_snap.provisions;
8236            snapshot.provision_movements = prov_snap.movements;
8237            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
8238            snapshot.provision_journal_entries = prov_snap.journal_entries;
8239        }
8240
8241        // IAS 21 Functional Currency Translation
8242        // For each company whose functional currency differs from the presentation
8243        // currency, generate a CurrencyTranslationResult with CTA (OCI).
8244        {
8245            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8246
8247            let presentation_currency = self
8248                .config
8249                .global
8250                .presentation_currency
8251                .clone()
8252                .unwrap_or_else(|| self.config.global.group_currency.clone());
8253
8254            // Build a minimal rate table populated with approximate rates from
8255            // the FX model base rates (USD-based) so we can do the translation.
8256            let mut rate_table = FxRateTable::new(&presentation_currency);
8257
8258            // Populate with base rates against USD; if presentation_currency is
8259            // not USD we do a best-effort two-step conversion using the table's
8260            // triangulation support.
8261            let base_rates = base_rates_usd();
8262            for (ccy, rate) in &base_rates {
8263                rate_table.add_rate(FxRate::new(
8264                    ccy,
8265                    "USD",
8266                    RateType::Closing,
8267                    end_date,
8268                    *rate,
8269                    "SYNTHETIC",
8270                ));
8271                // Average rate = 98% of closing (approximation).
8272                // 0.98 = 98/100 = Decimal::new(98, 2)
8273                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
8274                rate_table.add_rate(FxRate::new(
8275                    ccy,
8276                    "USD",
8277                    RateType::Average,
8278                    end_date,
8279                    avg,
8280                    "SYNTHETIC",
8281                ));
8282            }
8283
8284            let mut translation_results = Vec::new();
8285            for company in &self.config.companies {
8286                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
8287                // to ensure the translation produces non-trivial CTA amounts.
8288                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
8289                    .max(rust_decimal::Decimal::from(100_000_u32));
8290
8291                let func_ccy = company
8292                    .functional_currency
8293                    .clone()
8294                    .unwrap_or_else(|| company.currency.clone());
8295
8296                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
8297                    &company.code,
8298                    &func_ccy,
8299                    &presentation_currency,
8300                    &ias21_period_label,
8301                    end_date,
8302                    company_revenue,
8303                    &rate_table,
8304                );
8305                translation_results.push(result);
8306            }
8307
8308            snapshot.currency_translation_count = translation_results.len();
8309            snapshot.currency_translation_results = translation_results;
8310        }
8311
8312        stats.revenue_contract_count = snapshot.revenue_contract_count;
8313        stats.impairment_test_count = snapshot.impairment_test_count;
8314        stats.business_combination_count = snapshot.business_combination_count;
8315        stats.ecl_model_count = snapshot.ecl_model_count;
8316        stats.provision_count = snapshot.provision_count;
8317
8318        // ------------------------------------------------------------
8319        // v3.3.1: Lease accounting (IFRS 16 / ASC 842)
8320        // ------------------------------------------------------------
8321        if self.config.accounting_standards.leases.enabled {
8322            use datasynth_generators::standards::LeaseGenerator;
8323            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8324                .unwrap_or_else(|_| {
8325                    NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
8326                });
8327            let framework =
8328                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8329            let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
8330            for company in &self.config.companies {
8331                let leases = lease_gen.generate(
8332                    &company.code,
8333                    start_date,
8334                    &self.config.accounting_standards.leases,
8335                    framework,
8336                );
8337                snapshot.lease_count += leases.len();
8338                snapshot.leases.extend(leases);
8339            }
8340            info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
8341        }
8342
8343        // ------------------------------------------------------------
8344        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
8345        // ------------------------------------------------------------
8346        if self.config.accounting_standards.fair_value.enabled {
8347            use datasynth_generators::standards::FairValueGenerator;
8348            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8349                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8350                + chrono::Months::new(self.config.global.period_months);
8351            let framework =
8352                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8353            let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8354            for company in &self.config.companies {
8355                let measurements = fv_gen.generate(
8356                    &company.code,
8357                    end_date,
8358                    &company.currency,
8359                    &self.config.accounting_standards.fair_value,
8360                    framework,
8361                );
8362                snapshot.fair_value_measurement_count += measurements.len();
8363                snapshot.fair_value_measurements.extend(measurements);
8364            }
8365            info!(
8366                "v3.3.1 fair value measurements: {}",
8367                snapshot.fair_value_measurement_count
8368            );
8369        }
8370
8371        // ------------------------------------------------------------
8372        // v3.3.1: Framework reconciliation (dual reporting only)
8373        // ------------------------------------------------------------
8374        if self.config.accounting_standards.generate_differences
8375            && matches!(
8376                self.config.accounting_standards.framework,
8377                Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8378            )
8379        {
8380            use datasynth_generators::standards::FrameworkReconciliationGenerator;
8381            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8382                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8383                + chrono::Months::new(self.config.global.period_months);
8384            let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8385            for company in &self.config.companies {
8386                let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8387                snapshot.framework_difference_count += records.len();
8388                snapshot.framework_differences.extend(records);
8389                snapshot.framework_reconciliations.push(reconciliation);
8390            }
8391            info!(
8392                "v3.3.1 framework reconciliation: {} differences across {} entities",
8393                snapshot.framework_difference_count,
8394                snapshot.framework_reconciliations.len()
8395            );
8396        }
8397
8398        info!(
8399            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8400            snapshot.revenue_contract_count,
8401            snapshot.impairment_test_count,
8402            snapshot.business_combination_count,
8403            snapshot.ecl_model_count,
8404            snapshot.provision_count,
8405            snapshot.currency_translation_count,
8406            snapshot.lease_count,
8407            snapshot.fair_value_measurement_count,
8408            snapshot.framework_difference_count,
8409        );
8410        self.check_resources_with_log("post-accounting-standards")?;
8411
8412        Ok(snapshot)
8413    }
8414
8415    /// v3.3.1: helper to resolve the accounting-standards framework enum
8416    /// from config into the `datasynth_standards::framework::AccountingFramework`
8417    /// type expected by standards generators. Falls back to US GAAP.
8418    fn resolve_accounting_framework(
8419        cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8420    ) -> datasynth_standards::framework::AccountingFramework {
8421        use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8422        use datasynth_standards::framework::AccountingFramework as Fw;
8423        match cfg {
8424            Some(Cfg::Ifrs) => Fw::Ifrs,
8425            Some(Cfg::DualReporting) => Fw::DualReporting,
8426            Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8427            Some(Cfg::GermanGaap) => Fw::GermanGaap,
8428            _ => Fw::UsGaap,
8429        }
8430    }
8431
8432    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
8433    fn phase_manufacturing(
8434        &mut self,
8435        stats: &mut EnhancedGenerationStatistics,
8436    ) -> SynthResult<ManufacturingSnapshot> {
8437        if !self.phase_config.generate_manufacturing {
8438            debug!("Phase 18: Skipped (manufacturing generation disabled)");
8439            return Ok(ManufacturingSnapshot::default());
8440        }
8441        info!("Phase 18: Generating Manufacturing Data");
8442
8443        let seed = self.seed;
8444        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8445            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8446        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8447        let company_code = self
8448            .config
8449            .companies
8450            .first()
8451            .map(|c| c.code.as_str())
8452            .unwrap_or("1000");
8453
8454        let material_data: Vec<(String, String)> = self
8455            .master_data
8456            .materials
8457            .iter()
8458            .map(|m| (m.material_id.clone(), m.description.clone()))
8459            .collect();
8460
8461        if material_data.is_empty() {
8462            debug!("Phase 18: Skipped (no materials available)");
8463            return Ok(ManufacturingSnapshot::default());
8464        }
8465
8466        let mut snapshot = ManufacturingSnapshot::default();
8467
8468        // Generate production orders
8469        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8470        // v3.4.3: snap planned / actual / operation dates to business days.
8471        if let Some(ctx) = &self.temporal_context {
8472            prod_gen.set_temporal_context(Arc::clone(ctx));
8473        }
8474        let production_orders = prod_gen.generate(
8475            company_code,
8476            &material_data,
8477            start_date,
8478            end_date,
8479            &self.config.manufacturing.production_orders,
8480            &self.config.manufacturing.costing,
8481            &self.config.manufacturing.routing,
8482        );
8483        snapshot.production_order_count = production_orders.len();
8484
8485        // Generate quality inspections from production orders
8486        let inspection_data: Vec<(String, String, String)> = production_orders
8487            .iter()
8488            .map(|po| {
8489                (
8490                    po.order_id.clone(),
8491                    po.material_id.clone(),
8492                    po.material_description.clone(),
8493                )
8494            })
8495            .collect();
8496
8497        snapshot.production_orders = production_orders;
8498
8499        if !inspection_data.is_empty() {
8500            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8501            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8502            snapshot.quality_inspection_count = inspections.len();
8503            snapshot.quality_inspections = inspections;
8504        }
8505
8506        // Generate cycle counts (one per month)
8507        let storage_locations: Vec<(String, String)> = material_data
8508            .iter()
8509            .enumerate()
8510            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8511            .collect();
8512
8513        let employee_ids: Vec<String> = self
8514            .master_data
8515            .employees
8516            .iter()
8517            .map(|e| e.employee_id.clone())
8518            .collect();
8519        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8520            .with_employee_pool(employee_ids);
8521        let mut cycle_count_total = 0usize;
8522        for month in 0..self.config.global.period_months {
8523            let count_date = start_date + chrono::Months::new(month);
8524            let items_per_count = storage_locations.len().clamp(10, 50);
8525            let cc = cc_gen.generate(
8526                company_code,
8527                &storage_locations,
8528                count_date,
8529                items_per_count,
8530            );
8531            snapshot.cycle_counts.push(cc);
8532            cycle_count_total += 1;
8533        }
8534        snapshot.cycle_count_count = cycle_count_total;
8535
8536        // Generate BOM components
8537        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8538        let bom_components = bom_gen.generate(company_code, &material_data);
8539        snapshot.bom_component_count = bom_components.len();
8540        snapshot.bom_components = bom_components;
8541
8542        // Generate inventory movements — link GoodsIssue movements to real production order IDs
8543        let currency = self
8544            .config
8545            .companies
8546            .first()
8547            .map(|c| c.currency.as_str())
8548            .unwrap_or("USD");
8549        let production_order_ids: Vec<String> = snapshot
8550            .production_orders
8551            .iter()
8552            .map(|po| po.order_id.clone())
8553            .collect();
8554        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8555        let inventory_movements = inv_mov_gen.generate_with_production_orders(
8556            company_code,
8557            &material_data,
8558            start_date,
8559            end_date,
8560            2,
8561            currency,
8562            &production_order_ids,
8563        );
8564        snapshot.inventory_movement_count = inventory_movements.len();
8565        snapshot.inventory_movements = inventory_movements;
8566
8567        stats.production_order_count = snapshot.production_order_count;
8568        stats.quality_inspection_count = snapshot.quality_inspection_count;
8569        stats.cycle_count_count = snapshot.cycle_count_count;
8570        stats.bom_component_count = snapshot.bom_component_count;
8571        stats.inventory_movement_count = snapshot.inventory_movement_count;
8572
8573        info!(
8574            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8575            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8576            snapshot.bom_component_count, snapshot.inventory_movement_count
8577        );
8578        self.check_resources_with_log("post-manufacturing")?;
8579
8580        Ok(snapshot)
8581    }
8582
8583    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
8584    fn phase_sales_kpi_budgets(
8585        &mut self,
8586        coa: &Arc<ChartOfAccounts>,
8587        financial_reporting: &FinancialReportingSnapshot,
8588        stats: &mut EnhancedGenerationStatistics,
8589    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8590        if !self.phase_config.generate_sales_kpi_budgets {
8591            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8592            return Ok(SalesKpiBudgetsSnapshot::default());
8593        }
8594        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8595
8596        let seed = self.seed;
8597        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8598            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8599        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8600        let company_code = self
8601            .config
8602            .companies
8603            .first()
8604            .map(|c| c.code.as_str())
8605            .unwrap_or("1000");
8606
8607        let mut snapshot = SalesKpiBudgetsSnapshot::default();
8608
8609        // Sales Quotes
8610        if self.config.sales_quotes.enabled {
8611            let customer_data: Vec<(String, String)> = self
8612                .master_data
8613                .customers
8614                .iter()
8615                .map(|c| (c.customer_id.clone(), c.name.clone()))
8616                .collect();
8617            let material_data: Vec<(String, String)> = self
8618                .master_data
8619                .materials
8620                .iter()
8621                .map(|m| (m.material_id.clone(), m.description.clone()))
8622                .collect();
8623
8624            if !customer_data.is_empty() && !material_data.is_empty() {
8625                let employee_ids: Vec<String> = self
8626                    .master_data
8627                    .employees
8628                    .iter()
8629                    .map(|e| e.employee_id.clone())
8630                    .collect();
8631                let customer_ids: Vec<String> = self
8632                    .master_data
8633                    .customers
8634                    .iter()
8635                    .map(|c| c.customer_id.clone())
8636                    .collect();
8637                let company_currency = self
8638                    .config
8639                    .companies
8640                    .first()
8641                    .map(|c| c.currency.as_str())
8642                    .unwrap_or("USD");
8643
8644                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8645                    .with_pools(employee_ids, customer_ids);
8646                let quotes = quote_gen.generate_with_currency(
8647                    company_code,
8648                    &customer_data,
8649                    &material_data,
8650                    start_date,
8651                    end_date,
8652                    &self.config.sales_quotes,
8653                    company_currency,
8654                );
8655                snapshot.sales_quote_count = quotes.len();
8656                snapshot.sales_quotes = quotes;
8657            }
8658        }
8659
8660        // Management KPIs
8661        if self.config.financial_reporting.management_kpis.enabled {
8662            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8663            let mut kpis = kpi_gen.generate(
8664                company_code,
8665                start_date,
8666                end_date,
8667                &self.config.financial_reporting.management_kpis,
8668            );
8669
8670            // Override financial KPIs with actual data from financial statements
8671            {
8672                use rust_decimal::Decimal;
8673
8674                if let Some(income_stmt) =
8675                    financial_reporting.financial_statements.iter().find(|fs| {
8676                        fs.statement_type == StatementType::IncomeStatement
8677                            && fs.company_code == company_code
8678                    })
8679                {
8680                    // Extract revenue and COGS from income statement line items
8681                    let total_revenue: Decimal = income_stmt
8682                        .line_items
8683                        .iter()
8684                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
8685                        .map(|li| li.amount)
8686                        .sum();
8687                    let total_cogs: Decimal = income_stmt
8688                        .line_items
8689                        .iter()
8690                        .filter(|li| {
8691                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8692                                && !li.is_total
8693                        })
8694                        .map(|li| li.amount.abs())
8695                        .sum();
8696                    let total_opex: Decimal = income_stmt
8697                        .line_items
8698                        .iter()
8699                        .filter(|li| {
8700                            li.section.contains("Expense")
8701                                && !li.is_total
8702                                && !li.section.contains("Cost")
8703                        })
8704                        .map(|li| li.amount.abs())
8705                        .sum();
8706
8707                    if total_revenue > Decimal::ZERO {
8708                        let hundred = Decimal::from(100);
8709                        let gross_margin_pct =
8710                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8711                        let operating_income = total_revenue - total_cogs - total_opex;
8712                        let op_margin_pct =
8713                            (operating_income * hundred / total_revenue).round_dp(2);
8714
8715                        // Override gross margin and operating margin KPIs
8716                        for kpi in &mut kpis {
8717                            if kpi.name == "Gross Margin" {
8718                                kpi.value = gross_margin_pct;
8719                            } else if kpi.name == "Operating Margin" {
8720                                kpi.value = op_margin_pct;
8721                            }
8722                        }
8723                    }
8724                }
8725
8726                // Override Current Ratio from balance sheet
8727                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8728                    fs.statement_type == StatementType::BalanceSheet
8729                        && fs.company_code == company_code
8730                }) {
8731                    let current_assets: Decimal = bs
8732                        .line_items
8733                        .iter()
8734                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8735                        .map(|li| li.amount)
8736                        .sum();
8737                    let current_liabilities: Decimal = bs
8738                        .line_items
8739                        .iter()
8740                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8741                        .map(|li| li.amount.abs())
8742                        .sum();
8743
8744                    if current_liabilities > Decimal::ZERO {
8745                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
8746                        for kpi in &mut kpis {
8747                            if kpi.name == "Current Ratio" {
8748                                kpi.value = current_ratio;
8749                            }
8750                        }
8751                    }
8752                }
8753            }
8754
8755            snapshot.kpi_count = kpis.len();
8756            snapshot.kpis = kpis;
8757        }
8758
8759        // Budgets
8760        if self.config.financial_reporting.budgets.enabled {
8761            let account_data: Vec<(String, String)> = coa
8762                .accounts
8763                .iter()
8764                .map(|a| (a.account_number.clone(), a.short_description.clone()))
8765                .collect();
8766
8767            if !account_data.is_empty() {
8768                let fiscal_year = start_date.year() as u32;
8769                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8770                let budget = budget_gen.generate(
8771                    company_code,
8772                    fiscal_year,
8773                    &account_data,
8774                    &self.config.financial_reporting.budgets,
8775                );
8776                snapshot.budget_line_count = budget.line_items.len();
8777                snapshot.budgets.push(budget);
8778            }
8779        }
8780
8781        stats.sales_quote_count = snapshot.sales_quote_count;
8782        stats.kpi_count = snapshot.kpi_count;
8783        stats.budget_line_count = snapshot.budget_line_count;
8784
8785        info!(
8786            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8787            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8788        );
8789        self.check_resources_with_log("post-sales-kpi-budgets")?;
8790
8791        Ok(snapshot)
8792    }
8793
8794    /// Compute pre-tax income for a single company from actual journal entries.
8795    ///
8796    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
8797    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
8798    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
8799    /// and the period-close engine so that all three use a consistent definition.
8800    fn compute_pre_tax_income(
8801        company_code: &str,
8802        journal_entries: &[JournalEntry],
8803    ) -> rust_decimal::Decimal {
8804        use datasynth_core::accounts::AccountCategory;
8805        use rust_decimal::Decimal;
8806
8807        let mut total_revenue = Decimal::ZERO;
8808        let mut total_expenses = Decimal::ZERO;
8809
8810        for je in journal_entries {
8811            if je.header.company_code != company_code {
8812                continue;
8813            }
8814            for line in &je.lines {
8815                let cat = AccountCategory::from_account(&line.gl_account);
8816                match cat {
8817                    AccountCategory::Revenue => {
8818                        total_revenue += line.credit_amount - line.debit_amount;
8819                    }
8820                    AccountCategory::Cogs
8821                    | AccountCategory::OperatingExpense
8822                    | AccountCategory::OtherIncomeExpense => {
8823                        total_expenses += line.debit_amount - line.credit_amount;
8824                    }
8825                    _ => {}
8826                }
8827            }
8828        }
8829
8830        let pti = (total_revenue - total_expenses).round_dp(2);
8831        if pti == rust_decimal::Decimal::ZERO {
8832            // No income statement activity yet — fall back to a synthetic value so the
8833            // tax provision generator can still produce meaningful output.
8834            rust_decimal::Decimal::from(1_000_000u32)
8835        } else {
8836            pti
8837        }
8838    }
8839
8840    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
8841    fn phase_tax_generation(
8842        &mut self,
8843        document_flows: &DocumentFlowSnapshot,
8844        journal_entries: &[JournalEntry],
8845        stats: &mut EnhancedGenerationStatistics,
8846    ) -> SynthResult<TaxSnapshot> {
8847        if !self.phase_config.generate_tax {
8848            debug!("Phase 20: Skipped (tax generation disabled)");
8849            return Ok(TaxSnapshot::default());
8850        }
8851        info!("Phase 20: Generating Tax Data");
8852
8853        let seed = self.seed;
8854        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8855            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8856        let fiscal_year = start_date.year();
8857        let company_code = self
8858            .config
8859            .companies
8860            .first()
8861            .map(|c| c.code.as_str())
8862            .unwrap_or("1000");
8863
8864        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8865            seed + 370,
8866            self.config.tax.clone(),
8867        );
8868
8869        let pack = self.primary_pack().clone();
8870        let (jurisdictions, codes) =
8871            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8872
8873        // Generate tax provisions for each company
8874        let mut provisions = Vec::new();
8875        if self.config.tax.provisions.enabled {
8876            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8877            for company in &self.config.companies {
8878                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8879                let statutory_rate = rust_decimal::Decimal::new(
8880                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8881                    2,
8882                );
8883                let provision = provision_gen.generate(
8884                    &company.code,
8885                    start_date,
8886                    pre_tax_income,
8887                    statutory_rate,
8888                );
8889                provisions.push(provision);
8890            }
8891        }
8892
8893        // Generate tax lines from document invoices
8894        let mut tax_lines = Vec::new();
8895        if !codes.is_empty() {
8896            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8897                datasynth_generators::TaxLineGeneratorConfig::default(),
8898                codes.clone(),
8899                seed + 372,
8900            );
8901
8902            // Tax lines from vendor invoices (input tax)
8903            // Use the first company's country as buyer country
8904            let buyer_country = self
8905                .config
8906                .companies
8907                .first()
8908                .map(|c| c.country.as_str())
8909                .unwrap_or("US");
8910            for vi in &document_flows.vendor_invoices {
8911                let lines = tax_line_gen.generate_for_document(
8912                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
8913                    &vi.header.document_id,
8914                    buyer_country, // seller approx same country
8915                    buyer_country,
8916                    vi.payable_amount,
8917                    vi.header.document_date,
8918                    None,
8919                );
8920                tax_lines.extend(lines);
8921            }
8922
8923            // Tax lines from customer invoices (output tax)
8924            for ci in &document_flows.customer_invoices {
8925                let lines = tax_line_gen.generate_for_document(
8926                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8927                    &ci.header.document_id,
8928                    buyer_country, // seller is the company
8929                    buyer_country,
8930                    ci.total_gross_amount,
8931                    ci.header.document_date,
8932                    None,
8933                );
8934                tax_lines.extend(lines);
8935            }
8936        }
8937
8938        // Generate deferred tax data (IAS 12 / ASC 740) for each company
8939        let deferred_tax = {
8940            let companies: Vec<(&str, &str)> = self
8941                .config
8942                .companies
8943                .iter()
8944                .map(|c| (c.code.as_str(), c.country.as_str()))
8945                .collect();
8946            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8947            deferred_gen.generate(&companies, start_date, journal_entries)
8948        };
8949
8950        // Build a document_id → posting_date map so each tax JE uses its
8951        // source document's date rather than a blanket period-end date.
8952        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8953            std::collections::HashMap::new();
8954        for vi in &document_flows.vendor_invoices {
8955            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8956        }
8957        for ci in &document_flows.customer_invoices {
8958            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8959        }
8960
8961        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
8962        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8963        let tax_posting_journal_entries = if !tax_lines.is_empty() {
8964            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8965                &tax_lines,
8966                company_code,
8967                &doc_dates,
8968                end_date,
8969            );
8970            debug!("Generated {} tax posting JEs", jes.len());
8971            jes
8972        } else {
8973            Vec::new()
8974        };
8975
8976        let snapshot = TaxSnapshot {
8977            jurisdiction_count: jurisdictions.len(),
8978            code_count: codes.len(),
8979            jurisdictions,
8980            codes,
8981            tax_provisions: provisions,
8982            tax_lines,
8983            tax_returns: Vec::new(),
8984            withholding_records: Vec::new(),
8985            tax_anomaly_labels: Vec::new(),
8986            deferred_tax,
8987            tax_posting_journal_entries,
8988        };
8989
8990        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8991        stats.tax_code_count = snapshot.code_count;
8992        stats.tax_provision_count = snapshot.tax_provisions.len();
8993        stats.tax_line_count = snapshot.tax_lines.len();
8994
8995        info!(
8996            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8997            snapshot.jurisdiction_count,
8998            snapshot.code_count,
8999            snapshot.tax_provisions.len(),
9000            snapshot.deferred_tax.temporary_differences.len(),
9001            snapshot.deferred_tax.journal_entries.len(),
9002            snapshot.tax_posting_journal_entries.len(),
9003        );
9004        self.check_resources_with_log("post-tax")?;
9005
9006        Ok(snapshot)
9007    }
9008
9009    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
9010    fn phase_esg_generation(
9011        &mut self,
9012        document_flows: &DocumentFlowSnapshot,
9013        manufacturing: &ManufacturingSnapshot,
9014        stats: &mut EnhancedGenerationStatistics,
9015    ) -> SynthResult<EsgSnapshot> {
9016        if !self.phase_config.generate_esg {
9017            debug!("Phase 21: Skipped (ESG generation disabled)");
9018            return Ok(EsgSnapshot::default());
9019        }
9020        let degradation = self.check_resources()?;
9021        if degradation >= DegradationLevel::Reduced {
9022            debug!(
9023                "Phase skipped due to resource pressure (degradation: {:?})",
9024                degradation
9025            );
9026            return Ok(EsgSnapshot::default());
9027        }
9028        info!("Phase 21: Generating ESG Data");
9029
9030        let seed = self.seed;
9031        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9032            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9033        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9034        let entity_id = self
9035            .config
9036            .companies
9037            .first()
9038            .map(|c| c.code.as_str())
9039            .unwrap_or("1000");
9040
9041        let esg_cfg = &self.config.esg;
9042        let mut snapshot = EsgSnapshot::default();
9043
9044        // Energy consumption (feeds into scope 1 & 2 emissions)
9045        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
9046            esg_cfg.environmental.energy.clone(),
9047            seed + 80,
9048        );
9049        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
9050
9051        // Water usage
9052        let facility_count = esg_cfg.environmental.energy.facility_count;
9053        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
9054        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
9055
9056        // Waste
9057        let mut waste_gen = datasynth_generators::WasteGenerator::new(
9058            seed + 82,
9059            esg_cfg.environmental.waste.diversion_target,
9060            facility_count,
9061        );
9062        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
9063
9064        // Emissions (scope 1, 2, 3)
9065        let mut emission_gen =
9066            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
9067
9068        // Build EnergyInput from energy_records
9069        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
9070            .iter()
9071            .map(|e| datasynth_generators::EnergyInput {
9072                facility_id: e.facility_id.clone(),
9073                energy_type: match e.energy_source {
9074                    EnergySourceType::NaturalGas => {
9075                        datasynth_generators::EnergyInputType::NaturalGas
9076                    }
9077                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
9078                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
9079                    _ => datasynth_generators::EnergyInputType::Electricity,
9080                },
9081                consumption_kwh: e.consumption_kwh,
9082                period: e.period,
9083            })
9084            .collect();
9085
9086        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
9087        if !manufacturing.production_orders.is_empty() {
9088            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
9089                &manufacturing.production_orders,
9090                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
9091                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
9092            );
9093            if !mfg_energy.is_empty() {
9094                info!(
9095                    "ESG: {} energy inputs derived from {} production orders",
9096                    mfg_energy.len(),
9097                    manufacturing.production_orders.len(),
9098                );
9099                energy_inputs.extend(mfg_energy);
9100            }
9101        }
9102
9103        let mut emissions = Vec::new();
9104        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
9105        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
9106
9107        // Scope 3: use vendor spend data from actual payments
9108        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
9109            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9110            for payment in &document_flows.payments {
9111                if payment.is_vendor {
9112                    *totals
9113                        .entry(payment.business_partner_id.clone())
9114                        .or_default() += payment.amount;
9115                }
9116            }
9117            totals
9118        };
9119        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
9120            .master_data
9121            .vendors
9122            .iter()
9123            .map(|v| {
9124                let spend = vendor_payment_totals
9125                    .get(&v.vendor_id)
9126                    .copied()
9127                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
9128                datasynth_generators::VendorSpendInput {
9129                    vendor_id: v.vendor_id.clone(),
9130                    category: format!("{:?}", v.vendor_type).to_lowercase(),
9131                    spend,
9132                    country: v.country.clone(),
9133                }
9134            })
9135            .collect();
9136        if !vendor_spend.is_empty() {
9137            emissions.extend(emission_gen.generate_scope3_purchased_goods(
9138                entity_id,
9139                &vendor_spend,
9140                start_date,
9141                end_date,
9142            ));
9143        }
9144
9145        // Business travel & commuting (scope 3)
9146        let headcount = self.master_data.employees.len() as u32;
9147        if headcount > 0 {
9148            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
9149            emissions.extend(emission_gen.generate_scope3_business_travel(
9150                entity_id,
9151                travel_spend,
9152                start_date,
9153            ));
9154            emissions
9155                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
9156        }
9157
9158        snapshot.emission_count = emissions.len();
9159        snapshot.emissions = emissions;
9160        snapshot.energy = energy_records;
9161
9162        // Social: Workforce diversity, pay equity, safety
9163        let mut workforce_gen =
9164            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
9165        let total_headcount = headcount.max(100);
9166        snapshot.diversity =
9167            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
9168        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
9169
9170        // v2.4: Derive additional workforce diversity metrics from actual employee data
9171        if !self.master_data.employees.is_empty() {
9172            let hr_diversity = workforce_gen.generate_diversity_from_employees(
9173                entity_id,
9174                &self.master_data.employees,
9175                end_date,
9176            );
9177            if !hr_diversity.is_empty() {
9178                info!(
9179                    "ESG: {} diversity metrics derived from {} actual employees",
9180                    hr_diversity.len(),
9181                    self.master_data.employees.len(),
9182                );
9183                snapshot.diversity.extend(hr_diversity);
9184            }
9185        }
9186
9187        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
9188            entity_id,
9189            facility_count,
9190            start_date,
9191            end_date,
9192        );
9193
9194        // Compute safety metrics
9195        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
9196        let safety_metric = workforce_gen.compute_safety_metrics(
9197            entity_id,
9198            &snapshot.safety_incidents,
9199            total_hours,
9200            start_date,
9201        );
9202        snapshot.safety_metrics = vec![safety_metric];
9203
9204        // Governance
9205        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
9206            seed + 85,
9207            esg_cfg.governance.board_size,
9208            esg_cfg.governance.independence_target,
9209        );
9210        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
9211
9212        // Supplier ESG assessments
9213        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
9214            esg_cfg.supply_chain_esg.clone(),
9215            seed + 86,
9216        );
9217        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
9218            .master_data
9219            .vendors
9220            .iter()
9221            .map(|v| datasynth_generators::VendorInput {
9222                vendor_id: v.vendor_id.clone(),
9223                country: v.country.clone(),
9224                industry: format!("{:?}", v.vendor_type).to_lowercase(),
9225                quality_score: None,
9226            })
9227            .collect();
9228        snapshot.supplier_assessments =
9229            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
9230
9231        // Disclosures
9232        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
9233            seed + 87,
9234            esg_cfg.reporting.clone(),
9235            esg_cfg.climate_scenarios.clone(),
9236        );
9237        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
9238        snapshot.disclosures = disclosure_gen.generate_disclosures(
9239            entity_id,
9240            &snapshot.materiality,
9241            start_date,
9242            end_date,
9243        );
9244        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
9245        snapshot.disclosure_count = snapshot.disclosures.len();
9246
9247        // Anomaly injection
9248        if esg_cfg.anomaly_rate > 0.0 {
9249            let mut anomaly_injector =
9250                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
9251            let mut labels = Vec::new();
9252            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
9253            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
9254            labels.extend(
9255                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
9256            );
9257            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
9258            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
9259            snapshot.anomaly_labels = labels;
9260        }
9261
9262        stats.esg_emission_count = snapshot.emission_count;
9263        stats.esg_disclosure_count = snapshot.disclosure_count;
9264
9265        info!(
9266            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
9267            snapshot.emission_count,
9268            snapshot.disclosure_count,
9269            snapshot.supplier_assessments.len()
9270        );
9271        self.check_resources_with_log("post-esg")?;
9272
9273        Ok(snapshot)
9274    }
9275
9276    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
9277    fn phase_treasury_data(
9278        &mut self,
9279        document_flows: &DocumentFlowSnapshot,
9280        subledger: &SubledgerSnapshot,
9281        intercompany: &IntercompanySnapshot,
9282        stats: &mut EnhancedGenerationStatistics,
9283    ) -> SynthResult<TreasurySnapshot> {
9284        if !self.phase_config.generate_treasury {
9285            debug!("Phase 22: Skipped (treasury generation disabled)");
9286            return Ok(TreasurySnapshot::default());
9287        }
9288        let degradation = self.check_resources()?;
9289        if degradation >= DegradationLevel::Reduced {
9290            debug!(
9291                "Phase skipped due to resource pressure (degradation: {:?})",
9292                degradation
9293            );
9294            return Ok(TreasurySnapshot::default());
9295        }
9296        info!("Phase 22: Generating Treasury Data");
9297
9298        let seed = self.seed;
9299        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9300            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9301        let currency = self
9302            .config
9303            .companies
9304            .first()
9305            .map(|c| c.currency.as_str())
9306            .unwrap_or("USD");
9307        let entity_id = self
9308            .config
9309            .companies
9310            .first()
9311            .map(|c| c.code.as_str())
9312            .unwrap_or("1000");
9313
9314        let mut snapshot = TreasurySnapshot::default();
9315
9316        // Generate debt instruments
9317        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
9318            self.config.treasury.debt.clone(),
9319            seed + 90,
9320        );
9321        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
9322
9323        // Generate hedging instruments (IR swaps for floating-rate debt)
9324        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
9325            self.config.treasury.hedging.clone(),
9326            seed + 91,
9327        );
9328        for debt in &snapshot.debt_instruments {
9329            if debt.rate_type == InterestRateType::Variable {
9330                let swap = hedge_gen.generate_ir_swap(
9331                    currency,
9332                    debt.principal,
9333                    debt.origination_date,
9334                    debt.maturity_date,
9335                );
9336                snapshot.hedging_instruments.push(swap);
9337            }
9338        }
9339
9340        // Build FX exposures from foreign-currency payments and generate
9341        // FX forwards + hedge relationship designations via generate() API.
9342        {
9343            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
9344            for payment in &document_flows.payments {
9345                if payment.currency != currency {
9346                    let entry = fx_map
9347                        .entry(payment.currency.clone())
9348                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
9349                    entry.0 += payment.amount;
9350                    // Use the latest settlement date among grouped payments
9351                    if payment.header.document_date > entry.1 {
9352                        entry.1 = payment.header.document_date;
9353                    }
9354                }
9355            }
9356            if !fx_map.is_empty() {
9357                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9358                    .into_iter()
9359                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
9360                        datasynth_generators::treasury::FxExposure {
9361                            currency_pair: format!("{foreign_ccy}/{currency}"),
9362                            foreign_currency: foreign_ccy,
9363                            net_amount,
9364                            settlement_date,
9365                            description: "AP payment FX exposure".to_string(),
9366                        }
9367                    })
9368                    .collect();
9369                let (fx_instruments, fx_relationships) =
9370                    hedge_gen.generate(start_date, &fx_exposures);
9371                snapshot.hedging_instruments.extend(fx_instruments);
9372                snapshot.hedge_relationships.extend(fx_relationships);
9373            }
9374        }
9375
9376        // Inject anomalies if configured
9377        if self.config.treasury.anomaly_rate > 0.0 {
9378            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9379                seed + 92,
9380                self.config.treasury.anomaly_rate,
9381            );
9382            let mut labels = Vec::new();
9383            labels.extend(
9384                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9385            );
9386            snapshot.treasury_anomaly_labels = labels;
9387        }
9388
9389        // Generate cash positions from payment flows
9390        if self.config.treasury.cash_positioning.enabled {
9391            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9392
9393            // AP payments as outflows
9394            for payment in &document_flows.payments {
9395                cash_flows.push(datasynth_generators::treasury::CashFlow {
9396                    date: payment.header.document_date,
9397                    account_id: format!("{entity_id}-MAIN"),
9398                    amount: payment.amount,
9399                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9400                });
9401            }
9402
9403            // Customer receipts (from O2C chains) as inflows
9404            for chain in &document_flows.o2c_chains {
9405                if let Some(ref receipt) = chain.customer_receipt {
9406                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9407                        date: receipt.header.document_date,
9408                        account_id: format!("{entity_id}-MAIN"),
9409                        amount: receipt.amount,
9410                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9411                    });
9412                }
9413                // Remainder receipts (follow-up to partial payments)
9414                for receipt in &chain.remainder_receipts {
9415                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9416                        date: receipt.header.document_date,
9417                        account_id: format!("{entity_id}-MAIN"),
9418                        amount: receipt.amount,
9419                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9420                    });
9421                }
9422            }
9423
9424            if !cash_flows.is_empty() {
9425                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9426                    self.config.treasury.cash_positioning.clone(),
9427                    seed + 93,
9428                );
9429                let account_id = format!("{entity_id}-MAIN");
9430                snapshot.cash_positions = cash_gen.generate(
9431                    entity_id,
9432                    &account_id,
9433                    currency,
9434                    &cash_flows,
9435                    start_date,
9436                    start_date + chrono::Months::new(self.config.global.period_months),
9437                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
9438                );
9439            }
9440        }
9441
9442        // Generate cash forecasts from AR/AP aging
9443        if self.config.treasury.cash_forecasting.enabled {
9444            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9445
9446            // Build AR aging items from subledger AR invoices
9447            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9448                .ar_invoices
9449                .iter()
9450                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9451                .map(|inv| {
9452                    let days_past_due = if inv.due_date < end_date {
9453                        (end_date - inv.due_date).num_days().max(0) as u32
9454                    } else {
9455                        0
9456                    };
9457                    datasynth_generators::treasury::ArAgingItem {
9458                        expected_date: inv.due_date,
9459                        amount: inv.amount_remaining,
9460                        days_past_due,
9461                        document_id: inv.invoice_number.clone(),
9462                    }
9463                })
9464                .collect();
9465
9466            // Build AP aging items from subledger AP invoices
9467            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9468                .ap_invoices
9469                .iter()
9470                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9471                .map(|inv| datasynth_generators::treasury::ApAgingItem {
9472                    payment_date: inv.due_date,
9473                    amount: inv.amount_remaining,
9474                    document_id: inv.invoice_number.clone(),
9475                })
9476                .collect();
9477
9478            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9479                self.config.treasury.cash_forecasting.clone(),
9480                seed + 94,
9481            );
9482            let forecast = forecast_gen.generate(
9483                entity_id,
9484                currency,
9485                end_date,
9486                &ar_items,
9487                &ap_items,
9488                &[], // scheduled disbursements - empty for now
9489            );
9490            snapshot.cash_forecasts.push(forecast);
9491        }
9492
9493        // Generate cash pools and sweeps
9494        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9495            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9496            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9497                self.config.treasury.cash_pooling.clone(),
9498                seed + 95,
9499            );
9500
9501            // Create a pool from available accounts
9502            let account_ids: Vec<String> = snapshot
9503                .cash_positions
9504                .iter()
9505                .map(|cp| cp.bank_account_id.clone())
9506                .collect::<std::collections::HashSet<_>>()
9507                .into_iter()
9508                .collect();
9509
9510            if let Some(pool) =
9511                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9512            {
9513                // Generate sweeps - build participant balances from last cash position per account
9514                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9515                for cp in &snapshot.cash_positions {
9516                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9517                }
9518
9519                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9520                    latest_balances
9521                        .into_iter()
9522                        .filter(|(id, _)| pool.participant_accounts.contains(id))
9523                        .map(
9524                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
9525                                account_id: id,
9526                                balance,
9527                            },
9528                        )
9529                        .collect();
9530
9531                let sweeps =
9532                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9533                snapshot.cash_pool_sweeps = sweeps;
9534                snapshot.cash_pools.push(pool);
9535            }
9536        }
9537
9538        // Generate bank guarantees
9539        if self.config.treasury.bank_guarantees.enabled {
9540            let vendor_names: Vec<String> = self
9541                .master_data
9542                .vendors
9543                .iter()
9544                .map(|v| v.name.clone())
9545                .collect();
9546            if !vendor_names.is_empty() {
9547                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9548                    self.config.treasury.bank_guarantees.clone(),
9549                    seed + 96,
9550                );
9551                snapshot.bank_guarantees =
9552                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9553            }
9554        }
9555
9556        // Generate netting runs from intercompany matched pairs
9557        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9558            let entity_ids: Vec<String> = self
9559                .config
9560                .companies
9561                .iter()
9562                .map(|c| c.code.clone())
9563                .collect();
9564            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9565                .matched_pairs
9566                .iter()
9567                .map(|mp| {
9568                    (
9569                        mp.seller_company.clone(),
9570                        mp.buyer_company.clone(),
9571                        mp.amount,
9572                    )
9573                })
9574                .collect();
9575            if entity_ids.len() >= 2 {
9576                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9577                    self.config.treasury.netting.clone(),
9578                    seed + 97,
9579                );
9580                snapshot.netting_runs = netting_gen.generate(
9581                    &entity_ids,
9582                    currency,
9583                    start_date,
9584                    self.config.global.period_months,
9585                    &ic_amounts,
9586                );
9587            }
9588        }
9589
9590        // Generate treasury journal entries from the instruments we just created.
9591        {
9592            use datasynth_generators::treasury::TreasuryAccounting;
9593
9594            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9595            let mut treasury_jes = Vec::new();
9596
9597            // Debt interest accrual JEs
9598            if !snapshot.debt_instruments.is_empty() {
9599                let debt_jes =
9600                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9601                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9602                treasury_jes.extend(debt_jes);
9603            }
9604
9605            // Hedge mark-to-market JEs
9606            if !snapshot.hedging_instruments.is_empty() {
9607                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9608                    &snapshot.hedging_instruments,
9609                    &snapshot.hedge_relationships,
9610                    end_date,
9611                    entity_id,
9612                );
9613                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9614                treasury_jes.extend(hedge_jes);
9615            }
9616
9617            // Cash pool sweep JEs
9618            if !snapshot.cash_pool_sweeps.is_empty() {
9619                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9620                    &snapshot.cash_pool_sweeps,
9621                    entity_id,
9622                );
9623                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9624                treasury_jes.extend(sweep_jes);
9625            }
9626
9627            if !treasury_jes.is_empty() {
9628                debug!("Total treasury journal entries: {}", treasury_jes.len());
9629            }
9630            snapshot.journal_entries = treasury_jes;
9631        }
9632
9633        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9634        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9635        stats.cash_position_count = snapshot.cash_positions.len();
9636        stats.cash_forecast_count = snapshot.cash_forecasts.len();
9637        stats.cash_pool_count = snapshot.cash_pools.len();
9638
9639        info!(
9640            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9641            snapshot.debt_instruments.len(),
9642            snapshot.hedging_instruments.len(),
9643            snapshot.cash_positions.len(),
9644            snapshot.cash_forecasts.len(),
9645            snapshot.cash_pools.len(),
9646            snapshot.bank_guarantees.len(),
9647            snapshot.netting_runs.len(),
9648            snapshot.journal_entries.len(),
9649        );
9650        self.check_resources_with_log("post-treasury")?;
9651
9652        Ok(snapshot)
9653    }
9654
9655    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
9656    fn phase_project_accounting(
9657        &mut self,
9658        document_flows: &DocumentFlowSnapshot,
9659        hr: &HrSnapshot,
9660        stats: &mut EnhancedGenerationStatistics,
9661    ) -> SynthResult<ProjectAccountingSnapshot> {
9662        if !self.phase_config.generate_project_accounting {
9663            debug!("Phase 23: Skipped (project accounting disabled)");
9664            return Ok(ProjectAccountingSnapshot::default());
9665        }
9666        let degradation = self.check_resources()?;
9667        if degradation >= DegradationLevel::Reduced {
9668            debug!(
9669                "Phase skipped due to resource pressure (degradation: {:?})",
9670                degradation
9671            );
9672            return Ok(ProjectAccountingSnapshot::default());
9673        }
9674        info!("Phase 23: Generating Project Accounting Data");
9675
9676        let seed = self.seed;
9677        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9678            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9679        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9680        let company_code = self
9681            .config
9682            .companies
9683            .first()
9684            .map(|c| c.code.as_str())
9685            .unwrap_or("1000");
9686
9687        let mut snapshot = ProjectAccountingSnapshot::default();
9688
9689        // Generate projects with WBS hierarchies
9690        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9691            self.config.project_accounting.clone(),
9692            seed + 95,
9693        );
9694        let pool = project_gen.generate(company_code, start_date, end_date);
9695        snapshot.projects = pool.projects.clone();
9696
9697        // Link source documents to projects for cost allocation
9698        {
9699            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9700                Vec::new();
9701
9702            // Time entries
9703            for te in &hr.time_entries {
9704                let total_hours = te.hours_regular + te.hours_overtime;
9705                if total_hours > 0.0 {
9706                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9707                        id: te.entry_id.clone(),
9708                        entity_id: company_code.to_string(),
9709                        date: te.date,
9710                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9711                            .unwrap_or(rust_decimal::Decimal::ZERO),
9712                        source_type: CostSourceType::TimeEntry,
9713                        hours: Some(
9714                            rust_decimal::Decimal::from_f64_retain(total_hours)
9715                                .unwrap_or(rust_decimal::Decimal::ZERO),
9716                        ),
9717                    });
9718                }
9719            }
9720
9721            // Expense reports
9722            for er in &hr.expense_reports {
9723                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9724                    id: er.report_id.clone(),
9725                    entity_id: company_code.to_string(),
9726                    date: er.submission_date,
9727                    amount: er.total_amount,
9728                    source_type: CostSourceType::ExpenseReport,
9729                    hours: None,
9730                });
9731            }
9732
9733            // Purchase orders
9734            for po in &document_flows.purchase_orders {
9735                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9736                    id: po.header.document_id.clone(),
9737                    entity_id: company_code.to_string(),
9738                    date: po.header.document_date,
9739                    amount: po.total_net_amount,
9740                    source_type: CostSourceType::PurchaseOrder,
9741                    hours: None,
9742                });
9743            }
9744
9745            // Vendor invoices
9746            for vi in &document_flows.vendor_invoices {
9747                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9748                    id: vi.header.document_id.clone(),
9749                    entity_id: company_code.to_string(),
9750                    date: vi.header.document_date,
9751                    amount: vi.payable_amount,
9752                    source_type: CostSourceType::VendorInvoice,
9753                    hours: None,
9754                });
9755            }
9756
9757            if !source_docs.is_empty() && !pool.projects.is_empty() {
9758                let mut cost_gen =
9759                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
9760                        self.config.project_accounting.cost_allocation.clone(),
9761                        seed + 99,
9762                    );
9763                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9764            }
9765        }
9766
9767        // Generate change orders
9768        if self.config.project_accounting.change_orders.enabled {
9769            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9770                self.config.project_accounting.change_orders.clone(),
9771                seed + 96,
9772            );
9773            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9774        }
9775
9776        // Generate milestones
9777        if self.config.project_accounting.milestones.enabled {
9778            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9779                self.config.project_accounting.milestones.clone(),
9780                seed + 97,
9781            );
9782            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9783        }
9784
9785        // Generate earned value metrics (needs cost lines, so only if we have projects)
9786        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9787            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9788                self.config.project_accounting.earned_value.clone(),
9789                seed + 98,
9790            );
9791            snapshot.earned_value_metrics =
9792                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9793        }
9794
9795        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
9796        if self.config.project_accounting.revenue_recognition.enabled
9797            && !snapshot.projects.is_empty()
9798            && !snapshot.cost_lines.is_empty()
9799        {
9800            use datasynth_generators::project_accounting::RevenueGenerator;
9801            let rev_config = self.config.project_accounting.revenue_recognition.clone();
9802            let avg_contract_value =
9803                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9804                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9805
9806            // Build contract value tuples: only customer-type projects get revenue recognition.
9807            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
9808            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9809                snapshot
9810                    .projects
9811                    .iter()
9812                    .filter(|p| {
9813                        matches!(
9814                            p.project_type,
9815                            datasynth_core::models::ProjectType::Customer
9816                        )
9817                    })
9818                    .map(|p| {
9819                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
9820                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9821                        // budget × 1.25 → contract value
9822                        } else {
9823                            avg_contract_value
9824                        };
9825                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
9826                        (p.project_id.clone(), cv, etc)
9827                    })
9828                    .collect();
9829
9830            if !contract_values.is_empty() {
9831                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9832                snapshot.revenue_records = rev_gen.generate(
9833                    &snapshot.projects,
9834                    &snapshot.cost_lines,
9835                    &contract_values,
9836                    start_date,
9837                    end_date,
9838                );
9839                debug!(
9840                    "Generated {} revenue recognition records for {} customer projects",
9841                    snapshot.revenue_records.len(),
9842                    contract_values.len()
9843                );
9844            }
9845        }
9846
9847        stats.project_count = snapshot.projects.len();
9848        stats.project_change_order_count = snapshot.change_orders.len();
9849        stats.project_cost_line_count = snapshot.cost_lines.len();
9850
9851        info!(
9852            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9853            snapshot.projects.len(),
9854            snapshot.change_orders.len(),
9855            snapshot.milestones.len(),
9856            snapshot.earned_value_metrics.len()
9857        );
9858        self.check_resources_with_log("post-project-accounting")?;
9859
9860        Ok(snapshot)
9861    }
9862
9863    /// Phase 24: Generate process evolution and organizational events.
9864    fn phase_evolution_events(
9865        &mut self,
9866        stats: &mut EnhancedGenerationStatistics,
9867    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9868        if !self.phase_config.generate_evolution_events {
9869            debug!("Phase 24: Skipped (evolution events disabled)");
9870            return Ok((Vec::new(), Vec::new()));
9871        }
9872        info!("Phase 24: Generating Process Evolution + Organizational Events");
9873
9874        let seed = self.seed;
9875        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9876            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9877        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9878
9879        // Process evolution events
9880        let mut proc_gen =
9881            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9882                seed + 100,
9883            );
9884        let process_events = proc_gen.generate_events(start_date, end_date);
9885
9886        // Organizational events
9887        let company_codes: Vec<String> = self
9888            .config
9889            .companies
9890            .iter()
9891            .map(|c| c.code.clone())
9892            .collect();
9893        let mut org_gen =
9894            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9895                seed + 101,
9896            );
9897        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9898
9899        stats.process_evolution_event_count = process_events.len();
9900        stats.organizational_event_count = org_events.len();
9901
9902        info!(
9903            "Evolution events generated: {} process evolution, {} organizational",
9904            process_events.len(),
9905            org_events.len()
9906        );
9907        self.check_resources_with_log("post-evolution-events")?;
9908
9909        Ok((process_events, org_events))
9910    }
9911
9912    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
9913    /// data recovery, and regulatory changes).
9914    fn phase_disruption_events(
9915        &self,
9916        stats: &mut EnhancedGenerationStatistics,
9917    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9918        if !self.config.organizational_events.enabled {
9919            debug!("Phase 24b: Skipped (organizational events disabled)");
9920            return Ok(Vec::new());
9921        }
9922        info!("Phase 24b: Generating Disruption Events");
9923
9924        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9925            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9926        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9927
9928        let company_codes: Vec<String> = self
9929            .config
9930            .companies
9931            .iter()
9932            .map(|c| c.code.clone())
9933            .collect();
9934
9935        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9936        let events = gen.generate(start_date, end_date, &company_codes);
9937
9938        stats.disruption_event_count = events.len();
9939        info!("Disruption events generated: {} events", events.len());
9940        self.check_resources_with_log("post-disruption-events")?;
9941
9942        Ok(events)
9943    }
9944
9945    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
9946    ///
9947    /// Produces paired examples where each pair contains the original clean JE
9948    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
9949    /// split transaction). Useful for training anomaly detection models with
9950    /// known ground truth.
9951    fn phase_counterfactuals(
9952        &self,
9953        journal_entries: &[JournalEntry],
9954        stats: &mut EnhancedGenerationStatistics,
9955    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9956        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9957            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9958            return Ok(Vec::new());
9959        }
9960        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9961
9962        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9963
9964        let mut gen = CounterfactualGenerator::new(self.seed + 110);
9965
9966        // Rotating set of specs to produce diverse mutation types
9967        let specs = [
9968            CounterfactualSpec::ScaleAmount { factor: 2.5 },
9969            CounterfactualSpec::ShiftDate { days: -14 },
9970            CounterfactualSpec::SelfApprove,
9971            CounterfactualSpec::SplitTransaction { split_count: 3 },
9972        ];
9973
9974        let pairs: Vec<_> = journal_entries
9975            .iter()
9976            .enumerate()
9977            .map(|(i, je)| {
9978                let spec = &specs[i % specs.len()];
9979                gen.generate(je, spec)
9980            })
9981            .collect();
9982
9983        stats.counterfactual_pair_count = pairs.len();
9984        info!(
9985            "Counterfactual pairs generated: {} pairs from {} journal entries",
9986            pairs.len(),
9987            journal_entries.len()
9988        );
9989        self.check_resources_with_log("post-counterfactuals")?;
9990
9991        Ok(pairs)
9992    }
9993
9994    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
9995    ///
9996    /// Uses the anomaly labels (from Phase 8) to determine which documents are
9997    /// fraudulent, then generates probabilistic red flags on all chain documents.
9998    /// Non-fraud documents also receive red flags at a lower rate (false positives)
9999    /// to produce realistic ML training data.
10000    fn phase_red_flags(
10001        &self,
10002        anomaly_labels: &AnomalyLabels,
10003        document_flows: &DocumentFlowSnapshot,
10004        stats: &mut EnhancedGenerationStatistics,
10005    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
10006        if !self.config.fraud.enabled {
10007            debug!("Phase 26: Skipped (fraud generation disabled)");
10008            return Ok(Vec::new());
10009        }
10010        info!("Phase 26: Generating Fraud Red-Flag Indicators");
10011
10012        use datasynth_generators::fraud::RedFlagGenerator;
10013
10014        let generator = RedFlagGenerator::new();
10015        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
10016
10017        // Build a set of document IDs that are known-fraudulent from anomaly labels.
10018        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
10019            .labels
10020            .iter()
10021            .filter(|label| label.anomaly_type.is_intentional())
10022            .map(|label| label.document_id.as_str())
10023            .collect();
10024
10025        let mut flags = Vec::new();
10026
10027        // Iterate P2P chains: use the purchase order document ID as the chain key.
10028        for chain in &document_flows.p2p_chains {
10029            let doc_id = &chain.purchase_order.header.document_id;
10030            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
10031            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
10032        }
10033
10034        // Iterate O2C chains: use the sales order document ID as the chain key.
10035        for chain in &document_flows.o2c_chains {
10036            let doc_id = &chain.sales_order.header.document_id;
10037            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
10038            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
10039        }
10040
10041        stats.red_flag_count = flags.len();
10042        info!(
10043            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
10044            flags.len(),
10045            document_flows.p2p_chains.len(),
10046            document_flows.o2c_chains.len(),
10047            fraud_doc_ids.len()
10048        );
10049        self.check_resources_with_log("post-red-flags")?;
10050
10051        Ok(flags)
10052    }
10053
10054    /// Phase 26b: Generate collusion rings from employee/vendor pools.
10055    ///
10056    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
10057    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
10058    /// advance them over the simulation period.
10059    fn phase_collusion_rings(
10060        &mut self,
10061        stats: &mut EnhancedGenerationStatistics,
10062    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
10063        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
10064            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
10065            return Ok(Vec::new());
10066        }
10067        info!("Phase 26b: Generating Collusion Rings");
10068
10069        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10070            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10071        let months = self.config.global.period_months;
10072
10073        let employee_ids: Vec<String> = self
10074            .master_data
10075            .employees
10076            .iter()
10077            .map(|e| e.employee_id.clone())
10078            .collect();
10079        let vendor_ids: Vec<String> = self
10080            .master_data
10081            .vendors
10082            .iter()
10083            .map(|v| v.vendor_id.clone())
10084            .collect();
10085
10086        let mut generator =
10087            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
10088        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
10089
10090        stats.collusion_ring_count = rings.len();
10091        info!(
10092            "Collusion rings generated: {} rings, total members: {}",
10093            rings.len(),
10094            rings
10095                .iter()
10096                .map(datasynth_generators::fraud::CollusionRing::size)
10097                .sum::<usize>()
10098        );
10099        self.check_resources_with_log("post-collusion-rings")?;
10100
10101        Ok(rings)
10102    }
10103
10104    /// Phase 27: Generate bi-temporal version chains for vendor entities.
10105    ///
10106    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
10107    /// master data changes over time, supporting bi-temporal audit queries.
10108    fn phase_temporal_attributes(
10109        &mut self,
10110        stats: &mut EnhancedGenerationStatistics,
10111    ) -> SynthResult<
10112        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
10113    > {
10114        if !self.config.temporal_attributes.enabled {
10115            debug!("Phase 27: Skipped (temporal attributes disabled)");
10116            return Ok(Vec::new());
10117        }
10118        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
10119
10120        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10121            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10122
10123        // Build a TemporalAttributeConfig from the user's config.
10124        // Since Phase 27 is already gated on temporal_attributes.enabled,
10125        // default to enabling version chains so users get actual mutations.
10126        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
10127            || self.config.temporal_attributes.enabled;
10128        let temporal_config = {
10129            let ta = &self.config.temporal_attributes;
10130            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
10131                .enabled(ta.enabled)
10132                .closed_probability(ta.valid_time.closed_probability)
10133                .avg_validity_days(ta.valid_time.avg_validity_days)
10134                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
10135                .with_version_chains(if generate_version_chains {
10136                    ta.avg_versions_per_entity
10137                } else {
10138                    1.0
10139                })
10140                .build()
10141        };
10142        // Apply backdating settings if configured
10143        let temporal_config = if self
10144            .config
10145            .temporal_attributes
10146            .transaction_time
10147            .allow_backdating
10148        {
10149            let mut c = temporal_config;
10150            c.transaction_time.allow_backdating = true;
10151            c.transaction_time.backdating_probability = self
10152                .config
10153                .temporal_attributes
10154                .transaction_time
10155                .backdating_probability;
10156            c.transaction_time.max_backdate_days = self
10157                .config
10158                .temporal_attributes
10159                .transaction_time
10160                .max_backdate_days;
10161            c
10162        } else {
10163            temporal_config
10164        };
10165        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
10166            temporal_config,
10167            self.seed + 130,
10168            start_date,
10169        );
10170
10171        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
10172            self.seed + 130,
10173            datasynth_core::GeneratorType::Vendor,
10174        );
10175
10176        let chains: Vec<_> = self
10177            .master_data
10178            .vendors
10179            .iter()
10180            .map(|vendor| {
10181                let id = uuid_factory.next();
10182                gen.generate_version_chain(vendor.clone(), id)
10183            })
10184            .collect();
10185
10186        stats.temporal_version_chain_count = chains.len();
10187        info!("Temporal version chains generated: {} chains", chains.len());
10188        self.check_resources_with_log("post-temporal-attributes")?;
10189
10190        Ok(chains)
10191    }
10192
10193    /// Phase 28: Build entity relationship graph and cross-process links.
10194    ///
10195    /// Part 1 (gated on `relationship_strength.enabled`): builds an
10196    /// `EntityGraph` from master-data vendor/customer entities and
10197    /// journal-entry-derived transaction summaries.
10198    ///
10199    /// Part 2 (gated on `cross_process_links.enabled`): extracts
10200    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
10201    /// generates inventory-movement cross-process links.
10202    fn phase_entity_relationships(
10203        &self,
10204        journal_entries: &[JournalEntry],
10205        document_flows: &DocumentFlowSnapshot,
10206        stats: &mut EnhancedGenerationStatistics,
10207    ) -> SynthResult<(
10208        Option<datasynth_core::models::EntityGraph>,
10209        Vec<datasynth_core::models::CrossProcessLink>,
10210    )> {
10211        use datasynth_generators::relationships::{
10212            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
10213            TransactionSummary,
10214        };
10215
10216        let rs_enabled = self.config.relationship_strength.enabled;
10217        let cpl_enabled = self.config.cross_process_links.enabled
10218            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
10219
10220        if !rs_enabled && !cpl_enabled {
10221            debug!(
10222                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
10223            );
10224            return Ok((None, Vec::new()));
10225        }
10226
10227        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
10228
10229        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10230            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10231
10232        let company_code = self
10233            .config
10234            .companies
10235            .first()
10236            .map(|c| c.code.as_str())
10237            .unwrap_or("1000");
10238
10239        // Build the generator with matching config flags
10240        let gen_config = EntityGraphConfig {
10241            enabled: rs_enabled,
10242            cross_process: datasynth_generators::relationships::CrossProcessConfig {
10243                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
10244                enable_return_flows: false,
10245                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
10246                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
10247                // Use higher link rate for small datasets to avoid probabilistic empty results
10248                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
10249                    1.0
10250                } else {
10251                    0.30
10252                },
10253                ..Default::default()
10254            },
10255            strength_config: datasynth_generators::relationships::StrengthConfig {
10256                transaction_volume_weight: self
10257                    .config
10258                    .relationship_strength
10259                    .calculation
10260                    .transaction_volume_weight,
10261                transaction_count_weight: self
10262                    .config
10263                    .relationship_strength
10264                    .calculation
10265                    .transaction_count_weight,
10266                duration_weight: self
10267                    .config
10268                    .relationship_strength
10269                    .calculation
10270                    .relationship_duration_weight,
10271                recency_weight: self.config.relationship_strength.calculation.recency_weight,
10272                mutual_connections_weight: self
10273                    .config
10274                    .relationship_strength
10275                    .calculation
10276                    .mutual_connections_weight,
10277                recency_half_life_days: self
10278                    .config
10279                    .relationship_strength
10280                    .calculation
10281                    .recency_half_life_days,
10282            },
10283            ..Default::default()
10284        };
10285
10286        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
10287
10288        // --- Part 1: Entity Relationship Graph ---
10289        let entity_graph = if rs_enabled {
10290            // Build EntitySummary lists from master data
10291            let vendor_summaries: Vec<EntitySummary> = self
10292                .master_data
10293                .vendors
10294                .iter()
10295                .map(|v| {
10296                    EntitySummary::new(
10297                        &v.vendor_id,
10298                        &v.name,
10299                        datasynth_core::models::GraphEntityType::Vendor,
10300                        start_date,
10301                    )
10302                })
10303                .collect();
10304
10305            let customer_summaries: Vec<EntitySummary> = self
10306                .master_data
10307                .customers
10308                .iter()
10309                .map(|c| {
10310                    EntitySummary::new(
10311                        &c.customer_id,
10312                        &c.name,
10313                        datasynth_core::models::GraphEntityType::Customer,
10314                        start_date,
10315                    )
10316                })
10317                .collect();
10318
10319            // Build transaction summaries from journal entries.
10320            // Key = (company_code, trading_partner) for entries that have a
10321            // trading partner.  This captures intercompany flows and any JE
10322            // whose line items carry a trading_partner reference.
10323            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
10324                std::collections::HashMap::new();
10325
10326            for je in journal_entries {
10327                let cc = je.header.company_code.clone();
10328                let posting_date = je.header.posting_date;
10329                for line in &je.lines {
10330                    if let Some(ref tp) = line.trading_partner {
10331                        let amount = if line.debit_amount > line.credit_amount {
10332                            line.debit_amount
10333                        } else {
10334                            line.credit_amount
10335                        };
10336                        let entry = txn_summaries
10337                            .entry((cc.clone(), tp.clone()))
10338                            .or_insert_with(|| TransactionSummary {
10339                                total_volume: rust_decimal::Decimal::ZERO,
10340                                transaction_count: 0,
10341                                first_transaction_date: posting_date,
10342                                last_transaction_date: posting_date,
10343                                related_entities: std::collections::HashSet::new(),
10344                            });
10345                        entry.total_volume += amount;
10346                        entry.transaction_count += 1;
10347                        if posting_date < entry.first_transaction_date {
10348                            entry.first_transaction_date = posting_date;
10349                        }
10350                        if posting_date > entry.last_transaction_date {
10351                            entry.last_transaction_date = posting_date;
10352                        }
10353                        entry.related_entities.insert(cc.clone());
10354                    }
10355                }
10356            }
10357
10358            // Also extract transaction relationships from document flow chains.
10359            // P2P chains: Company → Vendor relationships
10360            for chain in &document_flows.p2p_chains {
10361                let cc = chain.purchase_order.header.company_code.clone();
10362                let vendor_id = chain.purchase_order.vendor_id.clone();
10363                let po_date = chain.purchase_order.header.document_date;
10364                let amount = chain.purchase_order.total_net_amount;
10365
10366                let entry = txn_summaries
10367                    .entry((cc.clone(), vendor_id))
10368                    .or_insert_with(|| TransactionSummary {
10369                        total_volume: rust_decimal::Decimal::ZERO,
10370                        transaction_count: 0,
10371                        first_transaction_date: po_date,
10372                        last_transaction_date: po_date,
10373                        related_entities: std::collections::HashSet::new(),
10374                    });
10375                entry.total_volume += amount;
10376                entry.transaction_count += 1;
10377                if po_date < entry.first_transaction_date {
10378                    entry.first_transaction_date = po_date;
10379                }
10380                if po_date > entry.last_transaction_date {
10381                    entry.last_transaction_date = po_date;
10382                }
10383                entry.related_entities.insert(cc);
10384            }
10385
10386            // O2C chains: Company → Customer relationships
10387            for chain in &document_flows.o2c_chains {
10388                let cc = chain.sales_order.header.company_code.clone();
10389                let customer_id = chain.sales_order.customer_id.clone();
10390                let so_date = chain.sales_order.header.document_date;
10391                let amount = chain.sales_order.total_net_amount;
10392
10393                let entry = txn_summaries
10394                    .entry((cc.clone(), customer_id))
10395                    .or_insert_with(|| TransactionSummary {
10396                        total_volume: rust_decimal::Decimal::ZERO,
10397                        transaction_count: 0,
10398                        first_transaction_date: so_date,
10399                        last_transaction_date: so_date,
10400                        related_entities: std::collections::HashSet::new(),
10401                    });
10402                entry.total_volume += amount;
10403                entry.transaction_count += 1;
10404                if so_date < entry.first_transaction_date {
10405                    entry.first_transaction_date = so_date;
10406                }
10407                if so_date > entry.last_transaction_date {
10408                    entry.last_transaction_date = so_date;
10409                }
10410                entry.related_entities.insert(cc);
10411            }
10412
10413            let as_of_date = journal_entries
10414                .last()
10415                .map(|je| je.header.posting_date)
10416                .unwrap_or(start_date);
10417
10418            let graph = gen.generate_entity_graph(
10419                company_code,
10420                as_of_date,
10421                &vendor_summaries,
10422                &customer_summaries,
10423                &txn_summaries,
10424            );
10425
10426            info!(
10427                "Entity relationship graph: {} nodes, {} edges",
10428                graph.nodes.len(),
10429                graph.edges.len()
10430            );
10431            stats.entity_relationship_node_count = graph.nodes.len();
10432            stats.entity_relationship_edge_count = graph.edges.len();
10433            Some(graph)
10434        } else {
10435            None
10436        };
10437
10438        // --- Part 2: Cross-Process Links ---
10439        let cross_process_links = if cpl_enabled {
10440            // Build GoodsReceiptRef from P2P chains
10441            let gr_refs: Vec<GoodsReceiptRef> = document_flows
10442                .p2p_chains
10443                .iter()
10444                .flat_map(|chain| {
10445                    let vendor_id = chain.purchase_order.vendor_id.clone();
10446                    let cc = chain.purchase_order.header.company_code.clone();
10447                    chain.goods_receipts.iter().flat_map(move |gr| {
10448                        gr.items.iter().filter_map({
10449                            let doc_id = gr.header.document_id.clone();
10450                            let v_id = vendor_id.clone();
10451                            let company = cc.clone();
10452                            let receipt_date = gr.header.document_date;
10453                            move |item| {
10454                                item.base
10455                                    .material_id
10456                                    .as_ref()
10457                                    .map(|mat_id| GoodsReceiptRef {
10458                                        document_id: doc_id.clone(),
10459                                        material_id: mat_id.clone(),
10460                                        quantity: item.base.quantity,
10461                                        receipt_date,
10462                                        vendor_id: v_id.clone(),
10463                                        company_code: company.clone(),
10464                                    })
10465                            }
10466                        })
10467                    })
10468                })
10469                .collect();
10470
10471            // Build DeliveryRef from O2C chains
10472            let del_refs: Vec<DeliveryRef> = document_flows
10473                .o2c_chains
10474                .iter()
10475                .flat_map(|chain| {
10476                    let customer_id = chain.sales_order.customer_id.clone();
10477                    let cc = chain.sales_order.header.company_code.clone();
10478                    chain.deliveries.iter().flat_map(move |del| {
10479                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10480                        del.items.iter().filter_map({
10481                            let doc_id = del.header.document_id.clone();
10482                            let c_id = customer_id.clone();
10483                            let company = cc.clone();
10484                            move |item| {
10485                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10486                                    document_id: doc_id.clone(),
10487                                    material_id: mat_id.clone(),
10488                                    quantity: item.base.quantity,
10489                                    delivery_date,
10490                                    customer_id: c_id.clone(),
10491                                    company_code: company.clone(),
10492                                })
10493                            }
10494                        })
10495                    })
10496                })
10497                .collect();
10498
10499            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10500            info!("Cross-process links generated: {} links", links.len());
10501            stats.cross_process_link_count = links.len();
10502            links
10503        } else {
10504            Vec::new()
10505        };
10506
10507        self.check_resources_with_log("post-entity-relationships")?;
10508        Ok((entity_graph, cross_process_links))
10509    }
10510
10511    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
10512    fn phase_industry_data(
10513        &self,
10514        stats: &mut EnhancedGenerationStatistics,
10515    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10516        if !self.config.industry_specific.enabled {
10517            return None;
10518        }
10519        info!("Phase 29: Generating industry-specific data");
10520        let output = datasynth_generators::industry::factory::generate_industry_output(
10521            self.config.global.industry,
10522        );
10523        stats.industry_gl_account_count = output.gl_accounts.len();
10524        info!(
10525            "Industry data generated: {} GL accounts for {:?}",
10526            output.gl_accounts.len(),
10527            self.config.global.industry
10528        );
10529        Some(output)
10530    }
10531
10532    /// Phase 3b: Generate opening balances for each company.
10533    ///
10534    /// # Order of precedence
10535    ///
10536    /// 1. **v5.3 chain carryover** (ShardContext.opening_balances non-empty):
10537    ///    convert each EntityOpeningBalance into a
10538    ///    GeneratedOpeningBalance per company. This branch runs
10539    ///    UNCONDITIONALLY — even when `balance.generate_opening_balances`
10540    ///    is `false` — so a non-overlay preset that gets driven through
10541    ///    `group generate-chain` still applies the prior-year carry-
10542    ///    forward instead of silently dropping it.
10543    /// 2. **`generate_opening_balances` flag**: if off (and no carryover),
10544    ///    return empty Vec.
10545    /// 3. **OpeningBalanceGenerator**: industry-mix sampler for the
10546    ///    period-0 engagement.
10547    fn phase_opening_balances(
10548        &mut self,
10549        coa: &Arc<ChartOfAccounts>,
10550        stats: &mut EnhancedGenerationStatistics,
10551    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10552        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10553            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10554        let fiscal_year = start_date.year();
10555
10556        // 1. v5.3 chain carryover — runs unconditionally when present.
10557        if let Some(ctx) = &self.shard_context {
10558            if !ctx.opening_balances.is_empty() {
10559                info!(
10560                    "Phase 3b: applying v5.3 opening-balance carryover ({} accounts × {} companies)",
10561                    ctx.opening_balances.len(),
10562                    self.config.companies.len(),
10563                );
10564                let mut results = Vec::new();
10565                for company in &self.config.companies {
10566                    let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10567                        .opening_balances
10568                        .iter()
10569                        .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10570                        .collect();
10571                    let total_assets = ctx
10572                        .opening_balances
10573                        .iter()
10574                        .filter(|ob| {
10575                            matches!(
10576                                ob.account_type,
10577                                AccountType::Asset | AccountType::ContraAsset
10578                            )
10579                        })
10580                        .map(|ob| ob.net_balance())
10581                        .sum::<rust_decimal::Decimal>();
10582                    let total_liabilities = ctx
10583                        .opening_balances
10584                        .iter()
10585                        .filter(|ob| {
10586                            matches!(
10587                                ob.account_type,
10588                                AccountType::Liability | AccountType::ContraLiability
10589                            )
10590                        })
10591                        .map(|ob| ob.net_balance())
10592                        .sum::<rust_decimal::Decimal>();
10593                    let total_equity = ctx
10594                        .opening_balances
10595                        .iter()
10596                        .filter(|ob| {
10597                            matches!(
10598                                ob.account_type,
10599                                AccountType::Equity | AccountType::ContraEquity
10600                            )
10601                        })
10602                        .map(|ob| ob.net_balance())
10603                        .sum::<rust_decimal::Decimal>();
10604                    let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10605                        < rust_decimal::Decimal::ONE;
10606                    results.push(GeneratedOpeningBalance {
10607                        company_code: company.code.clone(),
10608                        as_of_date: start_date,
10609                        balances,
10610                        total_assets,
10611                        total_liabilities,
10612                        total_equity,
10613                        is_balanced,
10614                        calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10615                            current_ratio: None,
10616                            quick_ratio: None,
10617                            debt_to_equity: None,
10618                            working_capital: rust_decimal::Decimal::ZERO,
10619                        },
10620                    });
10621                }
10622                stats.opening_balance_count = results.len();
10623                self.check_resources_with_log("post-opening-balances")?;
10624                return Ok(results);
10625            }
10626        }
10627
10628        // 2. Generator path is opt-in via the config flag.
10629        if !self.config.balance.generate_opening_balances {
10630            debug!("Phase 3b: Skipped (opening balance generation disabled)");
10631            return Ok(Vec::new());
10632        }
10633        info!("Phase 3b: Generating Opening Balances");
10634
10635        // 3. OpeningBalanceGenerator — industry-mix sampler for period 0.
10636        let industry = match self.config.global.industry {
10637            IndustrySector::Manufacturing => IndustryType::Manufacturing,
10638            IndustrySector::Retail => IndustryType::Retail,
10639            IndustrySector::FinancialServices => IndustryType::Financial,
10640            IndustrySector::Healthcare => IndustryType::Healthcare,
10641            IndustrySector::Technology => IndustryType::Technology,
10642            _ => IndustryType::Manufacturing,
10643        };
10644
10645        let config = datasynth_generators::OpeningBalanceConfig {
10646            industry,
10647            ..Default::default()
10648        };
10649        let mut gen =
10650            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10651
10652        let mut results = Vec::new();
10653        for company in &self.config.companies {
10654            let spec = OpeningBalanceSpec::new(
10655                company.code.clone(),
10656                start_date,
10657                fiscal_year,
10658                company.currency.clone(),
10659                rust_decimal::Decimal::new(10_000_000, 0),
10660                industry,
10661            );
10662            let ob = gen.generate(&spec, coa, start_date, &company.code);
10663            results.push(ob);
10664        }
10665
10666        stats.opening_balance_count = results.len();
10667        info!("Opening balances generated: {} companies", results.len());
10668        self.check_resources_with_log("post-opening-balances")?;
10669
10670        Ok(results)
10671    }
10672
10673    /// Phase 9b: Reconcile GL control accounts to subledger balances.
10674    fn phase_subledger_reconciliation(
10675        &mut self,
10676        subledger: &SubledgerSnapshot,
10677        entries: &[JournalEntry],
10678        stats: &mut EnhancedGenerationStatistics,
10679    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10680        if !self.config.balance.reconcile_subledgers {
10681            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10682            return Ok(Vec::new());
10683        }
10684        info!("Phase 9b: Reconciling GL to subledger balances");
10685
10686        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10687            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10688            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10689
10690        // Build GL balance map from journal entries using a balance tracker
10691        let tracker_config = BalanceTrackerConfig {
10692            validate_on_each_entry: false,
10693            track_history: false,
10694            fail_on_validation_error: false,
10695            ..Default::default()
10696        };
10697        let recon_currency = self
10698            .config
10699            .companies
10700            .first()
10701            .map(|c| c.currency.clone())
10702            .unwrap_or_else(|| "USD".to_string());
10703        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10704        let validation_errors = tracker.apply_entries(entries);
10705        if !validation_errors.is_empty() {
10706            warn!(
10707                error_count = validation_errors.len(),
10708                "Balance tracker encountered validation errors during subledger reconciliation"
10709            );
10710            for err in &validation_errors {
10711                debug!("Balance validation error: {:?}", err);
10712            }
10713        }
10714
10715        let mut engine = datasynth_generators::ReconciliationEngine::new(
10716            datasynth_generators::ReconciliationConfig::default(),
10717        );
10718
10719        let mut results = Vec::new();
10720        let company_code = self
10721            .config
10722            .companies
10723            .first()
10724            .map(|c| c.code.as_str())
10725            .unwrap_or("1000");
10726
10727        // Reconcile AR
10728        if !subledger.ar_invoices.is_empty() {
10729            let gl_balance = tracker
10730                .get_account_balance(
10731                    company_code,
10732                    datasynth_core::accounts::control_accounts::AR_CONTROL,
10733                )
10734                .map(|b| b.closing_balance)
10735                .unwrap_or_default();
10736            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10737            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10738        }
10739
10740        // Reconcile AP
10741        if !subledger.ap_invoices.is_empty() {
10742            let gl_balance = tracker
10743                .get_account_balance(
10744                    company_code,
10745                    datasynth_core::accounts::control_accounts::AP_CONTROL,
10746                )
10747                .map(|b| b.closing_balance)
10748                .unwrap_or_default();
10749            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10750            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10751        }
10752
10753        // Reconcile FA
10754        if !subledger.fa_records.is_empty() {
10755            let gl_asset_balance = tracker
10756                .get_account_balance(
10757                    company_code,
10758                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10759                )
10760                .map(|b| b.closing_balance)
10761                .unwrap_or_default();
10762            let gl_accum_depr_balance = tracker
10763                .get_account_balance(
10764                    company_code,
10765                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10766                )
10767                .map(|b| b.closing_balance)
10768                .unwrap_or_default();
10769            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10770                subledger.fa_records.iter().collect();
10771            let (asset_recon, depr_recon) = engine.reconcile_fa(
10772                company_code,
10773                end_date,
10774                gl_asset_balance,
10775                gl_accum_depr_balance,
10776                &fa_refs,
10777            );
10778            results.push(asset_recon);
10779            results.push(depr_recon);
10780        }
10781
10782        // Reconcile Inventory
10783        if !subledger.inventory_positions.is_empty() {
10784            let gl_balance = tracker
10785                .get_account_balance(
10786                    company_code,
10787                    datasynth_core::accounts::control_accounts::INVENTORY,
10788                )
10789                .map(|b| b.closing_balance)
10790                .unwrap_or_default();
10791            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10792                subledger.inventory_positions.iter().collect();
10793            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10794        }
10795
10796        stats.subledger_reconciliation_count = results.len();
10797        let passed = results.iter().filter(|r| r.is_balanced()).count();
10798        let failed = results.len() - passed;
10799        info!(
10800            "Subledger reconciliation: {} checks, {} passed, {} failed",
10801            results.len(),
10802            passed,
10803            failed
10804        );
10805        self.check_resources_with_log("post-subledger-reconciliation")?;
10806
10807        Ok(results)
10808    }
10809
10810    /// Generate the chart of accounts.
10811    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10812        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10813
10814        let coa_framework = self.resolve_coa_framework();
10815
10816        let mut gen = ChartOfAccountsGenerator::new(
10817            self.config.chart_of_accounts.complexity,
10818            self.config.global.industry,
10819            self.seed,
10820        )
10821        .with_coa_framework(coa_framework)
10822        // v5.7.0 — honour the opt-in industry-pack expansion flag.
10823        .with_expand_industry_subaccounts(
10824            self.config.chart_of_accounts.expand_industry_subaccounts,
10825        );
10826
10827        let mut built = gen.generate();
10828        // v4.4.1: propagate the accounting framework label from config
10829        // onto the CoA struct so SDK consumers can read it without
10830        // cross-referencing the config (they previously saw null).
10831        if self.config.accounting_standards.enabled {
10832            use datasynth_config::schema::AccountingFrameworkConfig;
10833            built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10834                match f {
10835                    AccountingFrameworkConfig::UsGaap => "us_gaap",
10836                    AccountingFrameworkConfig::Ifrs => "ifrs",
10837                    AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10838                    AccountingFrameworkConfig::GermanGaap => "german_gaap",
10839                    AccountingFrameworkConfig::DualReporting => "dual_reporting",
10840                }
10841                .to_string()
10842            });
10843        }
10844        // SP4.2 W8.2 + W7.1 — remap synthetic account numbers to corpus
10845        // ones first (W8.2), then enrich descriptions via the overlay (W7.1).
10846        // Applied before Arc::new so we only build one Arc (no clone needed).
10847        if let Some(ref cached) = self.cached_priors {
10848            if let Some(ref coa_prior) = cached.coa_semantic {
10849                use datasynth_generators::coa_generator::{
10850                    remap_account_numbers_to_prior, ChartOfAccountsGenerator,
10851                };
10852                // W8.2 — replace synthetic account numbers with corpus
10853                // ones so the W7.1 overlay fires at ~80% instead of ~16%.
10854                let mut rng =
10855                    rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_200));
10856                let remapped = remap_account_numbers_to_prior(&mut built, coa_prior, &mut rng);
10857                tracing::info!(
10858                    target: "datasynth_runtime::coa",
10859                    remapped,
10860                    total = built.accounts.len(),
10861                    "SP4.2 W8.2 — remapped synthetic account numbers to prior-matched corpus values"
10862                );
10863                // W7.1 — now overlay descriptions / class metadata for the
10864                // (now mostly corpus-numbered) accounts.
10865                let applied =
10866                    ChartOfAccountsGenerator::apply_coa_semantic_prior(&mut built, coa_prior);
10867                tracing::info!(
10868                    target: "datasynth_runtime::coa",
10869                    applied,
10870                    total = built.accounts.len(),
10871                    "SP4.2 W7.1 — overlaid real CoA semantic entries onto synthetic accounts"
10872                );
10873            }
10874            // SP6 — taxonomy overlay: run AFTER the semantic overlay so
10875            // taxonomy-templated accounts take precedence over verbatim
10876            // semantic descriptions.  Uses SyntheticExampleResolver because
10877            // the CoA is built before master-data pools are populated (so
10878            // vendor/customer names are not yet available).
10879            if let Some(tx) = cached.text_taxonomy.as_ref() {
10880                use datasynth_core::distributions::text_taxonomy::SyntheticExampleResolver;
10881                use datasynth_generators::coa_generator::overlay_coa_taxonomy;
10882                let mut resolver = SyntheticExampleResolver;
10883                let mut rng =
10884                    rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_201));
10885                overlay_coa_taxonomy(&mut built, tx, &mut resolver, &mut rng);
10886                tracing::info!(
10887                    target: "datasynth_runtime::coa",
10888                    total = built.accounts.len(),
10889                    "SP6 — overlaid text-taxonomy templates onto CoA descriptions"
10890                );
10891            }
10892        }
10893
10894        let coa = Arc::new(built);
10895        self.coa = Some(Arc::clone(&coa));
10896
10897        if let Some(pb) = pb {
10898            pb.finish_with_message("Chart of Accounts complete");
10899        }
10900
10901        Ok(coa)
10902    }
10903
10904    /// Generate master data entities.
10905    fn generate_master_data(&mut self) -> SynthResult<()> {
10906        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10907            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10908        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10909
10910        let total = self.config.companies.len() as u64 * 5; // 5 entity types
10911        let pb = self.create_progress_bar(total, "Generating Master Data");
10912
10913        // Resolve country pack once for all companies (uses primary company's country)
10914        let pack = self.primary_pack().clone();
10915
10916        // Capture config values needed inside the parallel closure
10917        let vendors_per_company = self.phase_config.vendors_per_company;
10918        let customers_per_company = self.phase_config.customers_per_company;
10919        let materials_per_company = self.phase_config.materials_per_company;
10920        let assets_per_company = self.phase_config.assets_per_company;
10921        let coa_framework = self.resolve_coa_framework();
10922
10923        // Generate all master data in parallel across companies.
10924        // Each company's data is independent, making this embarrassingly parallel.
10925        let per_company_results: Vec<_> = self
10926            .config
10927            .companies
10928            .par_iter()
10929            .enumerate()
10930            .map(|(i, company)| {
10931                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10932                let pack = pack.clone();
10933
10934                // Generate vendors (offset counter so IDs are globally unique across companies)
10935                let mut vendor_gen = VendorGenerator::new(company_seed);
10936                vendor_gen.set_country_pack(pack.clone());
10937                vendor_gen.set_coa_framework(coa_framework);
10938                vendor_gen.set_counter_offset(i * vendors_per_company);
10939                // v3.2.0+: user-supplied bank names (and future template
10940                // strings) flow through the shared provider.
10941                vendor_gen.set_template_provider(self.template_provider.clone());
10942                // Wire vendor network config when enabled
10943                if self.config.vendor_network.enabled {
10944                    let vn = &self.config.vendor_network;
10945                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10946                        enabled: true,
10947                        depth: vn.depth,
10948                        tier1_count: datasynth_generators::TierCountConfig::new(
10949                            vn.tier1.min,
10950                            vn.tier1.max,
10951                        ),
10952                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
10953                            vn.tier2_per_parent.min,
10954                            vn.tier2_per_parent.max,
10955                        ),
10956                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
10957                            vn.tier3_per_parent.min,
10958                            vn.tier3_per_parent.max,
10959                        ),
10960                        cluster_distribution: datasynth_generators::ClusterDistribution {
10961                            reliable_strategic: vn.clusters.reliable_strategic,
10962                            standard_operational: vn.clusters.standard_operational,
10963                            transactional: vn.clusters.transactional,
10964                            problematic: vn.clusters.problematic,
10965                        },
10966                        concentration_limits: datasynth_generators::ConcentrationLimits {
10967                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10968                            max_top5: vn.dependencies.top_5_concentration,
10969                        },
10970                        ..datasynth_generators::VendorNetworkConfig::default()
10971                    });
10972                }
10973                let vendor_pool =
10974                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10975
10976                // Generate customers (offset counter so IDs are globally unique across companies)
10977                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10978                customer_gen.set_country_pack(pack.clone());
10979                customer_gen.set_coa_framework(coa_framework);
10980                customer_gen.set_counter_offset(i * customers_per_company);
10981                // v3.2.0+: user-supplied customer names flow through the shared provider.
10982                customer_gen.set_template_provider(self.template_provider.clone());
10983                // Wire customer segmentation config when enabled
10984                if self.config.customer_segmentation.enabled {
10985                    let cs = &self.config.customer_segmentation;
10986                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10987                        enabled: true,
10988                        segment_distribution: datasynth_generators::SegmentDistribution {
10989                            enterprise: cs.value_segments.enterprise.customer_share,
10990                            mid_market: cs.value_segments.mid_market.customer_share,
10991                            smb: cs.value_segments.smb.customer_share,
10992                            consumer: cs.value_segments.consumer.customer_share,
10993                        },
10994                        referral_config: datasynth_generators::ReferralConfig {
10995                            enabled: cs.networks.referrals.enabled,
10996                            referral_rate: cs.networks.referrals.referral_rate,
10997                            ..Default::default()
10998                        },
10999                        hierarchy_config: datasynth_generators::HierarchyConfig {
11000                            enabled: cs.networks.corporate_hierarchies.enabled,
11001                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
11002                            ..Default::default()
11003                        },
11004                        ..Default::default()
11005                    };
11006                    customer_gen.set_segmentation_config(seg_cfg);
11007                }
11008                let customer_pool = customer_gen.generate_customer_pool(
11009                    customers_per_company,
11010                    &company.code,
11011                    start_date,
11012                );
11013
11014                // Generate materials (offset counter so IDs are globally unique across companies)
11015                let mut material_gen = MaterialGenerator::new(company_seed + 200);
11016                material_gen.set_country_pack(pack.clone());
11017                material_gen.set_counter_offset(i * materials_per_company);
11018                // v3.2.1+: user-supplied material descriptions flow through shared provider
11019                material_gen.set_template_provider(self.template_provider.clone());
11020                let material_pool = material_gen.generate_material_pool(
11021                    materials_per_company,
11022                    &company.code,
11023                    start_date,
11024                );
11025
11026                // Generate fixed assets
11027                let mut asset_gen = AssetGenerator::new(company_seed + 300);
11028                // v3.2.1+: user-supplied asset descriptions flow through shared provider
11029                asset_gen.set_template_provider(self.template_provider.clone());
11030                let asset_pool = asset_gen.generate_asset_pool(
11031                    assets_per_company,
11032                    &company.code,
11033                    (start_date, end_date),
11034                );
11035
11036                // Generate employees
11037                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
11038                employee_gen.set_country_pack(pack);
11039                // v3.2.1+: user-supplied department names flow through shared provider
11040                employee_gen.set_template_provider(self.template_provider.clone());
11041                let employee_pool =
11042                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
11043
11044                // Generate employee change history (2-5 events per employee)
11045                let employee_change_history =
11046                    employee_gen.generate_all_change_history(&employee_pool, end_date);
11047
11048                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
11049                let employee_ids: Vec<String> = employee_pool
11050                    .employees
11051                    .iter()
11052                    .map(|e| e.employee_id.clone())
11053                    .collect();
11054                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
11055                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
11056
11057                // v5.1: profit centre hierarchy (two-level: top-level
11058                // segment / region / product-group nodes + sub-units).
11059                let mut pc_gen =
11060                    datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
11061                let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
11062
11063                (
11064                    vendor_pool.vendors,
11065                    customer_pool.customers,
11066                    material_pool.materials,
11067                    asset_pool.assets,
11068                    employee_pool.employees,
11069                    employee_change_history,
11070                    cost_centers,
11071                    profit_centers,
11072                )
11073            })
11074            .collect();
11075
11076        // Aggregate results from all companies
11077        for (
11078            vendors,
11079            customers,
11080            materials,
11081            assets,
11082            employees,
11083            change_history,
11084            cost_centers,
11085            profit_centers,
11086        ) in per_company_results
11087        {
11088            self.master_data.vendors.extend(vendors);
11089            self.master_data.customers.extend(customers);
11090            self.master_data.materials.extend(materials);
11091            self.master_data.assets.extend(assets);
11092            self.master_data.employees.extend(employees);
11093            self.master_data.cost_centers.extend(cost_centers);
11094            self.master_data.profit_centers.extend(profit_centers);
11095            self.master_data
11096                .employee_change_history
11097                .extend(change_history);
11098        }
11099
11100        // v3.3.0: one OrganizationalProfile per company. Cheap to
11101        // generate (derived from industry + company_code) so we
11102        // always emit when master data runs; no separate config flag.
11103        {
11104            use datasynth_core::models::IndustrySector;
11105            use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
11106            let industry = match self.config.global.industry {
11107                IndustrySector::Manufacturing => "manufacturing",
11108                IndustrySector::Retail => "retail",
11109                IndustrySector::FinancialServices => "financial_services",
11110                IndustrySector::Technology => "technology",
11111                IndustrySector::Healthcare => "healthcare",
11112                _ => "other",
11113            };
11114            for (i, company) in self.config.companies.iter().enumerate() {
11115                let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
11116                let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
11117                let profile = profile_gen.generate(&company.code, industry);
11118                self.master_data.organizational_profiles.push(profile);
11119            }
11120        }
11121
11122        if let Some(pb) = &pb {
11123            pb.inc(total);
11124        }
11125        if let Some(pb) = pb {
11126            pb.finish_with_message("Master data generation complete");
11127        }
11128
11129        Ok(())
11130    }
11131
11132    /// Generate document flows (P2P and O2C).
11133    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
11134        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11135            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11136
11137        // Generate P2P chains
11138        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
11139        let months = (self.config.global.period_months as usize).max(1);
11140        let p2p_count = self
11141            .phase_config
11142            .p2p_chains
11143            .min(self.master_data.vendors.len() * 2 * months);
11144        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
11145
11146        // Convert P2P config from schema to generator config
11147        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
11148        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
11149        p2p_gen.set_country_pack(self.primary_pack().clone());
11150        // v3.4.1: wire temporal context so PO/GR/invoice/payment dates snap
11151        // to business days. No-op when `temporal_patterns.business_days.
11152        // enabled = false`.
11153        if let Some(ctx) = &self.temporal_context {
11154            p2p_gen.set_temporal_context(Arc::clone(ctx));
11155        }
11156
11157        for i in 0..p2p_count {
11158            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
11159            let materials: Vec<&Material> = self
11160                .master_data
11161                .materials
11162                .iter()
11163                .skip(i % self.master_data.materials.len().max(1))
11164                .take(2.min(self.master_data.materials.len()))
11165                .collect();
11166
11167            if materials.is_empty() {
11168                continue;
11169            }
11170
11171            let company = &self.config.companies[i % self.config.companies.len()];
11172            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
11173            let fiscal_period = po_date.month() as u8;
11174            let created_by = if self.master_data.employees.is_empty() {
11175                "SYSTEM"
11176            } else {
11177                self.master_data.employees[i % self.master_data.employees.len()]
11178                    .user_id
11179                    .as_str()
11180            };
11181
11182            let chain = p2p_gen.generate_chain(
11183                &company.code,
11184                vendor,
11185                &materials,
11186                po_date,
11187                start_date.year() as u16,
11188                fiscal_period,
11189                created_by,
11190            );
11191
11192            // Flatten documents
11193            flows.purchase_orders.push(chain.purchase_order.clone());
11194            flows.goods_receipts.extend(chain.goods_receipts.clone());
11195            if let Some(vi) = &chain.vendor_invoice {
11196                flows.vendor_invoices.push(vi.clone());
11197            }
11198            if let Some(payment) = &chain.payment {
11199                flows.payments.push(payment.clone());
11200            }
11201            for remainder in &chain.remainder_payments {
11202                flows.payments.push(remainder.clone());
11203            }
11204            flows.p2p_chains.push(chain);
11205
11206            if let Some(pb) = &pb {
11207                pb.inc(1);
11208            }
11209        }
11210
11211        if let Some(pb) = pb {
11212            pb.finish_with_message("P2P document flows complete");
11213        }
11214
11215        // Generate O2C chains
11216        // Cap at ~2 SOs per customer per month to keep order volume realistic
11217        let o2c_count = self
11218            .phase_config
11219            .o2c_chains
11220            .min(self.master_data.customers.len() * 2 * months);
11221        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
11222
11223        // Convert O2C config from schema to generator config
11224        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
11225        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
11226        o2c_gen.set_country_pack(self.primary_pack().clone());
11227        // v3.4.1: wire temporal context (no-op when business_days disabled).
11228        if let Some(ctx) = &self.temporal_context {
11229            o2c_gen.set_temporal_context(Arc::clone(ctx));
11230        }
11231
11232        for i in 0..o2c_count {
11233            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
11234            let materials: Vec<&Material> = self
11235                .master_data
11236                .materials
11237                .iter()
11238                .skip(i % self.master_data.materials.len().max(1))
11239                .take(2.min(self.master_data.materials.len()))
11240                .collect();
11241
11242            if materials.is_empty() {
11243                continue;
11244            }
11245
11246            let company = &self.config.companies[i % self.config.companies.len()];
11247            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
11248            let fiscal_period = so_date.month() as u8;
11249            let created_by = if self.master_data.employees.is_empty() {
11250                "SYSTEM"
11251            } else {
11252                self.master_data.employees[i % self.master_data.employees.len()]
11253                    .user_id
11254                    .as_str()
11255            };
11256
11257            let chain = o2c_gen.generate_chain(
11258                &company.code,
11259                customer,
11260                &materials,
11261                so_date,
11262                start_date.year() as u16,
11263                fiscal_period,
11264                created_by,
11265            );
11266
11267            // Flatten documents
11268            flows.sales_orders.push(chain.sales_order.clone());
11269            flows.deliveries.extend(chain.deliveries.clone());
11270            if let Some(ci) = &chain.customer_invoice {
11271                flows.customer_invoices.push(ci.clone());
11272            }
11273            if let Some(receipt) = &chain.customer_receipt {
11274                flows.payments.push(receipt.clone());
11275            }
11276            // Extract remainder receipts (follow-up to partial payments)
11277            for receipt in &chain.remainder_receipts {
11278                flows.payments.push(receipt.clone());
11279            }
11280            flows.o2c_chains.push(chain);
11281
11282            if let Some(pb) = &pb {
11283                pb.inc(1);
11284            }
11285        }
11286
11287        if let Some(pb) = pb {
11288            pb.finish_with_message("O2C document flows complete");
11289        }
11290
11291        // Collect all document cross-references from document headers.
11292        // Each document embeds references to its predecessor(s) via add_reference(); here we
11293        // denormalise them into a flat list for the document_references.json output file.
11294        {
11295            let mut refs = Vec::new();
11296            for doc in &flows.purchase_orders {
11297                refs.extend(doc.header.document_references.iter().cloned());
11298            }
11299            for doc in &flows.goods_receipts {
11300                refs.extend(doc.header.document_references.iter().cloned());
11301            }
11302            for doc in &flows.vendor_invoices {
11303                refs.extend(doc.header.document_references.iter().cloned());
11304            }
11305            for doc in &flows.sales_orders {
11306                refs.extend(doc.header.document_references.iter().cloned());
11307            }
11308            for doc in &flows.deliveries {
11309                refs.extend(doc.header.document_references.iter().cloned());
11310            }
11311            for doc in &flows.customer_invoices {
11312                refs.extend(doc.header.document_references.iter().cloned());
11313            }
11314            for doc in &flows.payments {
11315                refs.extend(doc.header.document_references.iter().cloned());
11316            }
11317            debug!(
11318                "Collected {} document cross-references from document headers",
11319                refs.len()
11320            );
11321            flows.document_references = refs;
11322        }
11323
11324        Ok(())
11325    }
11326
11327    /// Generate journal entries using parallel generation across multiple cores.
11328    fn generate_journal_entries(
11329        &mut self,
11330        coa: &Arc<ChartOfAccounts>,
11331    ) -> SynthResult<Vec<JournalEntry>> {
11332        use datasynth_core::traits::ParallelGenerator;
11333
11334        let total = self.calculate_total_transactions();
11335        let pb = self.create_progress_bar(total, "Generating Journal Entries");
11336
11337        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11338            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11339        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
11340
11341        let company_codes: Vec<String> = self
11342            .config
11343            .companies
11344            .iter()
11345            .map(|c| c.code.clone())
11346            .collect();
11347
11348        let mut generator = JournalEntryGenerator::new_with_params(
11349            self.config.transactions.clone(),
11350            Arc::clone(coa),
11351            company_codes,
11352            start_date,
11353            end_date,
11354            self.seed,
11355        );
11356        // Wire the `business_processes.*_weight` config through (phantom knob
11357        // until now — the JE generator hard-coded 0.35/0.30/0.20/0.10/0.05).
11358        let bp = &self.config.business_processes;
11359        generator.set_business_process_weights(
11360            bp.o2c_weight,
11361            bp.p2p_weight,
11362            bp.r2r_weight,
11363            bp.h2r_weight,
11364            bp.a2r_weight,
11365        );
11366        // v3.4.0: wire advanced distributions (mixture models + industry
11367        // profiles). No-op when `distributions.enabled = false` or
11368        // `distributions.amounts.enabled = false`, preserving v3.3.2
11369        // byte-identical output on default configs.
11370        generator
11371            .set_advanced_distributions(&self.config.distributions, self.seed + 400)
11372            .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
11373
11374        // SP3: load and wire industry priors when the config opts in via
11375        //   distributions.industry_profile.priors.enabled = true
11376        // When disabled (or when using the legacy bare-name form), this block
11377        // is a no-op and generation behavior is identical to v5.11.
11378        if let Some(profile) = &self.config.distributions.industry_profile {
11379            if let Some(priors_cfg) = profile.priors() {
11380                if priors_cfg.enabled {
11381                    use datasynth_config::schema::PriorsSource;
11382                    use datasynth_generators::priors_loader::LoadedPriors;
11383
11384                    let mut priors_rng =
11385                        rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(500));
11386                    let period_days = i64::from(self.config.global.period_months) * 30;
11387                    let industry_slug = profile.profile_type().slug();
11388
11389                    let loaded = match priors_cfg.source {
11390                        PriorsSource::Bundled => {
11391                            LoadedPriors::load_bundled(industry_slug, &mut priors_rng, period_days)
11392                                .map_err(|e| {
11393                                    SynthError::config(format!(
11394                                "SP3: failed to load bundled priors for '{industry_slug}': {e}"
11395                            ))
11396                                })?
11397                        }
11398                        PriorsSource::File => {
11399                            let path = priors_cfg.path.as_ref().ok_or_else(|| {
11400                                SynthError::config(
11401                                    "SP3: industry_profile.priors.path required when source = file"
11402                                        .to_string(),
11403                                )
11404                            })?;
11405                            LoadedPriors::load_from_path(
11406                                path,
11407                                &mut priors_rng,
11408                                period_days,
11409                                Some(industry_slug),
11410                            )
11411                            .map_err(|e| {
11412                                SynthError::config(format!(
11413                                    "SP3: failed to load priors from '{}': {e}",
11414                                    path.display()
11415                                ))
11416                            })?
11417                        }
11418                    };
11419
11420                    // SP3.12 — cache priors in Arc so document-flow generator
11421                    // can also apply lines-per-JE padding without re-loading.
11422                    let loaded = std::sync::Arc::new(loaded);
11423                    self.cached_priors = Some(loaded.clone());
11424                    generator.loaded_priors = Some((*loaded).clone());
11425
11426                    // SP3.4 — instantiate VelocityCalibrator when the config
11427                    // opts in.  Default target rates (R7/R9) are a sensible
11428                    // baseline; they can be derived from the loaded priors in
11429                    // a future hardening pass.
11430                    if priors_cfg.velocity_calibration {
11431                        use datasynth_generators::velocity_calibrator::VelocityCalibrator;
11432                        let mut targets = std::collections::HashMap::new();
11433                        targets.insert("R7".to_string(), 0.10);
11434                        targets.insert("R9".to_string(), 0.10);
11435                        let calibrator = VelocityCalibrator::new(targets, 10_000);
11436                        generator.velocity_calibrator = Some(calibrator);
11437                    }
11438                }
11439            }
11440        }
11441
11442        let generator = generator;
11443
11444        // Connect generated master data to ensure JEs reference real entities
11445        // Enable persona-based error injection for realistic human behavior
11446        // Pass fraud configuration for fraud injection
11447        let je_pack = self.primary_pack();
11448
11449        // Master-data CC / PC pools so JE.cost_center and
11450        // JE.profit_center join back to `cost_centers.id` and
11451        // `profit_centers.id` (closes the v5.9.0 linkage gap that
11452        // had `JE.cost_center = "CC1000"` while master used
11453        // `CC-1000-FIN` etc.).  Empty when no master is present —
11454        // the generator falls back to its hardcoded constants.
11455        let cc_pool: Vec<String> = self
11456            .master_data
11457            .cost_centers
11458            .iter()
11459            .map(|c| c.id.clone())
11460            .collect();
11461        let pc_pool: Vec<String> = self
11462            .master_data
11463            .profit_centers
11464            .iter()
11465            .map(|p| p.id.clone())
11466            .collect();
11467
11468        // Build a UserPool from the generated employee master so
11469        // JE.created_by lines join back to `employees.user_id`.  v5.9.0:
11470        // closes the third linkage gap (the previous behaviour had
11471        // JeGenerator generate its own UserPool internally with
11472        // ids disjoint from the employee master).
11473        let user_pool_from_employees =
11474            datasynth_core::models::UserPool::from_employees(&self.master_data.employees);
11475
11476        let mut generator = generator
11477            .with_master_data(
11478                &self.master_data.vendors,
11479                &self.master_data.customers,
11480                &self.master_data.materials,
11481            )
11482            .with_cost_center_pool(cc_pool)
11483            .with_profit_center_pool(pc_pool)
11484            .with_country_pack_names(je_pack)
11485            .with_user_pool(user_pool_from_employees)
11486            .with_country_pack_temporal(
11487                self.config.temporal_patterns.clone(),
11488                self.seed + 200,
11489                je_pack,
11490            )
11491            .with_persona_errors(true)
11492            .with_fraud_config(self.config.fraud.clone());
11493
11494        // Apply temporal drift if configured. v3.5.2+: also merge
11495        // `distributions.regime_changes` (regime events, economic
11496        // cycles, parameter drifts) into the same DriftConfig so both
11497        // knobs flow through the shared DriftController.
11498        let temporal_enabled = self.config.temporal.enabled;
11499        let regimes_enabled = self.config.distributions.regime_changes.enabled;
11500        if temporal_enabled || regimes_enabled {
11501            let mut drift_config = if temporal_enabled {
11502                self.config.temporal.to_core_config()
11503            } else {
11504                // regime-changes only: start from default (drift OFF),
11505                // apply_to flips `enabled = true`.
11506                datasynth_core::distributions::DriftConfig::default()
11507            };
11508            if regimes_enabled {
11509                self.config
11510                    .distributions
11511                    .regime_changes
11512                    .apply_to(&mut drift_config, start_date);
11513            }
11514            generator = generator.with_drift_config(drift_config, self.seed + 100);
11515        }
11516
11517        // Check memory limit at start
11518        self.check_memory_limit()?;
11519
11520        // Determine parallelism: use available cores, but cap at total entries
11521        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11522
11523        // Use parallel generation for datasets with 10K+ entries.
11524        // Below this threshold, the statistical properties of a single-seeded
11525        // generator (e.g. Benford compliance) are better preserved.
11526        let entries = if total >= 10_000 && num_threads > 1 {
11527            // Parallel path: split the generator across cores and generate in parallel.
11528            // Each sub-generator gets a unique seed for deterministic, independent generation.
11529            let sub_generators = generator.split(num_threads);
11530            let entries_per_thread = total as usize / num_threads;
11531            let remainder = total as usize % num_threads;
11532
11533            let batches: Vec<Vec<JournalEntry>> = sub_generators
11534                .into_par_iter()
11535                .enumerate()
11536                .map(|(i, mut gen)| {
11537                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11538                    gen.generate_batch(count)
11539                })
11540                .collect();
11541
11542            // Merge all batches into a single Vec
11543            let entries = JournalEntryGenerator::merge_results(batches);
11544
11545            if let Some(pb) = &pb {
11546                pb.inc(total);
11547            }
11548            entries
11549        } else {
            // Sequential path for small datasets (< 10,000 entries) or when only one thread is available
11551            let mut entries = Vec::with_capacity(total as usize);
11552            for _ in 0..total {
11553                let entry = generator.generate();
11554                entries.push(entry);
11555                if let Some(pb) = &pb {
11556                    pb.inc(1);
11557                }
11558            }
11559            entries
11560        };
11561
11562        if let Some(pb) = pb {
11563            pb.finish_with_message("Journal entries complete");
11564        }
11565
11566        Ok(entries)
11567    }
11568
11569    /// Generate journal entries from document flows.
11570    ///
11571    /// This creates proper GL entries for each document in the P2P and O2C flows,
11572    /// ensuring that document activity is reflected in the general ledger.
11573    fn generate_jes_from_document_flows(
11574        &mut self,
11575        flows: &DocumentFlowSnapshot,
11576    ) -> SynthResult<Vec<JournalEntry>> {
11577        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11578        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11579
11580        let je_config = match self.resolve_coa_framework() {
11581            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11582            CoAFramework::GermanSkr04 => {
11583                let fa = datasynth_core::FrameworkAccounts::german_gaap();
11584                DocumentFlowJeConfig::from(&fa)
11585            }
11586            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11587        };
11588
11589        let populate_fec = je_config.populate_fec_fields;
11590        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11591
11592        // SP3.12 — propagate cached priors so document-flow JEs receive
11593        // the same lines-per-JE padding as standalone JEs.
11594        if let Some(ref priors) = self.cached_priors {
11595            generator.set_loaded_priors(priors.clone());
11596        }
11597
11598        // Master-data CC / PC pools so document-flow-derived JEs
11599        // (P2P / O2C postings) reference IDs that join back to the
11600        // cost-centers / profit-centers masters.  Same plumbing as
11601        // for `JeGenerator` above; falls back to hardcoded const
11602        // pools when masters are absent.
11603        let cc_pool: Vec<String> = self
11604            .master_data
11605            .cost_centers
11606            .iter()
11607            .map(|c| c.id.clone())
11608            .collect();
11609        let pc_pool: Vec<String> = self
11610            .master_data
11611            .profit_centers
11612            .iter()
11613            .map(|p| p.id.clone())
11614            .collect();
11615        if !cc_pool.is_empty() {
11616            generator.set_cost_center_pool(cc_pool);
11617        }
11618        if !pc_pool.is_empty() {
11619            generator.set_profit_center_pool(pc_pool);
11620        }
11621
11622        // Build auxiliary account lookup from vendor/customer master data so that
11623        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
11624        // PCG "4010001") instead of raw partner IDs.
11625        if populate_fec {
11626            let mut aux_lookup = std::collections::HashMap::new();
11627            for vendor in &self.master_data.vendors {
11628                if let Some(ref aux) = vendor.auxiliary_gl_account {
11629                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11630                }
11631            }
11632            for customer in &self.master_data.customers {
11633                if let Some(ref aux) = customer.auxiliary_gl_account {
11634                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11635                }
11636            }
11637            if !aux_lookup.is_empty() {
11638                generator.set_auxiliary_account_lookup(aux_lookup);
11639            }
11640        }
11641
11642        let mut entries = Vec::new();
11643
11644        // Generate JEs from P2P chains
11645        for chain in &flows.p2p_chains {
11646            let chain_entries = generator.generate_from_p2p_chain(chain);
11647            entries.extend(chain_entries);
11648            if let Some(pb) = &pb {
11649                pb.inc(1);
11650            }
11651        }
11652
11653        // Generate JEs from O2C chains
11654        for chain in &flows.o2c_chains {
11655            let chain_entries = generator.generate_from_o2c_chain(chain);
11656            entries.extend(chain_entries);
11657            if let Some(pb) = &pb {
11658                pb.inc(1);
11659            }
11660        }
11661
11662        if let Some(pb) = pb {
11663            pb.finish_with_message(format!(
11664                "Generated {} JEs from document flows",
11665                entries.len()
11666            ));
11667        }
11668
11669        Ok(entries)
11670    }
11671
11672    /// Generate journal entries from payroll runs.
11673    ///
11674    /// Creates one JE per payroll run:
11675    /// - DR Salaries & Wages (6100) for gross pay
11676    /// - CR Payroll Clearing (9100) for gross pay
11677    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11678        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11679
11680        let mut jes = Vec::with_capacity(payroll_runs.len());
11681
11682        for run in payroll_runs {
11683            let mut je = JournalEntry::new_simple(
11684                format!("JE-PAYROLL-{}", run.payroll_id),
11685                run.company_code.clone(),
11686                run.run_date,
11687                format!("Payroll {}", run.payroll_id),
11688            );
11689
11690            // Debit Salaries & Wages for gross pay
11691            je.add_line(JournalEntryLine {
11692                line_number: 1,
11693                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11694                debit_amount: run.total_gross,
11695                reference: Some(run.payroll_id.clone()),
11696                text: Some(format!(
11697                    "Payroll {} ({} employees)",
11698                    run.payroll_id, run.employee_count
11699                )),
11700                ..Default::default()
11701            });
11702
11703            // Credit Payroll Clearing for gross pay
11704            je.add_line(JournalEntryLine {
11705                line_number: 2,
11706                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11707                credit_amount: run.total_gross,
11708                reference: Some(run.payroll_id.clone()),
11709                ..Default::default()
11710            });
11711
11712            jes.push(je);
11713        }
11714
11715        jes
11716    }
11717
11718    /// Link document flows to subledger records.
11719    ///
11720    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
11721    /// ensuring subledger data is coherent with document flow data.
11722    fn link_document_flows_to_subledgers(
11723        &mut self,
11724        flows: &DocumentFlowSnapshot,
11725    ) -> SynthResult<SubledgerSnapshot> {
11726        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11727        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11728
11729        // Build vendor/customer name maps from master data for realistic subledger names
11730        let vendor_names: std::collections::HashMap<String, String> = self
11731            .master_data
11732            .vendors
11733            .iter()
11734            .map(|v| (v.vendor_id.clone(), v.name.clone()))
11735            .collect();
11736        let customer_names: std::collections::HashMap<String, String> = self
11737            .master_data
11738            .customers
11739            .iter()
11740            .map(|c| (c.customer_id.clone(), c.name.clone()))
11741            .collect();
11742
11743        let mut linker = DocumentFlowLinker::new()
11744            .with_vendor_names(vendor_names)
11745            .with_customer_names(customer_names);
11746
11747        // Convert vendor invoices to AP invoices
11748        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11749        if let Some(pb) = &pb {
11750            pb.inc(flows.vendor_invoices.len() as u64);
11751        }
11752
11753        // Convert customer invoices to AR invoices
11754        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11755        if let Some(pb) = &pb {
11756            pb.inc(flows.customer_invoices.len() as u64);
11757        }
11758
11759        if let Some(pb) = pb {
11760            pb.finish_with_message(format!(
11761                "Linked {} AP and {} AR invoices",
11762                ap_invoices.len(),
11763                ar_invoices.len()
11764            ));
11765        }
11766
11767        Ok(SubledgerSnapshot {
11768            ap_invoices,
11769            ar_invoices,
11770            fa_records: Vec::new(),
11771            inventory_positions: Vec::new(),
11772            inventory_movements: Vec::new(),
11773            // Aging reports are computed after payment settlement in phase_document_flows.
11774            ar_aging_reports: Vec::new(),
11775            ap_aging_reports: Vec::new(),
11776            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
11777            depreciation_runs: Vec::new(),
11778            inventory_valuations: Vec::new(),
11779            // Dunning runs and letters are populated in phase_document_flows after AR aging.
11780            dunning_runs: Vec::new(),
11781            dunning_letters: Vec::new(),
11782        })
11783    }
11784
11785    /// Generate OCPM events from document flows.
11786    ///
11787    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
11788    /// capturing the object-centric process perspective.
11789    #[allow(clippy::too_many_arguments)]
11790    fn generate_ocpm_events(
11791        &mut self,
11792        flows: &DocumentFlowSnapshot,
11793        sourcing: &SourcingSnapshot,
11794        hr: &HrSnapshot,
11795        manufacturing: &ManufacturingSnapshot,
11796        banking: &BankingSnapshot,
11797        audit: &AuditSnapshot,
11798        financial_reporting: &FinancialReportingSnapshot,
11799    ) -> SynthResult<OcpmSnapshot> {
11800        let total_chains = flows.p2p_chains.len()
11801            + flows.o2c_chains.len()
11802            + sourcing.sourcing_projects.len()
11803            + hr.payroll_runs.len()
11804            + manufacturing.production_orders.len()
11805            + banking.customers.len()
11806            + audit.engagements.len()
11807            + financial_reporting.bank_reconciliations.len();
11808        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11809
11810        // Create OCPM event log with standard types
11811        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11812        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11813
11814        // Configure the OCPM generator
11815        let ocpm_config = OcpmGeneratorConfig {
11816            generate_p2p: true,
11817            generate_o2c: true,
11818            generate_s2c: !sourcing.sourcing_projects.is_empty(),
11819            generate_h2r: !hr.payroll_runs.is_empty(),
11820            generate_mfg: !manufacturing.production_orders.is_empty(),
11821            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11822            generate_bank: !banking.customers.is_empty(),
11823            generate_audit: !audit.engagements.is_empty(),
11824            happy_path_rate: 0.75,
11825            exception_path_rate: 0.20,
11826            error_path_rate: 0.05,
11827            add_duration_variability: true,
11828            duration_std_dev_factor: 0.3,
11829        };
11830        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11831        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11832
11833        // Get available users for resource assignment
11834        let available_users: Vec<String> = self
11835            .master_data
11836            .employees
11837            .iter()
11838            .take(20)
11839            .map(|e| e.user_id.clone())
11840            .collect();
11841
11842        // Deterministic base date from config (avoids Utc::now() non-determinism)
11843        let fallback_date =
11844            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11845        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11846            .unwrap_or(fallback_date);
11847        let base_midnight = base_date
11848            .and_hms_opt(0, 0, 0)
11849            .expect("midnight is always valid");
11850        let base_datetime =
11851            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11852
11853        // Helper closure to add case results to event log
11854        let add_result = |event_log: &mut OcpmEventLog,
11855                          result: datasynth_ocpm::CaseGenerationResult| {
11856            for event in result.events {
11857                event_log.add_event(event);
11858            }
11859            for object in result.objects {
11860                event_log.add_object(object);
11861            }
11862            for relationship in result.relationships {
11863                event_log.add_relationship(relationship);
11864            }
11865            for corr in result.correlation_events {
11866                event_log.add_correlation_event(corr);
11867            }
11868            event_log.add_case(result.case_trace);
11869        };
11870
11871        // Generate events from P2P chains
11872        for chain in &flows.p2p_chains {
11873            let po = &chain.purchase_order;
11874            let documents = P2pDocuments::new(
11875                &po.header.document_id,
11876                &po.vendor_id,
11877                &po.header.company_code,
11878                po.total_net_amount,
11879                &po.header.currency,
11880                &ocpm_uuid_factory,
11881            )
11882            .with_goods_receipt(
11883                chain
11884                    .goods_receipts
11885                    .first()
11886                    .map(|gr| gr.header.document_id.as_str())
11887                    .unwrap_or(""),
11888                &ocpm_uuid_factory,
11889            )
11890            .with_invoice(
11891                chain
11892                    .vendor_invoice
11893                    .as_ref()
11894                    .map(|vi| vi.header.document_id.as_str())
11895                    .unwrap_or(""),
11896                &ocpm_uuid_factory,
11897            )
11898            .with_payment(
11899                chain
11900                    .payment
11901                    .as_ref()
11902                    .map(|p| p.header.document_id.as_str())
11903                    .unwrap_or(""),
11904                &ocpm_uuid_factory,
11905            );
11906
11907            let start_time =
11908                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
11909            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
11910            add_result(&mut event_log, result);
11911
11912            if let Some(pb) = &pb {
11913                pb.inc(1);
11914            }
11915        }
11916
11917        // Generate events from O2C chains
11918        for chain in &flows.o2c_chains {
11919            let so = &chain.sales_order;
11920            let documents = O2cDocuments::new(
11921                &so.header.document_id,
11922                &so.customer_id,
11923                &so.header.company_code,
11924                so.total_net_amount,
11925                &so.header.currency,
11926                &ocpm_uuid_factory,
11927            )
11928            .with_delivery(
11929                chain
11930                    .deliveries
11931                    .first()
11932                    .map(|d| d.header.document_id.as_str())
11933                    .unwrap_or(""),
11934                &ocpm_uuid_factory,
11935            )
11936            .with_invoice(
11937                chain
11938                    .customer_invoice
11939                    .as_ref()
11940                    .map(|ci| ci.header.document_id.as_str())
11941                    .unwrap_or(""),
11942                &ocpm_uuid_factory,
11943            )
11944            .with_receipt(
11945                chain
11946                    .customer_receipt
11947                    .as_ref()
11948                    .map(|r| r.header.document_id.as_str())
11949                    .unwrap_or(""),
11950                &ocpm_uuid_factory,
11951            );
11952
11953            let start_time =
11954                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
11955            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
11956            add_result(&mut event_log, result);
11957
11958            if let Some(pb) = &pb {
11959                pb.inc(1);
11960            }
11961        }
11962
11963        // Generate events from S2C sourcing projects
11964        for project in &sourcing.sourcing_projects {
11965            // Find vendor from contracts or qualifications
11966            let vendor_id = sourcing
11967                .contracts
11968                .iter()
11969                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11970                .map(|c| c.vendor_id.clone())
11971                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
11972                .or_else(|| {
11973                    self.master_data
11974                        .vendors
11975                        .first()
11976                        .map(|v| v.vendor_id.clone())
11977                })
11978                .unwrap_or_else(|| "V000".to_string());
11979            let mut docs = S2cDocuments::new(
11980                &project.project_id,
11981                &vendor_id,
11982                &project.company_code,
11983                project.estimated_annual_spend,
11984                &ocpm_uuid_factory,
11985            );
11986            // Link RFx if available
11987            if let Some(rfx) = sourcing
11988                .rfx_events
11989                .iter()
11990                .find(|r| r.sourcing_project_id == project.project_id)
11991            {
11992                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11993                // Link winning bid (status == Accepted)
11994                if let Some(bid) = sourcing.bids.iter().find(|b| {
11995                    b.rfx_id == rfx.rfx_id
11996                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11997                }) {
11998                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11999                }
12000            }
12001            // Link contract
12002            if let Some(contract) = sourcing
12003                .contracts
12004                .iter()
12005                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
12006            {
12007                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
12008            }
12009            let start_time = base_datetime - chrono::Duration::days(90);
12010            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
12011            add_result(&mut event_log, result);
12012
12013            if let Some(pb) = &pb {
12014                pb.inc(1);
12015            }
12016        }
12017
12018        // Generate events from H2R payroll runs
12019        for run in &hr.payroll_runs {
12020            // Use first matching payroll line item's employee, or fallback
12021            let employee_id = hr
12022                .payroll_line_items
12023                .iter()
12024                .find(|li| li.payroll_id == run.payroll_id)
12025                .map(|li| li.employee_id.as_str())
12026                .unwrap_or("EMP000");
12027            let docs = H2rDocuments::new(
12028                &run.payroll_id,
12029                employee_id,
12030                &run.company_code,
12031                run.total_gross,
12032                &ocpm_uuid_factory,
12033            )
12034            .with_time_entries(
12035                hr.time_entries
12036                    .iter()
12037                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
12038                    .take(5)
12039                    .map(|t| t.entry_id.as_str())
12040                    .collect(),
12041            );
12042            let start_time = base_datetime - chrono::Duration::days(30);
12043            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
12044            add_result(&mut event_log, result);
12045
12046            if let Some(pb) = &pb {
12047                pb.inc(1);
12048            }
12049        }
12050
12051        // Generate events from MFG production orders
12052        for order in &manufacturing.production_orders {
12053            let mut docs = MfgDocuments::new(
12054                &order.order_id,
12055                &order.material_id,
12056                &order.company_code,
12057                order.planned_quantity,
12058                &ocpm_uuid_factory,
12059            )
12060            .with_operations(
12061                order
12062                    .operations
12063                    .iter()
12064                    .map(|o| format!("OP-{:04}", o.operation_number))
12065                    .collect::<Vec<_>>()
12066                    .iter()
12067                    .map(std::string::String::as_str)
12068                    .collect(),
12069            );
12070            // Link quality inspection if available (via reference_id matching order_id)
12071            if let Some(insp) = manufacturing
12072                .quality_inspections
12073                .iter()
12074                .find(|i| i.reference_id == order.order_id)
12075            {
12076                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
12077            }
12078            // Link cycle count if available (match by material_id in items)
12079            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
12080                cc.items
12081                    .iter()
12082                    .any(|item| item.material_id == order.material_id)
12083            }) {
12084                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
12085            }
12086            let start_time = base_datetime - chrono::Duration::days(60);
12087            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
12088            add_result(&mut event_log, result);
12089
12090            if let Some(pb) = &pb {
12091                pb.inc(1);
12092            }
12093        }
12094
12095        // Generate events from Banking customers
12096        for customer in &banking.customers {
12097            let customer_id_str = customer.customer_id.to_string();
12098            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
12099            // Link accounts (primary_owner_id matches customer_id)
12100            if let Some(account) = banking
12101                .accounts
12102                .iter()
12103                .find(|a| a.primary_owner_id == customer.customer_id)
12104            {
12105                let account_id_str = account.account_id.to_string();
12106                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
12107                // Link transactions for this account
12108                let txn_strs: Vec<String> = banking
12109                    .transactions
12110                    .iter()
12111                    .filter(|t| t.account_id == account.account_id)
12112                    .take(10)
12113                    .map(|t| t.transaction_id.to_string())
12114                    .collect();
12115                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
12116                let txn_amounts: Vec<rust_decimal::Decimal> = banking
12117                    .transactions
12118                    .iter()
12119                    .filter(|t| t.account_id == account.account_id)
12120                    .take(10)
12121                    .map(|t| t.amount)
12122                    .collect();
12123                if !txn_ids.is_empty() {
12124                    docs = docs.with_transactions(txn_ids, txn_amounts);
12125                }
12126            }
12127            let start_time = base_datetime - chrono::Duration::days(180);
12128            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
12129            add_result(&mut event_log, result);
12130
12131            if let Some(pb) = &pb {
12132                pb.inc(1);
12133            }
12134        }
12135
12136        // Generate events from Audit engagements
12137        for engagement in &audit.engagements {
12138            let engagement_id_str = engagement.engagement_id.to_string();
12139            let docs = AuditDocuments::new(
12140                &engagement_id_str,
12141                &engagement.client_entity_id,
12142                &ocpm_uuid_factory,
12143            )
12144            .with_workpapers(
12145                audit
12146                    .workpapers
12147                    .iter()
12148                    .filter(|w| w.engagement_id == engagement.engagement_id)
12149                    .take(10)
12150                    .map(|w| w.workpaper_id.to_string())
12151                    .collect::<Vec<_>>()
12152                    .iter()
12153                    .map(std::string::String::as_str)
12154                    .collect(),
12155            )
12156            .with_evidence(
12157                audit
12158                    .evidence
12159                    .iter()
12160                    .filter(|e| e.engagement_id == engagement.engagement_id)
12161                    .take(10)
12162                    .map(|e| e.evidence_id.to_string())
12163                    .collect::<Vec<_>>()
12164                    .iter()
12165                    .map(std::string::String::as_str)
12166                    .collect(),
12167            )
12168            .with_risks(
12169                audit
12170                    .risk_assessments
12171                    .iter()
12172                    .filter(|r| r.engagement_id == engagement.engagement_id)
12173                    .take(5)
12174                    .map(|r| r.risk_id.to_string())
12175                    .collect::<Vec<_>>()
12176                    .iter()
12177                    .map(std::string::String::as_str)
12178                    .collect(),
12179            )
12180            .with_findings(
12181                audit
12182                    .findings
12183                    .iter()
12184                    .filter(|f| f.engagement_id == engagement.engagement_id)
12185                    .take(5)
12186                    .map(|f| f.finding_id.to_string())
12187                    .collect::<Vec<_>>()
12188                    .iter()
12189                    .map(std::string::String::as_str)
12190                    .collect(),
12191            )
12192            .with_judgments(
12193                audit
12194                    .judgments
12195                    .iter()
12196                    .filter(|j| j.engagement_id == engagement.engagement_id)
12197                    .take(5)
12198                    .map(|j| j.judgment_id.to_string())
12199                    .collect::<Vec<_>>()
12200                    .iter()
12201                    .map(std::string::String::as_str)
12202                    .collect(),
12203            );
12204            let start_time = base_datetime - chrono::Duration::days(120);
12205            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
12206            add_result(&mut event_log, result);
12207
12208            if let Some(pb) = &pb {
12209                pb.inc(1);
12210            }
12211        }
12212
12213        // Generate events from Bank Reconciliations
12214        for recon in &financial_reporting.bank_reconciliations {
12215            let docs = BankReconDocuments::new(
12216                &recon.reconciliation_id,
12217                &recon.bank_account_id,
12218                &recon.company_code,
12219                recon.bank_ending_balance,
12220                &ocpm_uuid_factory,
12221            )
12222            .with_statement_lines(
12223                recon
12224                    .statement_lines
12225                    .iter()
12226                    .take(20)
12227                    .map(|l| l.line_id.as_str())
12228                    .collect(),
12229            )
12230            .with_reconciling_items(
12231                recon
12232                    .reconciling_items
12233                    .iter()
12234                    .take(10)
12235                    .map(|i| i.item_id.as_str())
12236                    .collect(),
12237            );
12238            let start_time = base_datetime - chrono::Duration::days(30);
12239            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
12240            add_result(&mut event_log, result);
12241
12242            if let Some(pb) = &pb {
12243                pb.inc(1);
12244            }
12245        }
12246
12247        // Compute process variants
12248        event_log.compute_variants();
12249
12250        let summary = event_log.summary();
12251
12252        if let Some(pb) = pb {
12253            pb.finish_with_message(format!(
12254                "Generated {} OCPM events, {} objects",
12255                summary.event_count, summary.object_count
12256            ));
12257        }
12258
12259        Ok(OcpmSnapshot {
12260            event_count: summary.event_count,
12261            object_count: summary.object_count,
12262            case_count: summary.case_count,
12263            event_log: Some(event_log),
12264        })
12265    }
12266
12267    /// Inject anomalies into journal entries.
12268    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
12269        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
12270
12271        // Read anomaly rates from config instead of using hardcoded values.
12272        // Priority: anomaly_injection config > fraud config > default 0.02
12273        let total_rate = if self.config.anomaly_injection.enabled {
12274            self.config.anomaly_injection.rates.total_rate
12275        } else if self.config.fraud.enabled {
12276            self.config.fraud.fraud_rate
12277        } else {
12278            0.02
12279        };
12280
12281        let fraud_rate = if self.config.anomaly_injection.enabled {
12282            self.config.anomaly_injection.rates.fraud_rate
12283        } else {
12284            AnomalyRateConfig::default().fraud_rate
12285        };
12286
12287        let error_rate = if self.config.anomaly_injection.enabled {
12288            self.config.anomaly_injection.rates.error_rate
12289        } else {
12290            AnomalyRateConfig::default().error_rate
12291        };
12292
12293        let process_issue_rate = if self.config.anomaly_injection.enabled {
12294            self.config.anomaly_injection.rates.process_rate
12295        } else {
12296            AnomalyRateConfig::default().process_issue_rate
12297        };
12298
12299        let anomaly_config = AnomalyInjectorConfig {
12300            rates: AnomalyRateConfig {
12301                total_rate,
12302                fraud_rate,
12303                error_rate,
12304                process_issue_rate,
12305                ..Default::default()
12306            },
12307            seed: self.seed + 5000,
12308            ..Default::default()
12309        };
12310
12311        let mut injector = AnomalyInjector::new(anomaly_config);
12312        let result = injector.process_entries(entries);
12313
12314        // Central concentration abstraction (#143, Phase 1): run the post-process
12315        // pipeline AFTER per-entry strategies. The pipeline merges the SOTA-12
12316        // tagger + new passes (trading-partner pool, Phase-2 account substitution)
12317        // through a single integration point — see
12318        // docs/superpowers/specs/2026-05-23-concentration-pass-INDEX.md.
12319        //
12320        // Back-compat: the legacy `anomaly_injection.source_conditional_rarity_rate`
12321        // key remains honored. If `concentration.source_conditional_rarity` is also
12322        // set in the same config, the unified DSL field wins.
12323        let (sota12_tagged, consolidation_outlier_expanded): (usize, usize) = {
12324            use datasynth_config::schema::{
12325                ConcentrationConfig, ConsolidationOutlierPassConfig,
12326                SourceConditionalRarityPassConfig,
12327            };
12328            use datasynth_generators::concentration::ConcentrationPipeline;
12329
12330            // Decide effective ConcentrationConfig: start from user config, then
12331            // back-fill from the legacy SOTA-12 key if the unified DSL didn't set it.
12332            let mut effective: ConcentrationConfig = self.config.concentration.clone();
12333            if effective.source_conditional_rarity.is_none() {
12334                if let Some(rate) = self.config.anomaly_injection.source_conditional_rarity_rate {
12335                    effective.enabled = true;
12336                    effective.source_conditional_rarity = Some(SourceConditionalRarityPassConfig {
12337                        rate,
12338                        min_surprise: None,
12339                        min_per_source_lines: None,
12340                    });
12341                }
12342            }
12343            // v5.30 B2 (#154) — back-compat: surface
12344            // `anomaly_injection.rates.consolidation_outlier_rate` as a
12345            // `ConsolidationOutlierPassConfig` if the unified DSL didn't
12346            // set one. Default 0.001 baseline shipped via the schema's
12347            // `default_consolidation_outlier_rate` — only synthesise the
12348            // pass when the rate is > 0, otherwise it's a no-op anyway.
12349            if effective.consolidation_outlier.is_none() {
12350                let rate = self
12351                    .config
12352                    .anomaly_injection
12353                    .rates
12354                    .consolidation_outlier_rate;
12355                if rate > 0.0 {
12356                    effective.enabled = true;
12357                    effective.consolidation_outlier = Some(ConsolidationOutlierPassConfig {
12358                        rate,
12359                        ..Default::default()
12360                    });
12361                }
12362            }
12363
12364            if !effective.enabled {
12365                (0, 0)
12366            } else {
12367                let pipeline = ConcentrationPipeline::from_config(&effective).map_err(|e| {
12368                    SynthError::generation(format!(
12369                        "ConcentrationPipeline construction failed: {e}"
12370                    ))
12371                })?;
12372                if !pipeline.is_active() {
12373                    (0, 0)
12374                } else {
12375                    // Per-pipeline seed disjoint from every other generator stream.
12376                    const CONCENTRATION_SEED_OFFSET: u64 = 0xC0_C3_E1_47_10_43_77_3B;
12377                    let stats =
12378                        pipeline.run(entries, self.seed.wrapping_add(CONCENTRATION_SEED_OFFSET));
12379                    let sota12: usize = stats
12380                        .iter()
12381                        .filter(|s| s.pass == "source_conditional_rarity")
12382                        .map(|s| s.entries_modified)
12383                        .sum();
12384                    let consol: usize = stats
12385                        .iter()
12386                        .filter(|s| s.pass == "consolidation_outlier")
12387                        .map(|s| s.entries_modified)
12388                        .sum();
12389                    (sota12, consol)
12390                }
12391            }
12392        };
12393
12394        if let Some(pb) = &pb {
12395            pb.inc(entries.len() as u64);
12396            pb.finish_with_message("Anomaly injection complete");
12397        }
12398
12399        let mut by_type = HashMap::new();
12400        for label in &result.labels {
12401            *by_type
12402                .entry(format!("{:?}", label.anomaly_type))
12403                .or_insert(0) += 1;
12404        }
12405        if sota12_tagged > 0 {
12406            *by_type
12407                .entry("SourceConditionalRarity".to_string())
12408                .or_insert(0) += sota12_tagged;
12409        }
12410        // v5.30 B2 (#154): record the consolidation-outlier expansion
12411        // count under a stable label key so the orchestrator's run
12412        // report surfaces the heavy-tail emission rate alongside the
12413        // other anomaly buckets.
12414        if consolidation_outlier_expanded > 0 {
12415            *by_type
12416                .entry("ConsolidationOutlier".to_string())
12417                .or_insert(0) += consolidation_outlier_expanded;
12418        }
12419
12420        Ok(AnomalyLabels {
12421            labels: result.labels,
12422            summary: Some(result.summary),
12423            by_type,
12424        })
12425    }
12426
12427    /// Validate journal entries using running balance tracker.
12428    ///
12429    /// Applies all entries to the balance tracker and validates:
12430    /// - Each entry is internally balanced (debits = credits)
12431    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
12432    ///
12433    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
12434    /// excluded from balance validation as they may be intentionally unbalanced.
12435    fn validate_journal_entries(
12436        &mut self,
12437        entries: &[JournalEntry],
12438    ) -> SynthResult<BalanceValidationResult> {
12439        // Filter out entries with human errors as they may be intentionally unbalanced
12440        let clean_entries: Vec<&JournalEntry> = entries
12441            .iter()
12442            .filter(|e| {
12443                e.header
12444                    .header_text
12445                    .as_ref()
12446                    .map(|t| !t.contains("[HUMAN_ERROR:"))
12447                    .unwrap_or(true)
12448            })
12449            .collect();
12450
12451        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
12452
12453        // Configure tracker to not fail on errors (collect them instead)
12454        let config = BalanceTrackerConfig {
12455            validate_on_each_entry: false,   // We'll validate at the end
12456            track_history: false,            // Skip history for performance
12457            fail_on_validation_error: false, // Collect errors, don't fail
12458            ..Default::default()
12459        };
12460        let validation_currency = self
12461            .config
12462            .companies
12463            .first()
12464            .map(|c| c.currency.clone())
12465            .unwrap_or_else(|| "USD".to_string());
12466
12467        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
12468
12469        // Apply clean entries (without human errors)
12470        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
12471        let errors = tracker.apply_entries(&clean_refs);
12472
12473        if let Some(pb) = &pb {
12474            pb.inc(entries.len() as u64);
12475        }
12476
12477        // Check if any entries were unbalanced
12478        // Note: When fail_on_validation_error is false, errors are stored in tracker
12479        let has_unbalanced = tracker
12480            .get_validation_errors()
12481            .iter()
12482            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
12483
12484        // Validate balance sheet for each company
12485        // Include both returned errors and collected validation errors
12486        let mut all_errors = errors;
12487        all_errors.extend(tracker.get_validation_errors().iter().cloned());
12488        let company_codes: Vec<String> = self
12489            .config
12490            .companies
12491            .iter()
12492            .map(|c| c.code.clone())
12493            .collect();
12494
12495        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12496            .map(|d| d + chrono::Months::new(self.config.global.period_months))
12497            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12498
12499        for company_code in &company_codes {
12500            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
12501                all_errors.push(e);
12502            }
12503        }
12504
12505        // Get statistics after all mutable operations are done
12506        let stats = tracker.get_statistics();
12507
12508        // Determine if balanced overall
12509        let is_balanced = all_errors.is_empty();
12510
12511        if let Some(pb) = pb {
12512            let msg = if is_balanced {
12513                "Balance validation passed"
12514            } else {
12515                "Balance validation completed with errors"
12516            };
12517            pb.finish_with_message(msg);
12518        }
12519
12520        Ok(BalanceValidationResult {
12521            validated: true,
12522            is_balanced,
12523            entries_processed: stats.entries_processed,
12524            total_debits: stats.total_debits,
12525            total_credits: stats.total_credits,
12526            accounts_tracked: stats.accounts_tracked,
12527            companies_tracked: stats.companies_tracked,
12528            validation_errors: all_errors,
12529            has_unbalanced_entries: has_unbalanced,
12530        })
12531    }
12532
12533    /// Inject data quality variations into journal entries.
12534    ///
12535    /// Applies typos, missing values, and format variations to make
12536    /// the synthetic data more realistic for testing data cleaning pipelines.
12537    fn inject_data_quality(
12538        &mut self,
12539        entries: &mut [JournalEntry],
12540    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
12541        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
12542
12543        // Build config from user-specified schema settings when data_quality is enabled;
12544        // otherwise fall back to the low-rate minimal() preset.
12545        let config = if self.config.data_quality.enabled {
12546            let dq = &self.config.data_quality;
12547            // Propagate per-field rates and protected fields from the schema
12548            // so users can dial in real-production NULL profiles per field
12549            // (e.g. CostCenter 96.5% NULL, Invoice_Reference 100% NULL).
12550            let field_rates = dq.missing_values.field_rates.clone();
12551            let mut required_fields: std::collections::HashSet<String> =
12552                dq.missing_values.protected_fields.iter().cloned().collect();
12553            // Always preserve audit-critical identifiers regardless of
12554            // user config — losing these breaks downstream joins.
12555            for f in [
12556                "document_id",
12557                "company_code",
12558                "posting_date",
12559                "fiscal_year",
12560                "fiscal_period",
12561                "gl_account",
12562                "line_number",
12563                "transaction_id",
12564            ] {
12565                required_fields.insert(f.to_string());
12566            }
12567            DataQualityConfig {
12568                enable_missing_values: dq.missing_values.enabled,
12569                missing_values: datasynth_generators::MissingValueConfig {
12570                    global_rate: dq.effective_missing_rate(),
12571                    field_rates,
12572                    required_fields,
12573                    ..Default::default()
12574                },
12575                enable_format_variations: dq.format_variations.enabled,
12576                format_variations: datasynth_generators::FormatVariationConfig {
12577                    date_variation_rate: dq.format_variations.dates.rate,
12578                    amount_variation_rate: dq.format_variations.amounts.rate,
12579                    identifier_variation_rate: dq.format_variations.identifiers.rate,
12580                    ..Default::default()
12581                },
12582                enable_duplicates: dq.duplicates.enabled,
12583                duplicates: datasynth_generators::DuplicateConfig {
12584                    duplicate_rate: dq.effective_duplicate_rate(),
12585                    ..Default::default()
12586                },
12587                enable_typos: dq.typos.enabled,
12588                typos: datasynth_generators::TypoConfig {
12589                    char_error_rate: dq.effective_typo_rate(),
12590                    ..Default::default()
12591                },
12592                enable_encoding_issues: dq.encoding_issues.enabled,
12593                encoding_issue_rate: dq.encoding_issues.rate,
12594                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
12595                track_statistics: true,
12596            }
12597        } else {
12598            DataQualityConfig::minimal()
12599        };
12600        let mut injector = DataQualityInjector::new(config);
12601
12602        // Wire country pack for locale-aware format baselines
12603        injector.set_country_pack(self.primary_pack().clone());
12604
12605        // Build context for missing value decisions
12606        let context = HashMap::new();
12607
12608        for entry in entries.iter_mut() {
12609            // Process header_text field (common target for typos)
12610            if let Some(text) = &entry.header.header_text {
12611                let processed = injector.process_text_field(
12612                    "header_text",
12613                    text,
12614                    &entry.header.document_id.to_string(),
12615                    &context,
12616                );
12617                match processed {
12618                    Some(new_text) if new_text != *text => {
12619                        entry.header.header_text = Some(new_text);
12620                    }
12621                    None => {
12622                        entry.header.header_text = None; // Missing value
12623                    }
12624                    _ => {}
12625                }
12626            }
12627
12628            // Process reference field
12629            if let Some(ref_text) = &entry.header.reference {
12630                let processed = injector.process_text_field(
12631                    "reference",
12632                    ref_text,
12633                    &entry.header.document_id.to_string(),
12634                    &context,
12635                );
12636                match processed {
12637                    Some(new_text) if new_text != *ref_text => {
12638                        entry.header.reference = Some(new_text);
12639                    }
12640                    None => {
12641                        entry.header.reference = None;
12642                    }
12643                    _ => {}
12644                }
12645            }
12646
12647            // Process user_persona field (potential for typos in user IDs)
12648            let user_persona = entry.header.user_persona.clone();
12649            if let Some(processed) = injector.process_text_field(
12650                "user_persona",
12651                &user_persona,
12652                &entry.header.document_id.to_string(),
12653                &context,
12654            ) {
12655                if processed != user_persona {
12656                    entry.header.user_persona = processed;
12657                }
12658            }
12659
12660            // Process line items
12661            for line in &mut entry.lines {
12662                // Process line description if present
12663                if let Some(ref text) = line.line_text {
12664                    let processed = injector.process_text_field(
12665                        "line_text",
12666                        text,
12667                        &entry.header.document_id.to_string(),
12668                        &context,
12669                    );
12670                    match processed {
12671                        Some(new_text) if new_text != *text => {
12672                            line.line_text = Some(new_text);
12673                        }
12674                        None => {
12675                            line.line_text = None;
12676                        }
12677                        _ => {}
12678                    }
12679                }
12680
12681                // Process cost_center if present
12682                if let Some(cc) = &line.cost_center {
12683                    let processed = injector.process_text_field(
12684                        "cost_center",
12685                        cc,
12686                        &entry.header.document_id.to_string(),
12687                        &context,
12688                    );
12689                    match processed {
12690                        Some(new_cc) if new_cc != *cc => {
12691                            line.cost_center = Some(new_cc);
12692                        }
12693                        None => {
12694                            line.cost_center = None;
12695                        }
12696                        _ => {}
12697                    }
12698                }
12699
12700                // Extended field coverage (v5.6+): apply NULL injection to
12701                // every Option<String> on the line so users can match
12702                // arbitrary real-production NULL profiles via
12703                // `data_quality.missing_values.field_rates`.
12704                //
12705                // Macro-free helper: process_field returns the new value
12706                // ({Some, None, unchanged}) and we apply it back.
12707                macro_rules! process_opt_field {
12708                    ($field_name:expr, $opt:expr) => {
12709                        if let Some(val) = $opt.as_ref() {
12710                            match injector.process_text_field(
12711                                $field_name,
12712                                val,
12713                                &entry.header.document_id.to_string(),
12714                                &context,
12715                            ) {
12716                                Some(new_val) if new_val != *val => {
12717                                    *$opt = Some(new_val);
12718                                }
12719                                None => {
12720                                    *$opt = None;
12721                                }
12722                                _ => {}
12723                            }
12724                        }
12725                    };
12726                }
12727
12728                process_opt_field!("profit_center", &mut line.profit_center);
12729                process_opt_field!("assignment", &mut line.assignment);
12730                process_opt_field!("tax_code", &mut line.tax_code);
12731                process_opt_field!("account_description", &mut line.account_description);
12732                process_opt_field!(
12733                    "auxiliary_account_number",
12734                    &mut line.auxiliary_account_number
12735                );
12736                process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12737                process_opt_field!("lettrage", &mut line.lettrage);
12738            }
12739
12740            if let Some(pb) = &pb {
12741                pb.inc(1);
12742            }
12743        }
12744
12745        if let Some(pb) = pb {
12746            pb.finish_with_message("Data quality injection complete");
12747        }
12748
12749        let quality_issues = injector.issues().to_vec();
12750        Ok((injector.stats().clone(), quality_issues))
12751    }
12752
12753    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
12754    ///
12755    /// Creates complete audit documentation for each company in the configuration,
12756    /// following ISA standards:
12757    /// - ISA 210/220: Engagement acceptance and terms
12758    /// - ISA 230: Audit documentation (workpapers)
12759    /// - ISA 265: Control deficiencies (findings)
12760    /// - ISA 315/330: Risk assessment and response
12761    /// - ISA 500: Audit evidence
12762    /// - ISA 200: Professional judgment
12763    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12764        // Check if FSM-driven audit generation is enabled
12765        let use_fsm = self
12766            .config
12767            .audit
12768            .fsm
12769            .as_ref()
12770            .map(|f| f.enabled)
12771            .unwrap_or(false);
12772
12773        if use_fsm {
12774            return self.generate_audit_data_with_fsm(entries);
12775        }
12776
12777        // --- Legacy (non-FSM) audit generation follows ---
12778        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12779            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12780        let fiscal_year = start_date.year() as u16;
12781        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12782
12783        // Calculate rough total revenue from entries for materiality
12784        let total_revenue: rust_decimal::Decimal = entries
12785            .iter()
12786            .flat_map(|e| e.lines.iter())
12787            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12788            .map(|l| l.credit_amount)
12789            .sum();
12790
12791        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
12792        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12793
12794        let mut snapshot = AuditSnapshot::default();
12795
12796        // Initialize generators
12797        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12798        // v3.3.2: thread the user-facing audit schema config into the
12799        // engagement generator (team size range).
12800        engagement_gen.set_team_config(&self.config.audit.team);
12801
12802        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12803        // v3.3.2: thread workpaper + review workflow schema config into
12804        // the workpaper generator (per-section count range + review
12805        // delay ranges).
12806        workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12807        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12808        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12809        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12810        // v3.2.1+: user-supplied finding titles + narratives flow through shared provider
12811        finding_gen.set_template_provider(self.template_provider.clone());
12812        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12813        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12814        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12815        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12816        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12817        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12818        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12819
12820        // Get list of accounts from CoA for risk assessment
12821        let accounts: Vec<String> = self
12822            .coa
12823            .as_ref()
12824            .map(|coa| {
12825                coa.get_postable_accounts()
12826                    .iter()
12827                    .map(|acc| acc.account_code().to_string())
12828                    .collect()
12829            })
12830            .unwrap_or_default();
12831
12832        // Generate engagements for each company
12833        for (i, company) in self.config.companies.iter().enumerate() {
12834            // Calculate company-specific revenue (proportional to volume weight)
12835            let company_revenue = total_revenue
12836                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12837
12838            // Generate engagements for this company
12839            let engagements_for_company =
12840                self.phase_config.audit_engagements / self.config.companies.len().max(1);
12841            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12842                1
12843            } else {
12844                0
12845            };
12846
12847            for _eng_idx in 0..(engagements_for_company + extra) {
12848                // v3.3.2: draw engagement type from the user-configured
12849                // distribution instead of always using the default
12850                // (AnnualAudit). Falls back to the default when all
12851                // probabilities are zero.
12852                let eng_type =
12853                    engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12854
12855                // Generate the engagement
12856                let mut engagement = engagement_gen.generate_engagement(
12857                    &company.code,
12858                    &company.name,
12859                    fiscal_year,
12860                    period_end,
12861                    company_revenue,
12862                    Some(eng_type),
12863                );
12864
12865                // Replace synthetic team IDs with real employee IDs from master data
12866                if !self.master_data.employees.is_empty() {
12867                    let emp_count = self.master_data.employees.len();
12868                    // Use employee IDs deterministically based on engagement index
12869                    let base = (i * 10 + _eng_idx) % emp_count;
12870                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
12871                        .employee_id
12872                        .clone();
12873                    engagement.engagement_manager_id = self.master_data.employees
12874                        [(base + 1) % emp_count]
12875                        .employee_id
12876                        .clone();
12877                    let real_team: Vec<String> = engagement
12878                        .team_member_ids
12879                        .iter()
12880                        .enumerate()
12881                        .map(|(j, _)| {
12882                            self.master_data.employees[(base + 2 + j) % emp_count]
12883                                .employee_id
12884                                .clone()
12885                        })
12886                        .collect();
12887                    engagement.team_member_ids = real_team;
12888                }
12889
12890                if let Some(pb) = &pb {
12891                    pb.inc(1);
12892                }
12893
12894                // Get team members from the engagement
12895                let team_members: Vec<String> = engagement.team_member_ids.clone();
12896
12897                // Generate workpapers for the engagement.
12898                // v3.3.2: honor `audit.generate_workpapers` — when false,
12899                // workpapers (and dependent evidence) are skipped while
12900                // the engagement itself, risk assessments, findings, etc.
12901                // still generate normally.
12902                let workpapers = if self.config.audit.generate_workpapers {
12903                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
12904                } else {
12905                    Vec::new()
12906                };
12907
12908                for wp in &workpapers {
12909                    if let Some(pb) = &pb {
12910                        pb.inc(1);
12911                    }
12912
12913                    // Generate evidence for each workpaper
12914                    let evidence = evidence_gen.generate_evidence_for_workpaper(
12915                        wp,
12916                        &team_members,
12917                        wp.preparer_date,
12918                    );
12919
12920                    for _ in &evidence {
12921                        if let Some(pb) = &pb {
12922                            pb.inc(1);
12923                        }
12924                    }
12925
12926                    snapshot.evidence.extend(evidence);
12927                }
12928
12929                // Generate risk assessments for the engagement
12930                let risks =
12931                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
12932
12933                for _ in &risks {
12934                    if let Some(pb) = &pb {
12935                        pb.inc(1);
12936                    }
12937                }
12938                snapshot.risk_assessments.extend(risks);
12939
12940                // Generate findings for the engagement
12941                let findings = finding_gen.generate_findings_for_engagement(
12942                    &engagement,
12943                    &workpapers,
12944                    &team_members,
12945                );
12946
12947                for _ in &findings {
12948                    if let Some(pb) = &pb {
12949                        pb.inc(1);
12950                    }
12951                }
12952                snapshot.findings.extend(findings);
12953
12954                // Generate professional judgments for the engagement
12955                let judgments =
12956                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
12957
12958                for _ in &judgments {
12959                    if let Some(pb) = &pb {
12960                        pb.inc(1);
12961                    }
12962                }
12963                snapshot.judgments.extend(judgments);
12964
12965                // ISA 505: External confirmations and responses
12966                let (confs, resps) =
12967                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12968                snapshot.confirmations.extend(confs);
12969                snapshot.confirmation_responses.extend(resps);
12970
12971                // ISA 330: Procedure steps per workpaper
12972                let team_pairs: Vec<(String, String)> = team_members
12973                    .iter()
12974                    .map(|id| {
12975                        let name = self
12976                            .master_data
12977                            .employees
12978                            .iter()
12979                            .find(|e| e.employee_id == *id)
12980                            .map(|e| e.display_name.clone())
12981                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12982                        (id.clone(), name)
12983                    })
12984                    .collect();
12985                for wp in &workpapers {
12986                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12987                    snapshot.procedure_steps.extend(steps);
12988                }
12989
12990                // ISA 530: Samples per workpaper
12991                for wp in &workpapers {
12992                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12993                        snapshot.samples.push(sample);
12994                    }
12995                }
12996
12997                // ISA 520: Analytical procedures
12998                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12999                snapshot.analytical_results.extend(analytical);
13000
13001                // ISA 610: Internal audit function and reports
13002                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
13003                snapshot.ia_functions.push(ia_func);
13004                snapshot.ia_reports.extend(ia_reports);
13005
13006                // ISA 550: Related parties and transactions
13007                let vendor_names: Vec<String> = self
13008                    .master_data
13009                    .vendors
13010                    .iter()
13011                    .map(|v| v.name.clone())
13012                    .collect();
13013                let customer_names: Vec<String> = self
13014                    .master_data
13015                    .customers
13016                    .iter()
13017                    .map(|c| c.name.clone())
13018                    .collect();
13019                let (parties, rp_txns) =
13020                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
13021                snapshot.related_parties.extend(parties);
13022                snapshot.related_party_transactions.extend(rp_txns);
13023
13024                // Add workpapers after findings since findings need them
13025                snapshot.workpapers.extend(workpapers);
13026
13027                // Generate audit scope record for this engagement (one per engagement)
13028                {
13029                    let scope_id = format!(
13030                        "SCOPE-{}-{}",
13031                        engagement.engagement_id.simple(),
13032                        &engagement.client_entity_id
13033                    );
13034                    let scope = datasynth_core::models::audit::AuditScope::new(
13035                        scope_id.clone(),
13036                        engagement.engagement_id.to_string(),
13037                        engagement.client_entity_id.clone(),
13038                        engagement.materiality,
13039                    );
13040                    // Wire scope_id back to engagement
13041                    let mut eng = engagement;
13042                    eng.scope_id = Some(scope_id);
13043                    snapshot.audit_scopes.push(scope);
13044                    snapshot.engagements.push(eng);
13045                }
13046            }
13047        }
13048
13049        // ----------------------------------------------------------------
13050        // ISA 600: Group audit — component auditors, plan, instructions, reports
13051        // ----------------------------------------------------------------
13052        if self.config.companies.len() > 1 {
13053            // Use materiality from the first engagement if available, otherwise
13054            // derive a reasonable figure from total revenue.
13055            let group_materiality = snapshot
13056                .engagements
13057                .first()
13058                .map(|e| e.materiality)
13059                .unwrap_or_else(|| {
13060                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
13061                    total_revenue * pct
13062                });
13063
13064            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
13065            let group_engagement_id = snapshot
13066                .engagements
13067                .first()
13068                .map(|e| e.engagement_id.to_string())
13069                .unwrap_or_else(|| "GROUP-ENG".to_string());
13070
13071            let component_snapshot = component_gen.generate(
13072                &self.config.companies,
13073                group_materiality,
13074                &group_engagement_id,
13075                period_end,
13076            );
13077
13078            snapshot.component_auditors = component_snapshot.component_auditors;
13079            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
13080            snapshot.component_instructions = component_snapshot.component_instructions;
13081            snapshot.component_reports = component_snapshot.component_reports;
13082
13083            info!(
13084                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
13085                snapshot.component_auditors.len(),
13086                snapshot.component_instructions.len(),
13087                snapshot.component_reports.len(),
13088            );
13089        }
13090
13091        // ----------------------------------------------------------------
13092        // ISA 210: Engagement letters — one per engagement
13093        // ----------------------------------------------------------------
13094        {
13095            let applicable_framework = self
13096                .config
13097                .accounting_standards
13098                .framework
13099                .as_ref()
13100                .map(|f| format!("{f:?}"))
13101                .unwrap_or_else(|| "IFRS".to_string());
13102
13103            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
13104            let entity_count = self.config.companies.len();
13105
13106            for engagement in &snapshot.engagements {
13107                let company = self
13108                    .config
13109                    .companies
13110                    .iter()
13111                    .find(|c| c.code == engagement.client_entity_id);
13112                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
13113                let letter_date = engagement.planning_start;
13114                let letter = letter_gen.generate(
13115                    &engagement.engagement_id.to_string(),
13116                    &engagement.client_name,
13117                    entity_count,
13118                    engagement.period_end_date,
13119                    currency,
13120                    &applicable_framework,
13121                    letter_date,
13122                );
13123                snapshot.engagement_letters.push(letter);
13124            }
13125
13126            info!(
13127                "ISA 210 engagement letters: {} generated",
13128                snapshot.engagement_letters.len()
13129            );
13130        }
13131
13132        // ----------------------------------------------------------------
13133        // v3.3.0: Legal documents per engagement (WI: LegalDocumentGenerator)
13134        // ----------------------------------------------------------------
13135        if self.phase_config.generate_legal_documents {
13136            use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
13137            let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
13138            for engagement in &snapshot.engagements {
13139                // Build an employee name list for signatory drawing —
13140                // prefer employees from the engaged entity, fall back to
13141                // all employees.
13142                let employee_names: Vec<String> = self
13143                    .master_data
13144                    .employees
13145                    .iter()
13146                    .filter(|e| e.company_code == engagement.client_entity_id)
13147                    .map(|e| e.display_name.clone())
13148                    .collect();
13149                let names_to_use = if !employee_names.is_empty() {
13150                    employee_names
13151                } else {
13152                    self.master_data
13153                        .employees
13154                        .iter()
13155                        .take(10)
13156                        .map(|e| e.display_name.clone())
13157                        .collect()
13158                };
13159                let docs = legal_gen.generate(
13160                    &engagement.client_entity_id,
13161                    engagement.fiscal_year as i32,
13162                    &names_to_use,
13163                );
13164                snapshot.legal_documents.extend(docs);
13165            }
13166            info!(
13167                "v3.3.0 legal documents: {} emitted across {} engagements",
13168                snapshot.legal_documents.len(),
13169                snapshot.engagements.len()
13170            );
13171        }
13172
13173        // ----------------------------------------------------------------
13174        // v3.3.0: IT general controls — access logs + change records
13175        //
13176        // `ItControlsGenerator` runs one pass per company (not per
13177        // engagement) so employee sets and system catalogs stay
13178        // coherent. We derive the period from the earliest engagement's
13179        // planning_start through the latest engagement's period_end_date
13180        // for each company.
13181        // ----------------------------------------------------------------
13182        if self.phase_config.generate_it_controls {
13183            use datasynth_generators::it_controls_generator::ItControlsGenerator;
13184            use std::collections::HashMap;
13185            let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
13186
13187            // Group engagements by company to produce one IT-controls
13188            // window per entity.
13189            let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
13190                HashMap::new();
13191            for engagement in &snapshot.engagements {
13192                let entry = by_company
13193                    .entry(engagement.client_entity_id.clone())
13194                    .or_insert((engagement.planning_start, engagement.period_end_date));
13195                if engagement.planning_start < entry.0 {
13196                    entry.0 = engagement.planning_start;
13197                }
13198                if engagement.period_end_date > entry.1 {
13199                    entry.1 = engagement.period_end_date;
13200                }
13201            }
13202
13203            // Standard system catalog — populated from known ERP / app
13204            // names. Keeps the generator's data shape stable when the
13205            // user hasn't configured IT-system naming separately.
13206            let systems: Vec<String> = vec![
13207                "SAP ECC",
13208                "SAP S/4 HANA",
13209                "Oracle EBS",
13210                "Workday",
13211                "NetSuite",
13212                "Active Directory",
13213                "SharePoint",
13214                "Salesforce",
13215                "ServiceNow",
13216                "Jira",
13217                "GitHub Enterprise",
13218                "AWS Console",
13219                "Okta",
13220            ]
13221            .into_iter()
13222            .map(String::from)
13223            .collect();
13224
13225            for (company_code, (start, end)) in by_company {
13226                let emps: Vec<(String, String)> = self
13227                    .master_data
13228                    .employees
13229                    .iter()
13230                    .filter(|e| e.company_code == company_code)
13231                    .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13232                    .collect();
13233                if emps.is_empty() {
13234                    continue;
13235                }
13236                // Approximate the period in 30-day months: (days / 30) + 1 using
13237                // integer division, clamped to a minimum of 1.
13238                let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
13239                let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
13240                let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
13241                snapshot.it_controls_access_logs.extend(access_logs);
13242                snapshot.it_controls_change_records.extend(change_records);
13243            }
13244
13245            info!(
13246                "v3.3.0 IT controls: {} access logs, {} change records",
13247                snapshot.it_controls_access_logs.len(),
13248                snapshot.it_controls_change_records.len()
13249            );
13250        }
13251
13252        // ----------------------------------------------------------------
13253        // ISA 560 / IAS 10: Subsequent events
13254        // ----------------------------------------------------------------
13255        {
13256            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
13257            let entity_codes: Vec<String> = self
13258                .config
13259                .companies
13260                .iter()
13261                .map(|c| c.code.clone())
13262                .collect();
13263            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
13264            info!(
13265                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
13266                subsequent.len(),
13267                subsequent
13268                    .iter()
13269                    .filter(|e| matches!(
13270                        e.classification,
13271                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
13272                    ))
13273                    .count(),
13274                subsequent
13275                    .iter()
13276                    .filter(|e| matches!(
13277                        e.classification,
13278                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
13279                    ))
13280                    .count(),
13281            );
13282            snapshot.subsequent_events = subsequent;
13283        }
13284
13285        // ----------------------------------------------------------------
13286        // ISA 402: Service organization controls
13287        // ----------------------------------------------------------------
13288        {
13289            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
13290            let entity_codes: Vec<String> = self
13291                .config
13292                .companies
13293                .iter()
13294                .map(|c| c.code.clone())
13295                .collect();
13296            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
13297            info!(
13298                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
13299                soc_snapshot.service_organizations.len(),
13300                soc_snapshot.soc_reports.len(),
13301                soc_snapshot.user_entity_controls.len(),
13302            );
13303            snapshot.service_organizations = soc_snapshot.service_organizations;
13304            snapshot.soc_reports = soc_snapshot.soc_reports;
13305            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
13306        }
13307
13308        // ----------------------------------------------------------------
13309        // ISA 570: Going concern assessments
13310        // ----------------------------------------------------------------
13311        {
13312            use datasynth_generators::audit::going_concern_generator::{
13313                GoingConcernGenerator, GoingConcernInput,
13314            };
13315            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
13316            let entity_codes: Vec<String> = self
13317                .config
13318                .companies
13319                .iter()
13320                .map(|c| c.code.clone())
13321                .collect();
13322            // Assessment date = period end + 75 days (typical sign-off window).
13323            let assessment_date = period_end + chrono::Duration::days(75);
13324            let period_label = format!("FY{}", period_end.year());
13325
13326            // Build financial inputs from actual journal entries.
13327            //
13328            // We derive approximate P&L, working capital, and operating cash flow
13329            // by aggregating GL account balances from the journal entry population.
13330            // Account ranges used (standard chart):
13331            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
13332            //   Expenses:        6xxx (debit-normal)
13333            //   Current assets:  1000–1499 (AR=1100, cash=1000, inventory=1300)
13334            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
13335            //   Operating CF:    net income used as-is (rough proxy, no D&A adjustment)
13336            let gc_inputs: Vec<GoingConcernInput> = self
13337                .config
13338                .companies
13339                .iter()
13340                .map(|company| {
13341                    let code = &company.code;
13342                    let mut revenue = rust_decimal::Decimal::ZERO;
13343                    let mut expenses = rust_decimal::Decimal::ZERO;
13344                    let mut current_assets = rust_decimal::Decimal::ZERO;
13345                    let mut current_liabs = rust_decimal::Decimal::ZERO;
13346                    let mut total_debt = rust_decimal::Decimal::ZERO;
13347
13348                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
13349                        for line in &je.lines {
13350                            let acct = line.gl_account.as_str();
13351                            let net = line.debit_amount - line.credit_amount;
13352                            if acct.starts_with('4') {
13353                                // Revenue accounts: credit-normal, so negative net = revenue earned
13354                                revenue -= net;
13355                            } else if acct.starts_with('6') {
13356                                // Expense accounts: debit-normal
13357                                expenses += net;
13358                            }
13359                            // Balance sheet accounts for working capital
13360                            if acct.starts_with('1') {
13361                                // Current asset accounts (1000–1499)
13362                                if let Ok(n) = acct.parse::<u32>() {
13363                                    if (1000..=1499).contains(&n) {
13364                                        current_assets += net;
13365                                    }
13366                                }
13367                            } else if acct.starts_with('2') {
13368                                if let Ok(n) = acct.parse::<u32>() {
13369                                    if (2000..=2499).contains(&n) {
13370                                        // Current liabilities
13371                                        current_liabs -= net; // credit-normal
13372                                    } else if (2500..=2999).contains(&n) {
13373                                        // Long-term debt
13374                                        total_debt -= net;
13375                                    }
13376                                }
13377                            }
13378                        }
13379                    }
13380
13381                    let net_income = revenue - expenses;
13382                    let working_capital = current_assets - current_liabs;
13383                    // Rough operating CF proxy: net income (full accrual CF calculation
13384                    // is done separately in the cash flow statement generator)
13385                    let operating_cash_flow = net_income;
13386
13387                    GoingConcernInput {
13388                        entity_code: code.clone(),
13389                        net_income,
13390                        working_capital,
13391                        operating_cash_flow,
13392                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
13393                        assessment_date,
13394                    }
13395                })
13396                .collect();
13397
13398            let assessments = if gc_inputs.is_empty() {
13399                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
13400            } else {
13401                gc_gen.generate_for_entities_with_inputs(
13402                    &entity_codes,
13403                    &gc_inputs,
13404                    assessment_date,
13405                    &period_label,
13406                )
13407            };
13408            info!(
13409                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
13410                assessments.len(),
13411                assessments.iter().filter(|a| matches!(
13412                    a.auditor_conclusion,
13413                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
13414                )).count(),
13415                assessments.iter().filter(|a| matches!(
13416                    a.auditor_conclusion,
13417                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
13418                )).count(),
13419                assessments.iter().filter(|a| matches!(
13420                    a.auditor_conclusion,
13421                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
13422                )).count(),
13423            );
13424            snapshot.going_concern_assessments = assessments;
13425        }
13426
13427        // ----------------------------------------------------------------
13428        // ISA 540: Accounting estimates
13429        // ----------------------------------------------------------------
13430        {
13431            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
13432            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
13433            let entity_codes: Vec<String> = self
13434                .config
13435                .companies
13436                .iter()
13437                .map(|c| c.code.clone())
13438                .collect();
13439            let estimates = est_gen.generate_for_entities(&entity_codes);
13440            info!(
13441                "ISA 540 accounting estimates: {} estimates across {} entities \
13442                 ({} with retrospective reviews, {} with auditor point estimates)",
13443                estimates.len(),
13444                entity_codes.len(),
13445                estimates
13446                    .iter()
13447                    .filter(|e| e.retrospective_review.is_some())
13448                    .count(),
13449                estimates
13450                    .iter()
13451                    .filter(|e| e.auditor_point_estimate.is_some())
13452                    .count(),
13453            );
13454            snapshot.accounting_estimates = estimates;
13455        }
13456
13457        // ----------------------------------------------------------------
13458        // ISA 700/701/705/706: Audit opinions (one per engagement)
13459        // ----------------------------------------------------------------
13460        {
13461            use datasynth_generators::audit::audit_opinion_generator::{
13462                AuditOpinionGenerator, AuditOpinionInput,
13463            };
13464
13465            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
13466
13467            // Build inputs — one per engagement, linking findings and going concern.
13468            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
13469                .engagements
13470                .iter()
13471                .map(|eng| {
13472                    // Collect findings for this engagement.
13473                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13474                        .findings
13475                        .iter()
13476                        .filter(|f| f.engagement_id == eng.engagement_id)
13477                        .cloned()
13478                        .collect();
13479
13480                    // Going concern for this entity.
13481                    let gc = snapshot
13482                        .going_concern_assessments
13483                        .iter()
13484                        .find(|g| g.entity_code == eng.client_entity_id)
13485                        .cloned();
13486
13487                    // Component reports relevant to this engagement.
13488                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
13489                        snapshot.component_reports.clone();
13490
13491                    let auditor = self
13492                        .master_data
13493                        .employees
13494                        .first()
13495                        .map(|e| e.display_name.clone())
13496                        .unwrap_or_else(|| "Global Audit LLP".into());
13497
13498                    let partner = self
13499                        .master_data
13500                        .employees
13501                        .get(1)
13502                        .map(|e| e.display_name.clone())
13503                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
13504
13505                    AuditOpinionInput {
13506                        entity_code: eng.client_entity_id.clone(),
13507                        entity_name: eng.client_name.clone(),
13508                        engagement_id: eng.engagement_id,
13509                        period_end: eng.period_end_date,
13510                        findings: eng_findings,
13511                        going_concern: gc,
13512                        component_reports: comp_reports,
13513                        // Mark as US-listed when audit standards include PCAOB.
13514                        is_us_listed: {
13515                            let fw = &self.config.audit_standards.isa_compliance.framework;
13516                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
13517                        },
13518                        auditor_name: auditor,
13519                        engagement_partner: partner,
13520                    }
13521                })
13522                .collect();
13523
13524            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
13525
13526            for go in &generated_opinions {
13527                snapshot
13528                    .key_audit_matters
13529                    .extend(go.key_audit_matters.clone());
13530            }
13531            snapshot.audit_opinions = generated_opinions
13532                .into_iter()
13533                .map(|go| go.opinion)
13534                .collect();
13535
13536            info!(
13537                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
13538                snapshot.audit_opinions.len(),
13539                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
13540                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
13541                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
13542                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
13543            );
13544        }
13545
13546        // ----------------------------------------------------------------
13547        // SOX 302 / 404 assessments
13548        // ----------------------------------------------------------------
13549        {
13550            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
13551
13552            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
13553
13554            for (i, company) in self.config.companies.iter().enumerate() {
13555                // Collect findings for this company's engagements.
13556                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
13557                    .engagements
13558                    .iter()
13559                    .filter(|e| e.client_entity_id == company.code)
13560                    .map(|e| e.engagement_id)
13561                    .collect();
13562
13563                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13564                    .findings
13565                    .iter()
13566                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
13567                    .cloned()
13568                    .collect();
13569
13570                // Derive executive names from employee list.
13571                let emp_count = self.master_data.employees.len();
13572                let ceo_name = if emp_count > 0 {
13573                    self.master_data.employees[i % emp_count]
13574                        .display_name
13575                        .clone()
13576                } else {
13577                    format!("CEO of {}", company.name)
13578                };
13579                let cfo_name = if emp_count > 1 {
13580                    self.master_data.employees[(i + 1) % emp_count]
13581                        .display_name
13582                        .clone()
13583                } else {
13584                    format!("CFO of {}", company.name)
13585                };
13586
13587                // Use engagement materiality if available.
13588                let materiality = snapshot
13589                    .engagements
13590                    .iter()
13591                    .find(|e| e.client_entity_id == company.code)
13592                    .map(|e| e.materiality)
13593                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
13594
13595                let input = SoxGeneratorInput {
13596                    company_code: company.code.clone(),
13597                    company_name: company.name.clone(),
13598                    fiscal_year,
13599                    period_end,
13600                    findings: company_findings,
13601                    ceo_name,
13602                    cfo_name,
13603                    materiality_threshold: materiality,
13604                    revenue_percent: rust_decimal::Decimal::from(100),
13605                    assets_percent: rust_decimal::Decimal::from(100),
13606                    significant_accounts: vec![
13607                        "Revenue".into(),
13608                        "Accounts Receivable".into(),
13609                        "Inventory".into(),
13610                        "Fixed Assets".into(),
13611                        "Accounts Payable".into(),
13612                    ],
13613                };
13614
13615                let (certs, assessment) = sox_gen.generate(&input);
13616                snapshot.sox_302_certifications.extend(certs);
13617                snapshot.sox_404_assessments.push(assessment);
13618            }
13619
13620            info!(
13621                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
13622                snapshot.sox_302_certifications.len(),
13623                snapshot.sox_404_assessments.len(),
13624                snapshot
13625                    .sox_404_assessments
13626                    .iter()
13627                    .filter(|a| a.icfr_effective)
13628                    .count(),
13629                snapshot
13630                    .sox_404_assessments
13631                    .iter()
13632                    .filter(|a| !a.icfr_effective)
13633                    .count(),
13634            );
13635        }
13636
13637        // ----------------------------------------------------------------
13638        // ISA 320: Materiality calculations (one per entity)
13639        // ----------------------------------------------------------------
13640        {
13641            use datasynth_generators::audit::materiality_generator::{
13642                MaterialityGenerator, MaterialityInput,
13643            };
13644
13645            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13646
13647            // Compute per-company financials from JEs.
13648            // Asset accounts start with '1', revenue with '4',
13649            // expense accounts with '5' or '6'.
13650            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13651
13652            for company in &self.config.companies {
13653                let company_code = company.code.clone();
13654
13655                // Revenue: credit-side entries on 4xxx accounts
13656                let company_revenue: rust_decimal::Decimal = entries
13657                    .iter()
13658                    .filter(|e| e.company_code() == company_code)
13659                    .flat_map(|e| e.lines.iter())
13660                    .filter(|l| l.account_code.starts_with('4'))
13661                    .map(|l| l.credit_amount)
13662                    .sum();
13663
13664                // Total assets: debit balances on 1xxx accounts
13665                let total_assets: rust_decimal::Decimal = entries
13666                    .iter()
13667                    .filter(|e| e.company_code() == company_code)
13668                    .flat_map(|e| e.lines.iter())
13669                    .filter(|l| l.account_code.starts_with('1'))
13670                    .map(|l| l.debit_amount)
13671                    .sum();
13672
13673                // Expenses: debit-side entries on 5xxx/6xxx accounts
13674                let total_expenses: rust_decimal::Decimal = entries
13675                    .iter()
13676                    .filter(|e| e.company_code() == company_code)
13677                    .flat_map(|e| e.lines.iter())
13678                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13679                    .map(|l| l.debit_amount)
13680                    .sum();
13681
13682                // Equity: credit balances on 3xxx accounts
13683                let equity: rust_decimal::Decimal = entries
13684                    .iter()
13685                    .filter(|e| e.company_code() == company_code)
13686                    .flat_map(|e| e.lines.iter())
13687                    .filter(|l| l.account_code.starts_with('3'))
13688                    .map(|l| l.credit_amount)
13689                    .sum();
13690
13691                let pretax_income = company_revenue - total_expenses;
13692
13693                // If no company-specific data, fall back to proportional share
13694                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13695                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
13696                        .unwrap_or(rust_decimal::Decimal::ONE);
13697                    (
13698                        total_revenue * w,
13699                        total_revenue * w * rust_decimal::Decimal::from(3),
13700                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
13701                        total_revenue * w * rust_decimal::Decimal::from(2),
13702                    )
13703                } else {
13704                    (company_revenue, total_assets, pretax_income, equity)
13705                };
13706
13707                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
13708
13709                materiality_inputs.push(MaterialityInput {
13710                    entity_code: company_code,
13711                    period: format!("FY{}", fiscal_year),
13712                    revenue: rev,
13713                    pretax_income: pti,
13714                    total_assets: assets,
13715                    equity: eq,
13716                    gross_profit,
13717                });
13718            }
13719
13720            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13721
13722            info!(
13723                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13724                 {} total assets, {} equity benchmarks)",
13725                snapshot.materiality_calculations.len(),
13726                snapshot
13727                    .materiality_calculations
13728                    .iter()
13729                    .filter(|m| matches!(
13730                        m.benchmark,
13731                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13732                    ))
13733                    .count(),
13734                snapshot
13735                    .materiality_calculations
13736                    .iter()
13737                    .filter(|m| matches!(
13738                        m.benchmark,
13739                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13740                    ))
13741                    .count(),
13742                snapshot
13743                    .materiality_calculations
13744                    .iter()
13745                    .filter(|m| matches!(
13746                        m.benchmark,
13747                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13748                    ))
13749                    .count(),
13750                snapshot
13751                    .materiality_calculations
13752                    .iter()
13753                    .filter(|m| matches!(
13754                        m.benchmark,
13755                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13756                    ))
13757                    .count(),
13758            );
13759        }
13760
13761        // ----------------------------------------------------------------
13762        // ISA 315: Combined Risk Assessments (per entity, per account area)
13763        // ----------------------------------------------------------------
13764        {
13765            use datasynth_generators::audit::cra_generator::CraGenerator;
13766
13767            let mut cra_gen = CraGenerator::new(self.seed + 8315);
13768
13769            // Build entity → scope_id map from already-generated scopes
13770            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13771                .audit_scopes
13772                .iter()
13773                .map(|s| (s.entity_code.clone(), s.id.clone()))
13774                .collect();
13775
13776            for company in &self.config.companies {
13777                let cras = cra_gen.generate_for_entity(&company.code, None);
13778                let scope_id = entity_scope_map.get(&company.code).cloned();
13779                let cras_with_scope: Vec<_> = cras
13780                    .into_iter()
13781                    .map(|mut cra| {
13782                        cra.scope_id = scope_id.clone();
13783                        cra
13784                    })
13785                    .collect();
13786                snapshot.combined_risk_assessments.extend(cras_with_scope);
13787            }
13788
13789            let significant_count = snapshot
13790                .combined_risk_assessments
13791                .iter()
13792                .filter(|c| c.significant_risk)
13793                .count();
13794            let high_cra_count = snapshot
13795                .combined_risk_assessments
13796                .iter()
13797                .filter(|c| {
13798                    matches!(
13799                        c.combined_risk,
13800                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13801                    )
13802                })
13803                .count();
13804
13805            info!(
13806                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13807                snapshot.combined_risk_assessments.len(),
13808                significant_count,
13809                high_cra_count,
13810            );
13811        }
13812
13813        // ----------------------------------------------------------------
13814        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
13815        // ----------------------------------------------------------------
13816        {
13817            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13818
13819            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13820
13821            // Group CRAs by entity and use per-entity tolerable error from materiality
13822            for company in &self.config.companies {
13823                let entity_code = company.code.clone();
13824
13825                // Find tolerable error for this entity (= performance materiality)
13826                let tolerable_error = snapshot
13827                    .materiality_calculations
13828                    .iter()
13829                    .find(|m| m.entity_code == entity_code)
13830                    .map(|m| m.tolerable_error);
13831
13832                // Collect CRAs for this entity
13833                let entity_cras: Vec<_> = snapshot
13834                    .combined_risk_assessments
13835                    .iter()
13836                    .filter(|c| c.entity_code == entity_code)
13837                    .cloned()
13838                    .collect();
13839
13840                if !entity_cras.is_empty() {
13841                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13842                    snapshot.sampling_plans.extend(plans);
13843                    snapshot.sampled_items.extend(items);
13844                }
13845            }
13846
13847            let misstatement_count = snapshot
13848                .sampled_items
13849                .iter()
13850                .filter(|i| i.misstatement_found)
13851                .count();
13852
13853            info!(
13854                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13855                snapshot.sampling_plans.len(),
13856                snapshot.sampled_items.len(),
13857                misstatement_count,
13858            );
13859        }
13860
13861        // ----------------------------------------------------------------
13862        // ISA 315: Significant Classes of Transactions (SCOTS)
13863        // ----------------------------------------------------------------
13864        {
13865            use datasynth_generators::audit::scots_generator::{
13866                ScotsGenerator, ScotsGeneratorConfig,
13867            };
13868
13869            let ic_enabled = self.config.intercompany.enabled;
13870
13871            let config = ScotsGeneratorConfig {
13872                intercompany_enabled: ic_enabled,
13873                ..ScotsGeneratorConfig::default()
13874            };
13875            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
13876
13877            for company in &self.config.companies {
13878                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
13879                snapshot
13880                    .significant_transaction_classes
13881                    .extend(entity_scots);
13882            }
13883
13884            let estimation_count = snapshot
13885                .significant_transaction_classes
13886                .iter()
13887                .filter(|s| {
13888                    matches!(
13889                        s.transaction_type,
13890                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
13891                    )
13892                })
13893                .count();
13894
13895            info!(
13896                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
13897                snapshot.significant_transaction_classes.len(),
13898                estimation_count,
13899            );
13900        }
13901
13902        // ----------------------------------------------------------------
13903        // ISA 520: Unusual Item Markers
13904        // ----------------------------------------------------------------
13905        {
13906            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
13907
13908            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
13909            let entity_codes: Vec<String> = self
13910                .config
13911                .companies
13912                .iter()
13913                .map(|c| c.code.clone())
13914                .collect();
13915            let unusual_flags =
13916                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
13917            info!(
13918                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
13919                unusual_flags.len(),
13920                unusual_flags
13921                    .iter()
13922                    .filter(|f| matches!(
13923                        f.severity,
13924                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
13925                    ))
13926                    .count(),
13927                unusual_flags
13928                    .iter()
13929                    .filter(|f| matches!(
13930                        f.severity,
13931                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
13932                    ))
13933                    .count(),
13934                unusual_flags
13935                    .iter()
13936                    .filter(|f| matches!(
13937                        f.severity,
13938                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
13939                    ))
13940                    .count(),
13941            );
13942            snapshot.unusual_items = unusual_flags;
13943        }
13944
13945        // ----------------------------------------------------------------
13946        // ISA 520: Analytical Relationships
13947        // ----------------------------------------------------------------
13948        {
13949            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
13950
13951            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
13952            let entity_codes: Vec<String> = self
13953                .config
13954                .companies
13955                .iter()
13956                .map(|c| c.code.clone())
13957                .collect();
13958            let current_period_label = format!("FY{fiscal_year}");
13959            let prior_period_label = format!("FY{}", fiscal_year - 1);
13960            let analytical_rels = ar_gen.generate_for_entities(
13961                &entity_codes,
13962                entries,
13963                &current_period_label,
13964                &prior_period_label,
13965            );
13966            let out_of_range = analytical_rels
13967                .iter()
13968                .filter(|r| !r.within_expected_range)
13969                .count();
13970            info!(
13971                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13972                analytical_rels.len(),
13973                out_of_range,
13974            );
13975            snapshot.analytical_relationships = analytical_rels;
13976        }
13977
13978        if let Some(pb) = pb {
13979            pb.finish_with_message(format!(
13980                "Audit data: {} engagements, {} workpapers, {} evidence, \
13981                 {} confirmations, {} procedure steps, {} samples, \
13982                 {} analytical, {} IA funcs, {} related parties, \
13983                 {} component auditors, {} letters, {} subsequent events, \
13984                 {} service orgs, {} going concern, {} accounting estimates, \
13985                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13986                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13987                 {} unusual items, {} analytical relationships",
13988                snapshot.engagements.len(),
13989                snapshot.workpapers.len(),
13990                snapshot.evidence.len(),
13991                snapshot.confirmations.len(),
13992                snapshot.procedure_steps.len(),
13993                snapshot.samples.len(),
13994                snapshot.analytical_results.len(),
13995                snapshot.ia_functions.len(),
13996                snapshot.related_parties.len(),
13997                snapshot.component_auditors.len(),
13998                snapshot.engagement_letters.len(),
13999                snapshot.subsequent_events.len(),
14000                snapshot.service_organizations.len(),
14001                snapshot.going_concern_assessments.len(),
14002                snapshot.accounting_estimates.len(),
14003                snapshot.audit_opinions.len(),
14004                snapshot.key_audit_matters.len(),
14005                snapshot.sox_302_certifications.len(),
14006                snapshot.sox_404_assessments.len(),
14007                snapshot.materiality_calculations.len(),
14008                snapshot.combined_risk_assessments.len(),
14009                snapshot.sampling_plans.len(),
14010                snapshot.significant_transaction_classes.len(),
14011                snapshot.unusual_items.len(),
14012                snapshot.analytical_relationships.len(),
14013            ));
14014        }
14015
14016        // ----------------------------------------------------------------
14017        // PCAOB-ISA cross-reference mappings
14018        // ----------------------------------------------------------------
14019        // Always include the standard PCAOB-ISA mappings when audit generation is
14020        // enabled. These are static reference data (no randomness required) so we
14021        // call standard_mappings() directly.
14022        {
14023            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14024            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14025            debug!(
14026                "PCAOB-ISA mappings generated: {} mappings",
14027                snapshot.isa_pcaob_mappings.len()
14028            );
14029        }
14030
14031        // ----------------------------------------------------------------
14032        // ISA standard reference entries
14033        // ----------------------------------------------------------------
14034        // Emit flat ISA standard reference data (number, title, series) so
14035        // consumers get a machine-readable listing of all 34 ISA standards in
14036        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
14037        {
14038            use datasynth_standards::audit::isa_reference::IsaStandard;
14039            snapshot.isa_mappings = IsaStandard::standard_entries();
14040            debug!(
14041                "ISA standard entries generated: {} standards",
14042                snapshot.isa_mappings.len()
14043            );
14044        }
14045
14046        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
14047        // For each RPT, find the chronologically closest JE for the engagement's entity.
14048        {
14049            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
14050                .engagements
14051                .iter()
14052                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
14053                .collect();
14054
14055            for rpt in &mut snapshot.related_party_transactions {
14056                if rpt.journal_entry_id.is_some() {
14057                    continue; // already set
14058                }
14059                let entity = engagement_by_id
14060                    .get(&rpt.engagement_id.to_string())
14061                    .copied()
14062                    .unwrap_or("");
14063
14064                // Find closest JE by date in the entity's company
14065                let best_je = entries
14066                    .iter()
14067                    .filter(|je| je.header.company_code == entity)
14068                    .min_by_key(|je| {
14069                        (je.header.posting_date - rpt.transaction_date)
14070                            .num_days()
14071                            .abs()
14072                    });
14073
14074                if let Some(je) = best_je {
14075                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
14076                }
14077            }
14078
14079            let linked = snapshot
14080                .related_party_transactions
14081                .iter()
14082                .filter(|t| t.journal_entry_id.is_some())
14083                .count();
14084            debug!(
14085                "Linked {}/{} related party transactions to journal entries",
14086                linked,
14087                snapshot.related_party_transactions.len()
14088            );
14089        }
14090
14091        // --- ISA 700 / 701 / 705 / 706: audit opinion + key audit matters.
14092        // One opinion per engagement, derived from that engagement's findings,
14093        // going-concern assessment, and any component-auditor reports. Fills
14094        // `audit_opinions` + a flattened `key_audit_matters` for downstream
14095        // export.
14096        if !snapshot.engagements.is_empty() {
14097            use datasynth_generators::audit_opinion_generator::{
14098                AuditOpinionGenerator, AuditOpinionInput,
14099            };
14100
14101            let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
14102            let inputs: Vec<AuditOpinionInput> = snapshot
14103                .engagements
14104                .iter()
14105                .map(|eng| {
14106                    let findings = snapshot
14107                        .findings
14108                        .iter()
14109                        .filter(|f| f.engagement_id == eng.engagement_id)
14110                        .cloned()
14111                        .collect();
14112                    let going_concern = snapshot
14113                        .going_concern_assessments
14114                        .iter()
14115                        .find(|gc| gc.entity_code == eng.client_entity_id)
14116                        .cloned();
14117                    // ComponentAuditorReport doesn't carry an engagement id, but
14118                    // component scope is keyed by `entity_code`, so filter on that.
14119                    let component_reports = snapshot
14120                        .component_reports
14121                        .iter()
14122                        .filter(|r| r.entity_code == eng.client_entity_id)
14123                        .cloned()
14124                        .collect();
14125
14126                    AuditOpinionInput {
14127                        entity_code: eng.client_entity_id.clone(),
14128                        entity_name: eng.client_name.clone(),
14129                        engagement_id: eng.engagement_id,
14130                        period_end: eng.period_end_date,
14131                        findings,
14132                        going_concern,
14133                        component_reports,
14134                        is_us_listed: matches!(
14135                            eng.engagement_type,
14136                            datasynth_core::audit::EngagementType::IntegratedAudit
14137                                | datasynth_core::audit::EngagementType::Sox404
14138                        ),
14139                        auditor_name: "DataSynth Audit LLP".to_string(),
14140                        engagement_partner: "Engagement Partner".to_string(),
14141                    }
14142                })
14143                .collect();
14144
14145            let generated = opinion_gen.generate_batch(&inputs);
14146            for g in generated {
14147                snapshot.key_audit_matters.extend(g.key_audit_matters);
14148                snapshot.audit_opinions.push(g.opinion);
14149            }
14150            debug!(
14151                "Generated {} audit opinions with {} key audit matters",
14152                snapshot.audit_opinions.len(),
14153                snapshot.key_audit_matters.len()
14154            );
14155        }
14156
14157        Ok(snapshot)
14158    }
14159
14160    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
14161    ///
14162    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
14163    /// from the current orchestrator state, runs the FSM engine, and maps the
14164    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
14165    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
14166    fn generate_audit_data_with_fsm(
14167        &mut self,
14168        entries: &[JournalEntry],
14169    ) -> SynthResult<AuditSnapshot> {
14170        use datasynth_audit_fsm::{
14171            context::EngagementContext,
14172            engine::AuditFsmEngine,
14173            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
14174        };
14175        use rand::SeedableRng;
14176        use rand_chacha::ChaCha8Rng;
14177
14178        info!("Audit FSM: generating audit data via FSM engine");
14179
14180        let fsm_config = self
14181            .config
14182            .audit
14183            .fsm
14184            .as_ref()
14185            .expect("FSM config must be present when FSM is enabled");
14186
14187        // 1. Load blueprint from config string.
14188        let bwp = match fsm_config.blueprint.as_str() {
14189            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
14190            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
14191            _ => {
14192                warn!(
14193                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
14194                    fsm_config.blueprint
14195                );
14196                BlueprintWithPreconditions::load_builtin_fsa()
14197            }
14198        }
14199        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
14200
14201        // 2. Load overlay from config string.
14202        let overlay = match fsm_config.overlay.as_str() {
14203            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
14204            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
14205            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
14206            _ => {
14207                warn!(
14208                    "Unknown FSM overlay '{}', falling back to builtin:default",
14209                    fsm_config.overlay
14210                );
14211                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
14212            }
14213        }
14214        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
14215
14216        // 3. Build EngagementContext from orchestrator state.
14217        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14218            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
14219        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
14220
14221        // Determine the engagement entity early so we can filter JEs.
14222        let company = self.config.companies.first();
14223        let company_code = company
14224            .map(|c| c.code.clone())
14225            .unwrap_or_else(|| "UNKNOWN".to_string());
14226        let company_name = company
14227            .map(|c| c.name.clone())
14228            .unwrap_or_else(|| "Unknown Company".to_string());
14229        let currency = company
14230            .map(|c| c.currency.clone())
14231            .unwrap_or_else(|| "USD".to_string());
14232
14233        // Filter JEs to the engagement entity for single-company coherence.
14234        let entity_entries: Vec<_> = entries
14235            .iter()
14236            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
14237            .cloned()
14238            .collect();
14239        let entries = &entity_entries; // Shadow the parameter for remaining usage
14240
14241        // Financial aggregates from journal entries.
14242        let total_revenue: rust_decimal::Decimal = entries
14243            .iter()
14244            .flat_map(|e| e.lines.iter())
14245            .filter(|l| l.account_code.starts_with('4'))
14246            .map(|l| l.credit_amount - l.debit_amount)
14247            .sum();
14248
14249        let total_assets: rust_decimal::Decimal = entries
14250            .iter()
14251            .flat_map(|e| e.lines.iter())
14252            .filter(|l| l.account_code.starts_with('1'))
14253            .map(|l| l.debit_amount - l.credit_amount)
14254            .sum();
14255
14256        let total_expenses: rust_decimal::Decimal = entries
14257            .iter()
14258            .flat_map(|e| e.lines.iter())
14259            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
14260            .map(|l| l.debit_amount)
14261            .sum();
14262
14263        let equity: rust_decimal::Decimal = entries
14264            .iter()
14265            .flat_map(|e| e.lines.iter())
14266            .filter(|l| l.account_code.starts_with('3'))
14267            .map(|l| l.credit_amount - l.debit_amount)
14268            .sum();
14269
14270        let total_debt: rust_decimal::Decimal = entries
14271            .iter()
14272            .flat_map(|e| e.lines.iter())
14273            .filter(|l| l.account_code.starts_with('2'))
14274            .map(|l| l.credit_amount - l.debit_amount)
14275            .sum();
14276
14277        let pretax_income = total_revenue - total_expenses;
14278
14279        let cogs: rust_decimal::Decimal = entries
14280            .iter()
14281            .flat_map(|e| e.lines.iter())
14282            .filter(|l| l.account_code.starts_with('5'))
14283            .map(|l| l.debit_amount)
14284            .sum();
14285        let gross_profit = total_revenue - cogs;
14286
14287        let current_assets: rust_decimal::Decimal = entries
14288            .iter()
14289            .flat_map(|e| e.lines.iter())
14290            .filter(|l| {
14291                l.account_code.starts_with("10")
14292                    || l.account_code.starts_with("11")
14293                    || l.account_code.starts_with("12")
14294                    || l.account_code.starts_with("13")
14295            })
14296            .map(|l| l.debit_amount - l.credit_amount)
14297            .sum();
14298        let current_liabilities: rust_decimal::Decimal = entries
14299            .iter()
14300            .flat_map(|e| e.lines.iter())
14301            .filter(|l| {
14302                l.account_code.starts_with("20")
14303                    || l.account_code.starts_with("21")
14304                    || l.account_code.starts_with("22")
14305            })
14306            .map(|l| l.credit_amount - l.debit_amount)
14307            .sum();
14308        let working_capital = current_assets - current_liabilities;
14309
14310        let depreciation: rust_decimal::Decimal = entries
14311            .iter()
14312            .flat_map(|e| e.lines.iter())
14313            .filter(|l| l.account_code.starts_with("60"))
14314            .map(|l| l.debit_amount)
14315            .sum();
14316        let operating_cash_flow = pretax_income + depreciation;
14317
14318        // GL accounts for reference data.
14319        let accounts: Vec<String> = self
14320            .coa
14321            .as_ref()
14322            .map(|coa| {
14323                coa.get_postable_accounts()
14324                    .iter()
14325                    .map(|acc| acc.account_code().to_string())
14326                    .collect()
14327            })
14328            .unwrap_or_default();
14329
14330        // Team member IDs and display names from master data.
14331        let team_member_ids: Vec<String> = self
14332            .master_data
14333            .employees
14334            .iter()
14335            .take(8) // Cap team size
14336            .map(|e| e.employee_id.clone())
14337            .collect();
14338        let team_member_pairs: Vec<(String, String)> = self
14339            .master_data
14340            .employees
14341            .iter()
14342            .take(8)
14343            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
14344            .collect();
14345
14346        let vendor_names: Vec<String> = self
14347            .master_data
14348            .vendors
14349            .iter()
14350            .map(|v| v.name.clone())
14351            .collect();
14352        let customer_names: Vec<String> = self
14353            .master_data
14354            .customers
14355            .iter()
14356            .map(|c| c.name.clone())
14357            .collect();
14358
14359        let entity_codes: Vec<String> = self
14360            .config
14361            .companies
14362            .iter()
14363            .map(|c| c.code.clone())
14364            .collect();
14365
14366        // Journal entry IDs for evidence tracing (sample up to 50).
14367        let journal_entry_ids: Vec<String> = entries
14368            .iter()
14369            .take(50)
14370            .map(|e| e.header.document_id.to_string())
14371            .collect();
14372
14373        // Account balances for risk weighting (aggregate debit - credit per account).
14374        let mut account_balances = std::collections::HashMap::<String, f64>::new();
14375        for entry in entries {
14376            for line in &entry.lines {
14377                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
14378                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
14379                *account_balances
14380                    .entry(line.account_code.clone())
14381                    .or_insert(0.0) += debit_f64 - credit_f64;
14382            }
14383        }
14384
14385        // Internal control IDs and anomaly refs are populated by the
14386        // caller when available; here we default to empty because the
14387        // orchestrator state may not have generated controls/anomalies
14388        // yet at this point in the pipeline.
14389        let control_ids: Vec<String> = Vec::new();
14390        let anomaly_refs: Vec<String> = Vec::new();
14391
14392        let mut context = EngagementContext {
14393            company_code,
14394            company_name,
14395            fiscal_year: start_date.year(),
14396            currency,
14397            total_revenue,
14398            total_assets,
14399            engagement_start: start_date,
14400            report_date: period_end,
14401            pretax_income,
14402            equity,
14403            gross_profit,
14404            working_capital,
14405            operating_cash_flow,
14406            total_debt,
14407            team_member_ids,
14408            team_member_pairs,
14409            accounts,
14410            vendor_names,
14411            customer_names,
14412            journal_entry_ids,
14413            account_balances,
14414            control_ids,
14415            anomaly_refs,
14416            journal_entries: entries.to_vec(),
14417            is_us_listed: false,
14418            entity_codes,
14419            auditor_firm_name: "DataSynth Audit LLP".into(),
14420            accounting_framework: self
14421                .config
14422                .accounting_standards
14423                .framework
14424                .map(|f| match f {
14425                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
14426                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
14427                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
14428                        "French GAAP"
14429                    }
14430                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
14431                        "German GAAP"
14432                    }
14433                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
14434                        "Dual Reporting"
14435                    }
14436                })
14437                .unwrap_or("IFRS")
14438                .into(),
14439        };
14440
14441        // 4. Create and run the FSM engine.
14442        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
14443        let rng = ChaCha8Rng::seed_from_u64(seed);
14444        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
14445
14446        let mut result = engine
14447            .run_engagement(&context)
14448            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
14449
14450        info!(
14451            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
14452             {} phases completed, duration {:.1}h",
14453            result.event_log.len(),
14454            result.artifacts.total_artifacts(),
14455            result.anomalies.len(),
14456            result.phases_completed.len(),
14457            result.total_duration_hours,
14458        );
14459
14460        // 4b. Populate financial data in the artifact bag for downstream consumers.
14461        let tb_entity = context.company_code.clone();
14462        let tb_fy = context.fiscal_year;
14463        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
14464        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
14465            entries,
14466            &tb_entity,
14467            tb_fy,
14468            self.coa.as_ref().map(|c| c.as_ref()),
14469        );
14470
14471        // 5. Map ArtifactBag fields to AuditSnapshot.
14472        let bag = result.artifacts;
14473        let mut snapshot = AuditSnapshot {
14474            engagements: bag.engagements,
14475            engagement_letters: bag.engagement_letters,
14476            materiality_calculations: bag.materiality_calculations,
14477            risk_assessments: bag.risk_assessments,
14478            combined_risk_assessments: bag.combined_risk_assessments,
14479            workpapers: bag.workpapers,
14480            evidence: bag.evidence,
14481            findings: bag.findings,
14482            judgments: bag.judgments,
14483            sampling_plans: bag.sampling_plans,
14484            sampled_items: bag.sampled_items,
14485            analytical_results: bag.analytical_results,
14486            going_concern_assessments: bag.going_concern_assessments,
14487            subsequent_events: bag.subsequent_events,
14488            audit_opinions: bag.audit_opinions,
14489            key_audit_matters: bag.key_audit_matters,
14490            procedure_steps: bag.procedure_steps,
14491            samples: bag.samples,
14492            confirmations: bag.confirmations,
14493            confirmation_responses: bag.confirmation_responses,
14494            // Store the event trail for downstream export.
14495            fsm_event_trail: Some(result.event_log),
14496            // Fields not produced by the FSM engine remain at their defaults.
14497            ..Default::default()
14498        };
14499
14500        // 6. Add static reference data (same as legacy path).
14501        {
14502            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14503            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14504        }
14505        {
14506            use datasynth_standards::audit::isa_reference::IsaStandard;
14507            snapshot.isa_mappings = IsaStandard::standard_entries();
14508        }
14509
14510        info!(
14511            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
14512             {} risk assessments, {} findings, {} materiality calcs",
14513            snapshot.engagements.len(),
14514            snapshot.workpapers.len(),
14515            snapshot.evidence.len(),
14516            snapshot.risk_assessments.len(),
14517            snapshot.findings.len(),
14518            snapshot.materiality_calculations.len(),
14519        );
14520
14521        Ok(snapshot)
14522    }
14523
14524    /// Export journal entries as graph data for ML training and network reconstruction.
14525    ///
14526    /// Builds a transaction graph where:
14527    /// - Nodes are GL accounts
14528    /// - Edges are money flows from credit to debit accounts
14529    /// - Edge attributes include amount, date, business process, anomaly flags
14530    fn export_graphs(
14531        &mut self,
14532        entries: &[JournalEntry],
14533        _coa: &Arc<ChartOfAccounts>,
14534        stats: &mut EnhancedGenerationStatistics,
14535    ) -> SynthResult<GraphExportSnapshot> {
14536        let pb = self.create_progress_bar(100, "Exporting Graphs");
14537
14538        let mut snapshot = GraphExportSnapshot::default();
14539
14540        // Get output directory
14541        let output_dir = self
14542            .output_path
14543            .clone()
14544            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14545        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14546
14547        // Process each graph type configuration
14548        for graph_type in &self.config.graph_export.graph_types {
14549            if let Some(pb) = &pb {
14550                pb.inc(10);
14551            }
14552
14553            // Build transaction graph
14554            let graph_config = TransactionGraphConfig {
14555                include_vendors: false,
14556                include_customers: false,
14557                create_debit_credit_edges: true,
14558                include_document_nodes: graph_type.include_document_nodes,
14559                min_edge_weight: graph_type.min_edge_weight,
14560                aggregate_parallel_edges: graph_type.aggregate_edges,
14561                framework: None,
14562            };
14563
14564            let mut builder = TransactionGraphBuilder::new(graph_config);
14565            builder.add_journal_entries(entries);
14566            let graph = builder.build();
14567
14568            // Update stats
14569            stats.graph_node_count += graph.node_count();
14570            stats.graph_edge_count += graph.edge_count();
14571
14572            if let Some(pb) = &pb {
14573                pb.inc(40);
14574            }
14575
14576            // Export to each configured format
14577            for format in &self.config.graph_export.formats {
14578                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
14579
14580                // Create output directory
14581                if let Err(e) = std::fs::create_dir_all(&format_dir) {
14582                    warn!("Failed to create graph output directory: {}", e);
14583                    continue;
14584                }
14585
14586                match format {
14587                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
14588                        let pyg_config = PyGExportConfig {
14589                            common: datasynth_graph::CommonExportConfig {
14590                                export_node_features: true,
14591                                export_edge_features: true,
14592                                export_node_labels: true,
14593                                export_edge_labels: true,
14594                                export_masks: true,
14595                                train_ratio: self.config.graph_export.train_ratio,
14596                                val_ratio: self.config.graph_export.validation_ratio,
14597                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14598                            },
14599                            one_hot_categoricals: false,
14600                        };
14601
14602                        let exporter = PyGExporter::new(pyg_config);
14603                        match exporter.export(&graph, &format_dir) {
14604                            Ok(metadata) => {
14605                                snapshot.exports.insert(
14606                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
14607                                    GraphExportInfo {
14608                                        name: graph_type.name.clone(),
14609                                        format: "pytorch_geometric".to_string(),
14610                                        output_path: format_dir.clone(),
14611                                        node_count: metadata.num_nodes,
14612                                        edge_count: metadata.num_edges,
14613                                    },
14614                                );
14615                                snapshot.graph_count += 1;
14616                            }
14617                            Err(e) => {
14618                                warn!("Failed to export PyTorch Geometric graph: {}", e);
14619                            }
14620                        }
14621                    }
14622                    datasynth_config::schema::GraphExportFormat::Neo4j => {
14623                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
14624
14625                        let neo4j_config = Neo4jExportConfig {
14626                            export_node_properties: true,
14627                            export_edge_properties: true,
14628                            export_features: true,
14629                            generate_cypher: true,
14630                            generate_admin_import: true,
14631                            database_name: "synth".to_string(),
14632                            cypher_batch_size: 1000,
14633                        };
14634
14635                        let exporter = Neo4jExporter::new(neo4j_config);
14636                        match exporter.export(&graph, &format_dir) {
14637                            Ok(metadata) => {
14638                                snapshot.exports.insert(
14639                                    format!("{}_{}", graph_type.name, "neo4j"),
14640                                    GraphExportInfo {
14641                                        name: graph_type.name.clone(),
14642                                        format: "neo4j".to_string(),
14643                                        output_path: format_dir.clone(),
14644                                        node_count: metadata.num_nodes,
14645                                        edge_count: metadata.num_edges,
14646                                    },
14647                                );
14648                                snapshot.graph_count += 1;
14649                            }
14650                            Err(e) => {
14651                                warn!("Failed to export Neo4j graph: {}", e);
14652                            }
14653                        }
14654                    }
14655                    datasynth_config::schema::GraphExportFormat::Dgl => {
14656                        use datasynth_graph::{DGLExportConfig, DGLExporter};
14657
14658                        let dgl_config = DGLExportConfig {
14659                            common: datasynth_graph::CommonExportConfig {
14660                                export_node_features: true,
14661                                export_edge_features: true,
14662                                export_node_labels: true,
14663                                export_edge_labels: true,
14664                                export_masks: true,
14665                                train_ratio: self.config.graph_export.train_ratio,
14666                                val_ratio: self.config.graph_export.validation_ratio,
14667                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14668                            },
14669                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
14670                            include_pickle_script: true, // DGL ecosystem standard helper
14671                        };
14672
14673                        let exporter = DGLExporter::new(dgl_config);
14674                        match exporter.export(&graph, &format_dir) {
14675                            Ok(metadata) => {
14676                                snapshot.exports.insert(
14677                                    format!("{}_{}", graph_type.name, "dgl"),
14678                                    GraphExportInfo {
14679                                        name: graph_type.name.clone(),
14680                                        format: "dgl".to_string(),
14681                                        output_path: format_dir.clone(),
14682                                        node_count: metadata.common.num_nodes,
14683                                        edge_count: metadata.common.num_edges,
14684                                    },
14685                                );
14686                                snapshot.graph_count += 1;
14687                            }
14688                            Err(e) => {
14689                                warn!("Failed to export DGL graph: {}", e);
14690                            }
14691                        }
14692                    }
14693                    datasynth_config::schema::GraphExportFormat::RustGraph => {
14694                        use datasynth_graph::{
14695                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14696                        };
14697
14698                        let rustgraph_config = RustGraphExportConfig {
14699                            include_features: true,
14700                            include_temporal: true,
14701                            include_labels: true,
14702                            source_name: "datasynth".to_string(),
14703                            batch_id: None,
14704                            output_format: RustGraphOutputFormat::JsonLines,
14705                            export_node_properties: true,
14706                            export_edge_properties: true,
14707                            pretty_print: false,
14708                        };
14709
14710                        let exporter = RustGraphExporter::new(rustgraph_config);
14711                        match exporter.export(&graph, &format_dir) {
14712                            Ok(metadata) => {
14713                                snapshot.exports.insert(
14714                                    format!("{}_{}", graph_type.name, "rustgraph"),
14715                                    GraphExportInfo {
14716                                        name: graph_type.name.clone(),
14717                                        format: "rustgraph".to_string(),
14718                                        output_path: format_dir.clone(),
14719                                        node_count: metadata.num_nodes,
14720                                        edge_count: metadata.num_edges,
14721                                    },
14722                                );
14723                                snapshot.graph_count += 1;
14724                            }
14725                            Err(e) => {
14726                                warn!("Failed to export RustGraph: {}", e);
14727                            }
14728                        }
14729                    }
14730                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14731                        // Hypergraph export is handled separately in Phase 10b
14732                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14733                    }
14734                }
14735            }
14736
14737            if let Some(pb) = &pb {
14738                pb.inc(40);
14739            }
14740        }
14741
14742        stats.graph_export_count = snapshot.graph_count;
14743        snapshot.exported = snapshot.graph_count > 0;
14744
14745        if let Some(pb) = pb {
14746            pb.finish_with_message(format!(
14747                "Graphs exported: {} graphs ({} nodes, {} edges)",
14748                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14749            ));
14750        }
14751
14752        Ok(snapshot)
14753    }
14754
14755    /// Build additional graph types (banking, approval, entity) when relevant data
14756    /// is available. These run as a late phase because the data they need (banking
14757    /// snapshot, intercompany snapshot) is only generated after the main graph
14758    /// export phase.
14759    fn build_additional_graphs(
14760        &self,
14761        banking: &BankingSnapshot,
14762        intercompany: &IntercompanySnapshot,
14763        entries: &[JournalEntry],
14764        stats: &mut EnhancedGenerationStatistics,
14765    ) {
14766        let output_dir = self
14767            .output_path
14768            .clone()
14769            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14770        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14771
14772        // Banking graph: build when banking customers and transactions exist
14773        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14774            info!("Phase 10c: Building banking network graph");
14775            let config = BankingGraphConfig::default();
14776            let mut builder = BankingGraphBuilder::new(config);
14777            builder.add_customers(&banking.customers);
14778            builder.add_accounts(&banking.accounts, &banking.customers);
14779            builder.add_transactions(&banking.transactions);
14780            let graph = builder.build();
14781
14782            let node_count = graph.node_count();
14783            let edge_count = graph.edge_count();
14784            stats.graph_node_count += node_count;
14785            stats.graph_edge_count += edge_count;
14786
14787            // Export as PyG if configured
14788            for format in &self.config.graph_export.formats {
14789                if matches!(
14790                    format,
14791                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14792                ) {
14793                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14794                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14795                        warn!("Failed to create banking graph output dir: {}", e);
14796                        continue;
14797                    }
14798                    let pyg_config = PyGExportConfig::default();
14799                    let exporter = PyGExporter::new(pyg_config);
14800                    if let Err(e) = exporter.export(&graph, &format_dir) {
14801                        warn!("Failed to export banking graph as PyG: {}", e);
14802                    } else {
14803                        info!(
14804                            "Banking network graph exported: {} nodes, {} edges",
14805                            node_count, edge_count
14806                        );
14807                    }
14808                }
14809            }
14810        }
14811
14812        // Approval graph: build from journal entry approval workflows
14813        let approval_entries: Vec<_> = entries
14814            .iter()
14815            .filter(|je| je.header.approval_workflow.is_some())
14816            .collect();
14817
14818        if !approval_entries.is_empty() {
14819            info!(
14820                "Phase 10c: Building approval network graph ({} entries with approvals)",
14821                approval_entries.len()
14822            );
14823            let config = ApprovalGraphConfig::default();
14824            let mut builder = ApprovalGraphBuilder::new(config);
14825
14826            for je in &approval_entries {
14827                if let Some(ref wf) = je.header.approval_workflow {
14828                    for action in &wf.actions {
14829                        let record = datasynth_core::models::ApprovalRecord {
14830                            approval_id: format!(
14831                                "APR-{}-{}",
14832                                je.header.document_id, action.approval_level
14833                            ),
14834                            document_number: je.header.document_id.to_string(),
14835                            document_type: "JE".to_string(),
14836                            company_code: je.company_code().to_string(),
14837                            requester_id: wf.preparer_id.clone(),
14838                            requester_name: Some(wf.preparer_name.clone()),
14839                            approver_id: action.actor_id.clone(),
14840                            approver_name: action.actor_name.clone(),
14841                            approval_date: je.posting_date(),
14842                            action: format!("{:?}", action.action),
14843                            amount: wf.amount,
14844                            approval_limit: None,
14845                            comments: action.comments.clone(),
14846                            delegation_from: None,
14847                            is_auto_approved: false,
14848                        };
14849                        builder.add_approval(&record);
14850                    }
14851                }
14852            }
14853
14854            let graph = builder.build();
14855            let node_count = graph.node_count();
14856            let edge_count = graph.edge_count();
14857            stats.graph_node_count += node_count;
14858            stats.graph_edge_count += edge_count;
14859
14860            // Export as PyG if configured
14861            for format in &self.config.graph_export.formats {
14862                if matches!(
14863                    format,
14864                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14865                ) {
14866                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14867                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14868                        warn!("Failed to create approval graph output dir: {}", e);
14869                        continue;
14870                    }
14871                    let pyg_config = PyGExportConfig::default();
14872                    let exporter = PyGExporter::new(pyg_config);
14873                    if let Err(e) = exporter.export(&graph, &format_dir) {
14874                        warn!("Failed to export approval graph as PyG: {}", e);
14875                    } else {
14876                        info!(
14877                            "Approval network graph exported: {} nodes, {} edges",
14878                            node_count, edge_count
14879                        );
14880                    }
14881                }
14882            }
14883        }
14884
14885        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
14886        if self.config.companies.len() >= 2 {
14887            info!(
14888                "Phase 10c: Building entity relationship graph ({} companies)",
14889                self.config.companies.len()
14890            );
14891
14892            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14893                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
14894
14895            // Map CompanyConfig → Company objects
14896            let parent_code = &self.config.companies[0].code;
14897            let mut companies: Vec<datasynth_core::models::Company> =
14898                Vec::with_capacity(self.config.companies.len());
14899
14900            // First company is the parent
14901            let first = &self.config.companies[0];
14902            companies.push(datasynth_core::models::Company::parent(
14903                &first.code,
14904                &first.name,
14905                &first.country,
14906                &first.currency,
14907            ));
14908
14909            // Remaining companies are subsidiaries (100% owned by parent)
14910            for cc in self.config.companies.iter().skip(1) {
14911                companies.push(datasynth_core::models::Company::subsidiary(
14912                    &cc.code,
14913                    &cc.name,
14914                    &cc.country,
14915                    &cc.currency,
14916                    parent_code,
14917                    rust_decimal::Decimal::from(100),
14918                ));
14919            }
14920
14921            // Build IntercompanyRelationship records (same logic as phase_intercompany)
14922            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
14923                self.config
14924                    .companies
14925                    .iter()
14926                    .skip(1)
14927                    .enumerate()
14928                    .map(|(i, cc)| {
14929                        let mut rel =
14930                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
14931                                format!("REL{:03}", i + 1),
14932                                parent_code.clone(),
14933                                cc.code.clone(),
14934                                rust_decimal::Decimal::from(100),
14935                                start_date,
14936                            );
14937                        rel.functional_currency = cc.currency.clone();
14938                        rel
14939                    })
14940                    .collect();
14941
14942            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
14943            builder.add_companies(&companies);
14944            builder.add_ownership_relationships(&relationships);
14945
14946            // Thread IC matched-pair transaction edges into the entity graph
14947            for pair in &intercompany.matched_pairs {
14948                builder.add_intercompany_edge(
14949                    &pair.seller_company,
14950                    &pair.buyer_company,
14951                    pair.amount,
14952                    &format!("{:?}", pair.transaction_type),
14953                );
14954            }
14955
14956            let graph = builder.build();
14957            let node_count = graph.node_count();
14958            let edge_count = graph.edge_count();
14959            stats.graph_node_count += node_count;
14960            stats.graph_edge_count += edge_count;
14961
14962            // Export as PyG if configured
14963            for format in &self.config.graph_export.formats {
14964                if matches!(
14965                    format,
14966                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14967                ) {
14968                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14969                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14970                        warn!("Failed to create entity graph output dir: {}", e);
14971                        continue;
14972                    }
14973                    let pyg_config = PyGExportConfig::default();
14974                    let exporter = PyGExporter::new(pyg_config);
14975                    if let Err(e) = exporter.export(&graph, &format_dir) {
14976                        warn!("Failed to export entity graph as PyG: {}", e);
14977                    } else {
14978                        info!(
14979                            "Entity relationship graph exported: {} nodes, {} edges",
14980                            node_count, edge_count
14981                        );
14982                    }
14983                }
14984            }
14985        } else {
14986            debug!(
14987                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14988                self.config.companies.len()
14989            );
14990        }
14991    }
14992
14993    /// Export a multi-layer hypergraph for RustGraph integration.
14994    ///
14995    /// Builds a 3-layer hypergraph:
14996    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
14997    /// - Layer 2: Process Events (all process family document flows + OCPM events)
14998    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
14999    #[allow(clippy::too_many_arguments)]
15000    fn export_hypergraph(
15001        &self,
15002        coa: &Arc<ChartOfAccounts>,
15003        entries: &[JournalEntry],
15004        document_flows: &DocumentFlowSnapshot,
15005        sourcing: &SourcingSnapshot,
15006        hr: &HrSnapshot,
15007        manufacturing: &ManufacturingSnapshot,
15008        banking: &BankingSnapshot,
15009        audit: &AuditSnapshot,
15010        financial_reporting: &FinancialReportingSnapshot,
15011        ocpm: &OcpmSnapshot,
15012        compliance: &ComplianceRegulationsSnapshot,
15013        stats: &mut EnhancedGenerationStatistics,
15014    ) -> SynthResult<HypergraphExportInfo> {
15015        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
15016        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
15017        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
15018        use datasynth_graph::models::hypergraph::AggregationStrategy;
15019
15020        let hg_settings = &self.config.graph_export.hypergraph;
15021
15022        // Parse aggregation strategy from config string
15023        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
15024            "truncate" => AggregationStrategy::Truncate,
15025            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
15026            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
15027            "importance_sample" => AggregationStrategy::ImportanceSample,
15028            _ => AggregationStrategy::PoolByCounterparty,
15029        };
15030
15031        let builder_config = HypergraphConfig {
15032            max_nodes: hg_settings.max_nodes,
15033            aggregation_strategy,
15034            include_coso: hg_settings.governance_layer.include_coso,
15035            include_controls: hg_settings.governance_layer.include_controls,
15036            include_sox: hg_settings.governance_layer.include_sox,
15037            include_vendors: hg_settings.governance_layer.include_vendors,
15038            include_customers: hg_settings.governance_layer.include_customers,
15039            include_employees: hg_settings.governance_layer.include_employees,
15040            include_p2p: hg_settings.process_layer.include_p2p,
15041            include_o2c: hg_settings.process_layer.include_o2c,
15042            include_s2c: hg_settings.process_layer.include_s2c,
15043            include_h2r: hg_settings.process_layer.include_h2r,
15044            include_mfg: hg_settings.process_layer.include_mfg,
15045            include_bank: hg_settings.process_layer.include_bank,
15046            include_audit: hg_settings.process_layer.include_audit,
15047            include_r2r: hg_settings.process_layer.include_r2r,
15048            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
15049            docs_per_counterparty_threshold: hg_settings
15050                .process_layer
15051                .docs_per_counterparty_threshold,
15052            include_accounts: hg_settings.accounting_layer.include_accounts,
15053            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
15054            include_cross_layer_edges: hg_settings.cross_layer.enabled,
15055            include_compliance: self.config.compliance_regulations.enabled,
15056            include_tax: true,
15057            include_treasury: true,
15058            include_esg: true,
15059            include_project: true,
15060            include_intercompany: true,
15061            include_temporal_events: true,
15062        };
15063
15064        let mut builder = HypergraphBuilder::new(builder_config);
15065
15066        // Layer 1: Governance & Controls
15067        builder.add_coso_framework();
15068
15069        // Add controls if available (generated during JE generation)
15070        // Controls are generated per-company; we use the standard set
15071        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
15072            let controls = InternalControl::standard_controls();
15073            builder.add_controls(&controls);
15074        }
15075
15076        // Add master data
15077        builder.add_vendors(&self.master_data.vendors);
15078        builder.add_customers(&self.master_data.customers);
15079        builder.add_employees(&self.master_data.employees);
15080
15081        // Layer 2: Process Events (all process families)
15082        builder.add_p2p_documents(
15083            &document_flows.purchase_orders,
15084            &document_flows.goods_receipts,
15085            &document_flows.vendor_invoices,
15086            &document_flows.payments,
15087        );
15088        builder.add_o2c_documents(
15089            &document_flows.sales_orders,
15090            &document_flows.deliveries,
15091            &document_flows.customer_invoices,
15092        );
15093        builder.add_s2c_documents(
15094            &sourcing.sourcing_projects,
15095            &sourcing.qualifications,
15096            &sourcing.rfx_events,
15097            &sourcing.bids,
15098            &sourcing.bid_evaluations,
15099            &sourcing.contracts,
15100        );
15101        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
15102        builder.add_mfg_documents(
15103            &manufacturing.production_orders,
15104            &manufacturing.quality_inspections,
15105            &manufacturing.cycle_counts,
15106        );
15107        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
15108        builder.add_audit_documents(
15109            &audit.engagements,
15110            &audit.workpapers,
15111            &audit.findings,
15112            &audit.evidence,
15113            &audit.risk_assessments,
15114            &audit.judgments,
15115            &audit.materiality_calculations,
15116            &audit.audit_opinions,
15117            &audit.going_concern_assessments,
15118        );
15119        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
15120
15121        // OCPM events as hyperedges
15122        if let Some(ref event_log) = ocpm.event_log {
15123            builder.add_ocpm_events(event_log);
15124        }
15125
15126        // Compliance regulations as cross-layer nodes
15127        if self.config.compliance_regulations.enabled
15128            && hg_settings.governance_layer.include_controls
15129        {
15130            // Reconstruct ComplianceStandard objects from the registry
15131            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15132            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
15133                .standard_records
15134                .iter()
15135                .filter_map(|r| {
15136                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
15137                    registry.get(&sid).cloned()
15138                })
15139                .collect();
15140
15141            builder.add_compliance_regulations(
15142                &standards,
15143                &compliance.findings,
15144                &compliance.filings,
15145            );
15146        }
15147
15148        // Layer 3: Accounting Network
15149        builder.add_accounts(coa);
15150        builder.add_journal_entries_as_hyperedges(entries);
15151
15152        // Build the hypergraph
15153        let hypergraph = builder.build();
15154
15155        // Export
15156        let output_dir = self
15157            .output_path
15158            .clone()
15159            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
15160        let hg_dir = output_dir
15161            .join(&self.config.graph_export.output_subdirectory)
15162            .join(&hg_settings.output_subdirectory);
15163
15164        // Branch on output format
15165        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
15166            "unified" => {
15167                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
15168                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
15169                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
15170                })?;
15171                (
15172                    metadata.num_nodes,
15173                    metadata.num_edges,
15174                    metadata.num_hyperedges,
15175                )
15176            }
15177            _ => {
15178                // "native" or any unrecognized format → use existing exporter
15179                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
15180                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
15181                    SynthError::generation(format!("Hypergraph export failed: {e}"))
15182                })?;
15183                (
15184                    metadata.num_nodes,
15185                    metadata.num_edges,
15186                    metadata.num_hyperedges,
15187                )
15188            }
15189        };
15190
15191        // Stream to RustGraph ingest endpoint if configured
15192        #[cfg(feature = "streaming")]
15193        if let Some(ref target_url) = hg_settings.stream_target {
15194            use crate::stream_client::{StreamClient, StreamConfig};
15195            use std::io::Write as _;
15196
15197            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
15198            let stream_config = StreamConfig {
15199                target_url: target_url.clone(),
15200                batch_size: hg_settings.stream_batch_size,
15201                api_key,
15202                ..StreamConfig::default()
15203            };
15204
15205            match StreamClient::new(stream_config) {
15206                Ok(mut client) => {
15207                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
15208                    match exporter.export_to_writer(&hypergraph, &mut client) {
15209                        Ok(_) => {
15210                            if let Err(e) = client.flush() {
15211                                warn!("Failed to flush stream client: {}", e);
15212                            } else {
15213                                info!("Streamed {} records to {}", client.total_sent(), target_url);
15214                            }
15215                        }
15216                        Err(e) => {
15217                            warn!("Streaming export failed: {}", e);
15218                        }
15219                    }
15220                }
15221                Err(e) => {
15222                    warn!("Failed to create stream client: {}", e);
15223                }
15224            }
15225        }
15226
15227        // Update stats
15228        stats.graph_node_count += num_nodes;
15229        stats.graph_edge_count += num_edges;
15230        stats.graph_export_count += 1;
15231
15232        Ok(HypergraphExportInfo {
15233            node_count: num_nodes,
15234            edge_count: num_edges,
15235            hyperedge_count: num_hyperedges,
15236            output_path: hg_dir,
15237        })
15238    }
15239
15240    /// Generate banking KYC/AML data.
15241    ///
15242    /// Creates banking customers, accounts, and transactions with AML typology injection.
15243    /// Uses the BankingOrchestrator from synth-banking crate.
15244    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
15245        let pb = self.create_progress_bar(100, "Generating Banking Data");
15246
15247        // Build the banking orchestrator from config
15248        let orchestrator = BankingOrchestratorBuilder::new()
15249            .config(self.config.banking.clone())
15250            .seed(self.seed + 9000)
15251            .country_pack(self.primary_pack().clone())
15252            .build();
15253
15254        if let Some(pb) = &pb {
15255            pb.inc(10);
15256        }
15257
15258        // Generate the banking data
15259        let result = orchestrator.generate();
15260
15261        if let Some(pb) = &pb {
15262            pb.inc(90);
15263            pb.finish_with_message(format!(
15264                "Banking: {} customers, {} transactions",
15265                result.customers.len(),
15266                result.transactions.len()
15267            ));
15268        }
15269
15270        // Cross-reference banking customers with core master data so that
15271        // banking customer names align with the enterprise customer list.
15272        // We rotate through core customers, overlaying their name and country
15273        // onto the generated banking customers where possible.
15274        let mut banking_customers = result.customers;
15275        let core_customers = &self.master_data.customers;
15276        if !core_customers.is_empty() {
15277            for (i, bc) in banking_customers.iter_mut().enumerate() {
15278                let core = &core_customers[i % core_customers.len()];
15279                bc.name = CustomerName::business(&core.name);
15280                bc.residence_country = core.country.clone();
15281                bc.enterprise_customer_id = Some(core.customer_id.clone());
15282            }
15283            debug!(
15284                "Cross-referenced {} banking customers with {} core customers",
15285                banking_customers.len(),
15286                core_customers.len()
15287            );
15288        }
15289
15290        Ok(BankingSnapshot {
15291            customers: banking_customers,
15292            accounts: result.accounts,
15293            transactions: result.transactions,
15294            transaction_labels: result.transaction_labels,
15295            customer_labels: result.customer_labels,
15296            account_labels: result.account_labels,
15297            relationship_labels: result.relationship_labels,
15298            narratives: result.narratives,
15299            suspicious_count: result.stats.suspicious_count,
15300            scenario_count: result.scenarios.len(),
15301        })
15302    }
15303
15304    /// Calculate total transactions to generate.
15305    fn calculate_total_transactions(&self) -> u64 {
15306        let months = self.config.global.period_months as f64;
15307        self.config
15308            .companies
15309            .iter()
15310            .map(|c| {
15311                let annual = c.annual_transaction_volume.count() as f64;
15312                let weighted = annual * c.volume_weight;
15313                (weighted * months / 12.0) as u64
15314            })
15315            .sum()
15316    }
15317
15318    /// Create a progress bar if progress display is enabled.
15319    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
15320        if !self.phase_config.show_progress {
15321            return None;
15322        }
15323
15324        let pb = if let Some(mp) = &self.multi_progress {
15325            mp.add(ProgressBar::new(total))
15326        } else {
15327            ProgressBar::new(total)
15328        };
15329
15330        pb.set_style(
15331            ProgressStyle::default_bar()
15332                .template(&format!(
15333                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
15334                ))
15335                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
15336                .progress_chars("#>-"),
15337        );
15338
15339        Some(pb)
15340    }
15341
15342    /// Get the generated chart of accounts.
15343    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
15344        self.coa.clone()
15345    }
15346
15347    /// Get the generated master data.
15348    pub fn get_master_data(&self) -> &MasterDataSnapshot {
15349        &self.master_data
15350    }
15351
15352    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
15353    fn phase_compliance_regulations(
15354        &mut self,
15355        _stats: &mut EnhancedGenerationStatistics,
15356    ) -> SynthResult<ComplianceRegulationsSnapshot> {
15357        if !self.phase_config.generate_compliance_regulations {
15358            return Ok(ComplianceRegulationsSnapshot::default());
15359        }
15360
15361        info!("Phase: Generating Compliance Regulations Data");
15362
15363        let cr_config = &self.config.compliance_regulations;
15364
15365        // Determine jurisdictions: from config or inferred from companies
15366        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
15367            self.config
15368                .companies
15369                .iter()
15370                .map(|c| c.country.clone())
15371                .collect::<std::collections::HashSet<_>>()
15372                .into_iter()
15373                .collect()
15374        } else {
15375            cr_config.jurisdictions.clone()
15376        };
15377
15378        // Determine reference date
15379        let fallback_date =
15380            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
15381        let reference_date = cr_config
15382            .reference_date
15383            .as_ref()
15384            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
15385            .unwrap_or_else(|| {
15386                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15387                    .unwrap_or(fallback_date)
15388            });
15389
15390        // Generate standards registry data
15391        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
15392        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
15393        let cross_reference_records = reg_gen.generate_cross_reference_records();
15394        let jurisdiction_records =
15395            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
15396
15397        info!(
15398            "  Standards: {} records, {} cross-references, {} jurisdictions",
15399            standard_records.len(),
15400            cross_reference_records.len(),
15401            jurisdiction_records.len()
15402        );
15403
15404        // Generate audit procedures (if enabled)
15405        let audit_procedures = if cr_config.audit_procedures.enabled {
15406            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
15407                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
15408                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
15409                confidence_level: cr_config.audit_procedures.confidence_level,
15410                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
15411            };
15412            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
15413                self.seed + 9000,
15414                proc_config,
15415            );
15416            let registry = reg_gen.registry();
15417            let mut all_procs = Vec::new();
15418            for jurisdiction in &jurisdictions {
15419                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
15420                all_procs.extend(procs);
15421            }
15422            info!("  Audit procedures: {}", all_procs.len());
15423            all_procs
15424        } else {
15425            Vec::new()
15426        };
15427
15428        // Generate compliance findings (if enabled)
15429        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
15430            let finding_config =
15431                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
15432                    finding_rate: cr_config.findings.finding_rate,
15433                    material_weakness_rate: cr_config.findings.material_weakness_rate,
15434                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
15435                    generate_remediation: cr_config.findings.generate_remediation,
15436                };
15437            let mut finding_gen =
15438                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
15439                    self.seed + 9100,
15440                    finding_config,
15441                );
15442            let mut all_findings = Vec::new();
15443            for company in &self.config.companies {
15444                let company_findings =
15445                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
15446                all_findings.extend(company_findings);
15447            }
15448            info!("  Compliance findings: {}", all_findings.len());
15449            all_findings
15450        } else {
15451            Vec::new()
15452        };
15453
15454        // Generate regulatory filings (if enabled)
15455        let filings = if cr_config.filings.enabled {
15456            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
15457                filing_types: cr_config.filings.filing_types.clone(),
15458                generate_status_progression: cr_config.filings.generate_status_progression,
15459            };
15460            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
15461                self.seed + 9200,
15462                filing_config,
15463            );
15464            let company_codes: Vec<String> = self
15465                .config
15466                .companies
15467                .iter()
15468                .map(|c| c.code.clone())
15469                .collect();
15470            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15471                .unwrap_or(fallback_date);
15472            let filings = filing_gen.generate_filings(
15473                &company_codes,
15474                &jurisdictions,
15475                start_date,
15476                self.config.global.period_months,
15477            );
15478            info!("  Regulatory filings: {}", filings.len());
15479            filings
15480        } else {
15481            Vec::new()
15482        };
15483
15484        // Build compliance graph (if enabled)
15485        let compliance_graph = if cr_config.graph.enabled {
15486            let graph_config = datasynth_graph::ComplianceGraphConfig {
15487                include_standard_nodes: cr_config.graph.include_compliance_nodes,
15488                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
15489                include_cross_references: cr_config.graph.include_cross_references,
15490                include_supersession_edges: cr_config.graph.include_supersession_edges,
15491                include_account_links: cr_config.graph.include_account_links,
15492                include_control_links: cr_config.graph.include_control_links,
15493                include_company_links: cr_config.graph.include_company_links,
15494            };
15495            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
15496
15497            // Add standard nodes
15498            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
15499                .iter()
15500                .map(|r| datasynth_graph::StandardNodeInput {
15501                    standard_id: r.standard_id.clone(),
15502                    title: r.title.clone(),
15503                    category: r.category.clone(),
15504                    domain: r.domain.clone(),
15505                    is_active: r.is_active,
15506                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
15507                    applicable_account_types: r.applicable_account_types.clone(),
15508                    applicable_processes: r.applicable_processes.clone(),
15509                })
15510                .collect();
15511            builder.add_standards(&standard_inputs);
15512
15513            // Add jurisdiction nodes
15514            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
15515                jurisdiction_records
15516                    .iter()
15517                    .map(|r| datasynth_graph::JurisdictionNodeInput {
15518                        country_code: r.country_code.clone(),
15519                        country_name: r.country_name.clone(),
15520                        framework: r.accounting_framework.clone(),
15521                        standard_count: r.standard_count,
15522                        tax_rate: r.statutory_tax_rate,
15523                    })
15524                    .collect();
15525            builder.add_jurisdictions(&jurisdiction_inputs);
15526
15527            // Add cross-reference edges
15528            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
15529                cross_reference_records
15530                    .iter()
15531                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
15532                        from_standard: r.from_standard.clone(),
15533                        to_standard: r.to_standard.clone(),
15534                        relationship: r.relationship.clone(),
15535                        convergence_level: r.convergence_level,
15536                    })
15537                    .collect();
15538            builder.add_cross_references(&xref_inputs);
15539
15540            // Add jurisdiction→standard mappings
15541            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
15542                .iter()
15543                .map(|r| datasynth_graph::JurisdictionMappingInput {
15544                    country_code: r.jurisdiction.clone(),
15545                    standard_id: r.standard_id.clone(),
15546                })
15547                .collect();
15548            builder.add_jurisdiction_mappings(&mapping_inputs);
15549
15550            // Add procedure nodes
15551            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
15552                .iter()
15553                .map(|p| datasynth_graph::ProcedureNodeInput {
15554                    procedure_id: p.procedure_id.clone(),
15555                    standard_id: p.standard_id.clone(),
15556                    procedure_type: p.procedure_type.clone(),
15557                    sample_size: p.sample_size,
15558                    confidence_level: p.confidence_level,
15559                })
15560                .collect();
15561            builder.add_procedures(&proc_inputs);
15562
15563            // Add finding nodes
15564            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
15565                .iter()
15566                .map(|f| datasynth_graph::FindingNodeInput {
15567                    finding_id: f.finding_id.to_string(),
15568                    standard_id: f
15569                        .related_standards
15570                        .first()
15571                        .map(|s| s.as_str().to_string())
15572                        .unwrap_or_default(),
15573                    severity: f.severity.to_string(),
15574                    deficiency_level: f.deficiency_level.to_string(),
15575                    severity_score: f.deficiency_level.severity_score(),
15576                    control_id: f.control_id.clone(),
15577                    affected_accounts: f.affected_accounts.clone(),
15578                })
15579                .collect();
15580            builder.add_findings(&finding_inputs);
15581
15582            // Cross-domain: link standards to accounts from chart of accounts
15583            if cr_config.graph.include_account_links {
15584                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15585                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
15586                for std_record in &standard_records {
15587                    if let Some(std_obj) =
15588                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
15589                            &std_record.standard_id,
15590                        ))
15591                    {
15592                        for acct_type in &std_obj.applicable_account_types {
15593                            account_links.push(datasynth_graph::AccountLinkInput {
15594                                standard_id: std_record.standard_id.clone(),
15595                                account_code: acct_type.clone(),
15596                                account_name: acct_type.clone(),
15597                            });
15598                        }
15599                    }
15600                }
15601                builder.add_account_links(&account_links);
15602            }
15603
15604            // Cross-domain: link standards to internal controls
15605            if cr_config.graph.include_control_links {
15606                let mut control_links = Vec::new();
15607                // SOX/PCAOB standards link to all controls
15608                let sox_like_ids: Vec<String> = standard_records
15609                    .iter()
15610                    .filter(|r| {
15611                        r.standard_id.starts_with("SOX")
15612                            || r.standard_id.starts_with("PCAOB-AS-2201")
15613                    })
15614                    .map(|r| r.standard_id.clone())
15615                    .collect();
15616                // Get control IDs from config (C001-C060 standard controls)
15617                let control_ids = [
15618                    ("C001", "Cash Controls"),
15619                    ("C002", "Large Transaction Approval"),
15620                    ("C010", "PO Approval"),
15621                    ("C011", "Three-Way Match"),
15622                    ("C020", "Revenue Recognition"),
15623                    ("C021", "Credit Check"),
15624                    ("C030", "Manual JE Approval"),
15625                    ("C031", "Period Close Review"),
15626                    ("C032", "Account Reconciliation"),
15627                    ("C040", "Payroll Processing"),
15628                    ("C050", "Fixed Asset Capitalization"),
15629                    ("C060", "Intercompany Elimination"),
15630                ];
15631                for sox_id in &sox_like_ids {
15632                    for (ctrl_id, ctrl_name) in &control_ids {
15633                        control_links.push(datasynth_graph::ControlLinkInput {
15634                            standard_id: sox_id.clone(),
15635                            control_id: ctrl_id.to_string(),
15636                            control_name: ctrl_name.to_string(),
15637                        });
15638                    }
15639                }
15640                builder.add_control_links(&control_links);
15641            }
15642
15643            // Cross-domain: filing nodes with company links
15644            if cr_config.graph.include_company_links {
15645                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15646                    .iter()
15647                    .enumerate()
15648                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
15649                        filing_id: format!("F{:04}", i + 1),
15650                        filing_type: f.filing_type.to_string(),
15651                        company_code: f.company_code.clone(),
15652                        jurisdiction: f.jurisdiction.clone(),
15653                        status: format!("{:?}", f.status),
15654                    })
15655                    .collect();
15656                builder.add_filings(&filing_inputs);
15657            }
15658
15659            let graph = builder.build();
15660            info!(
15661                "  Compliance graph: {} nodes, {} edges",
15662                graph.nodes.len(),
15663                graph.edges.len()
15664            );
15665            Some(graph)
15666        } else {
15667            None
15668        };
15669
15670        self.check_resources_with_log("post-compliance-regulations")?;
15671
15672        Ok(ComplianceRegulationsSnapshot {
15673            standard_records,
15674            cross_reference_records,
15675            jurisdiction_records,
15676            audit_procedures,
15677            findings,
15678            filings,
15679            compliance_graph,
15680        })
15681    }
15682
15683    /// Build a lineage graph describing config → phase → output relationships.
15684    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15685        use super::lineage::LineageGraphBuilder;
15686
15687        let mut builder = LineageGraphBuilder::new();
15688
15689        // Config sections
15690        builder.add_config_section("config:global", "Global Config");
15691        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15692        builder.add_config_section("config:transactions", "Transaction Config");
15693
15694        // Generator phases
15695        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15696        builder.add_generator_phase("phase:je", "Journal Entry Generation");
15697
15698        // Config → phase edges
15699        builder.configured_by("phase:coa", "config:chart_of_accounts");
15700        builder.configured_by("phase:je", "config:transactions");
15701
15702        // Output files
15703        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15704        builder.produced_by("output:je", "phase:je");
15705
15706        // Optional phases based on config
15707        if self.phase_config.generate_master_data {
15708            builder.add_config_section("config:master_data", "Master Data Config");
15709            builder.add_generator_phase("phase:master_data", "Master Data Generation");
15710            builder.configured_by("phase:master_data", "config:master_data");
15711            builder.input_to("phase:master_data", "phase:je");
15712        }
15713
15714        if self.phase_config.generate_document_flows {
15715            builder.add_config_section("config:document_flows", "Document Flow Config");
15716            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15717            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15718            builder.configured_by("phase:p2p", "config:document_flows");
15719            builder.configured_by("phase:o2c", "config:document_flows");
15720
15721            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15722            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15723            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15724            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15725            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15726
15727            builder.produced_by("output:po", "phase:p2p");
15728            builder.produced_by("output:gr", "phase:p2p");
15729            builder.produced_by("output:vi", "phase:p2p");
15730            builder.produced_by("output:so", "phase:o2c");
15731            builder.produced_by("output:ci", "phase:o2c");
15732        }
15733
15734        if self.phase_config.inject_anomalies {
15735            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15736            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15737            builder.configured_by("phase:anomaly", "config:fraud");
15738            builder.add_output_file(
15739                "output:labels",
15740                "Anomaly Labels",
15741                "labels/anomaly_labels.csv",
15742            );
15743            builder.produced_by("output:labels", "phase:anomaly");
15744        }
15745
15746        if self.phase_config.generate_audit {
15747            builder.add_config_section("config:audit", "Audit Config");
15748            builder.add_generator_phase("phase:audit", "Audit Data Generation");
15749            builder.configured_by("phase:audit", "config:audit");
15750        }
15751
15752        if self.phase_config.generate_banking {
15753            builder.add_config_section("config:banking", "Banking Config");
15754            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15755            builder.configured_by("phase:banking", "config:banking");
15756        }
15757
15758        if self.config.llm.enabled {
15759            builder.add_config_section("config:llm", "LLM Enrichment Config");
15760            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15761            builder.configured_by("phase:llm_enrichment", "config:llm");
15762        }
15763
15764        if self.config.diffusion.enabled {
15765            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15766            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15767            builder.configured_by("phase:diffusion", "config:diffusion");
15768        }
15769
15770        if self.config.causal.enabled {
15771            builder.add_config_section("config:causal", "Causal Generation Config");
15772            builder.add_generator_phase("phase:causal", "Causal Overlay");
15773            builder.configured_by("phase:causal", "config:causal");
15774        }
15775
15776        builder.build()
15777    }
15778
15779    // -----------------------------------------------------------------------
15780    // Trial-balance helpers used to replace hardcoded proxy values
15781    // -----------------------------------------------------------------------
15782
15783    /// Compute total revenue for a company from its journal entries.
15784    ///
15785    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
15786    /// net credits on all revenue-account lines filtered to `company_code`.
15787    fn compute_company_revenue(
15788        entries: &[JournalEntry],
15789        company_code: &str,
15790    ) -> rust_decimal::Decimal {
15791        use rust_decimal::Decimal;
15792        let mut revenue = Decimal::ZERO;
15793        for je in entries {
15794            if je.header.company_code != company_code {
15795                continue;
15796            }
15797            for line in &je.lines {
15798                if line.gl_account.starts_with('4') {
15799                    // Revenue is credit-normal
15800                    revenue += line.credit_amount - line.debit_amount;
15801                }
15802            }
15803        }
15804        revenue.max(Decimal::ZERO)
15805    }
15806
15807    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
15808    ///
15809    /// Asset accounts start with "1"; liability accounts start with "2".
15810    fn compute_entity_net_assets(
15811        entries: &[JournalEntry],
15812        entity_code: &str,
15813    ) -> rust_decimal::Decimal {
15814        use rust_decimal::Decimal;
15815        let mut asset_net = Decimal::ZERO;
15816        let mut liability_net = Decimal::ZERO;
15817        for je in entries {
15818            if je.header.company_code != entity_code {
15819                continue;
15820            }
15821            for line in &je.lines {
15822                if line.gl_account.starts_with('1') {
15823                    asset_net += line.debit_amount - line.credit_amount;
15824                } else if line.gl_account.starts_with('2') {
15825                    liability_net += line.credit_amount - line.debit_amount;
15826                }
15827            }
15828        }
15829        asset_net - liability_net
15830    }
15831
15832    /// v3.5.1+: Run the statistical validation suite configured in
15833    /// `distributions.validation.tests` over the final amount
15834    /// distribution.  Collects every non-zero line-level amount (debit +
15835    /// credit) and hands it to the runners in
15836    /// `datasynth_core::distributions::validation`.
15837    ///
15838    /// Returns `Ok(None)` when validation is disabled (the default).
15839    /// When `reporting.fail_on_error = true` and any test fails, returns
15840    /// `Err` with a concise message; otherwise attaches the report to
15841    /// the result and lets callers inspect it.
15842    fn phase_statistical_validation(
15843        &self,
15844        entries: &[JournalEntry],
15845    ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15846        use datasynth_config::schema::StatisticalTestConfig;
15847        use datasynth_core::distributions::{
15848            run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15849            run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15850        };
15851        use rust_decimal::prelude::ToPrimitive;
15852
15853        let cfg = &self.config.distributions.validation;
15854        if !cfg.enabled {
15855            return Ok(None);
15856        }
15857
15858        // Collect per-line positive amounts (debit + credit is zero on the
15859        // non-posting side, so this naturally picks the magnitude).
15860        let amounts: Vec<rust_decimal::Decimal> = entries
15861            .iter()
15862            .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15863            .filter(|a| *a > rust_decimal::Decimal::ZERO)
15864            .collect();
15865
15866        // v4.1.0+ paired (amount, line_count) per entry for correlation
15867        // checks. Amount per entry is the debit-side total (= credit-side
15868        // total for a balanced entry).
15869        let paired_amount_linecount: Vec<(f64, f64)> = entries
15870            .iter()
15871            .filter_map(|je| {
15872                let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
15873                if amt > rust_decimal::Decimal::ZERO {
15874                    amt.to_f64().map(|a| (a, je.lines.len() as f64))
15875                } else {
15876                    None
15877                }
15878            })
15879            .collect();
15880
15881        let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
15882        for test_cfg in &cfg.tests {
15883            match test_cfg {
15884                StatisticalTestConfig::BenfordFirstDigit {
15885                    threshold_mad,
15886                    warning_mad,
15887                } => {
15888                    results.push(run_benford_first_digit(
15889                        &amounts,
15890                        *threshold_mad,
15891                        *warning_mad,
15892                    ));
15893                }
15894                StatisticalTestConfig::ChiSquared { bins, significance } => {
15895                    results.push(run_chi_squared(&amounts, *bins, *significance));
15896                }
15897                StatisticalTestConfig::DistributionFit {
15898                    target: _,
15899                    ks_significance,
15900                    method: _,
15901                } => {
15902                    // v3.5.1+: log-uniformity KS check. Target-specific
15903                    // fits against Normal / Exponential land in v4.1.1+.
15904                    results.push(run_ks_uniform_log(&amounts, *ks_significance));
15905                }
15906                StatisticalTestConfig::AndersonDarling {
15907                    target: _,
15908                    significance,
15909                } => {
15910                    // v4.1.0+: A*² statistic against log-normal on the
15911                    // log-scale. Other targets follow the same pattern.
15912                    results.push(run_anderson_darling(&amounts, *significance));
15913                }
15914                StatisticalTestConfig::CorrelationCheck {
15915                    expected_correlations,
15916                } => {
15917                    // v4.1.0+: (amount, line_count) is tracked today.
15918                    // Other pairs resolve to Skipped pending richer
15919                    // per-entry attribute collection.
15920                    if expected_correlations.is_empty() {
15921                        results.push(StatisticalTestResult {
15922                            name: "correlation_check".to_string(),
15923                            outcome: TestOutcome::Skipped,
15924                            statistic: 0.0,
15925                            threshold: 0.0,
15926                            message: "no expected correlations declared".to_string(),
15927                        });
15928                    } else {
15929                        for ec in expected_correlations {
15930                            let pair_key = format!("{}_{}", ec.field1, ec.field2);
15931                            let is_amount_linecount = (ec.field1 == "amount"
15932                                && ec.field2 == "line_count")
15933                                || (ec.field1 == "line_count" && ec.field2 == "amount");
15934                            if is_amount_linecount {
15935                                let xs: Vec<f64> =
15936                                    paired_amount_linecount.iter().map(|(a, _)| *a).collect();
15937                                let ys: Vec<f64> =
15938                                    paired_amount_linecount.iter().map(|(_, l)| *l).collect();
15939                                results.push(run_correlation_check(
15940                                    &pair_key,
15941                                    &xs,
15942                                    &ys,
15943                                    ec.expected_r,
15944                                    ec.tolerance,
15945                                ));
15946                            } else {
15947                                results.push(StatisticalTestResult {
15948                                    name: format!("correlation_check_{pair_key}"),
15949                                    outcome: TestOutcome::Skipped,
15950                                    statistic: 0.0,
15951                                    threshold: ec.tolerance,
15952                                    message: format!(
15953                                        "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
15954                                        ec.field1, ec.field2
15955                                    ),
15956                                });
15957                            }
15958                        }
15959                    }
15960                }
15961            }
15962        }
15963
15964        let report = StatisticalValidationReport {
15965            sample_count: amounts.len(),
15966            results,
15967        };
15968
15969        if cfg.reporting.fail_on_error && !report.all_passed() {
15970            let failed = report.failed_names().join(", ");
15971            return Err(SynthError::validation(format!(
15972                "statistical validation failed: {failed}"
15973            )));
15974        }
15975
15976        Ok(Some(report))
15977    }
15978
15979    /// v3.3.0: analytics-metadata phase.
15980    ///
15981    /// Runs AFTER all JE-adding phases (including Phase 20b's
15982    /// fraud-bias sweep). Four sub-generators fire in sequence, each
15983    /// gated by an individual `analytics_metadata.<flag>` toggle:
15984    ///
15985    /// 1. `PriorYearGenerator` — prior-year comparatives derived from
15986    ///    current-period account balances.
15987    /// 2. `IndustryBenchmarkGenerator` — industry benchmarks for the
15988    ///    configured `global.industry`.
15989    /// 3. `ManagementReportGenerator` — management-report artefacts.
15990    /// 4. `DriftEventGenerator` — post-generation drift-event labels.
15991    fn phase_analytics_metadata(
15992        &mut self,
15993        entries: &[JournalEntry],
15994    ) -> SynthResult<AnalyticsMetadataSnapshot> {
15995        use datasynth_generators::drift_event_generator::DriftEventGenerator;
15996        use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15997        use datasynth_generators::management_report_generator::ManagementReportGenerator;
15998        use datasynth_generators::prior_year_generator::PriorYearGenerator;
15999        use std::collections::BTreeMap;
16000
16001        let mut snap = AnalyticsMetadataSnapshot::default();
16002
16003        if !self.phase_config.generate_analytics_metadata {
16004            return Ok(snap);
16005        }
16006
16007        let cfg = &self.config.analytics_metadata;
16008        let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
16009            .map(|d| d.year())
16010            .unwrap_or(2025);
16011
16012        // ---- 1. Prior-year comparatives ----
16013        if cfg.prior_year {
16014            let mut gen = PriorYearGenerator::new(self.seed + 9100);
16015            for company in &self.config.companies {
16016                // Aggregate current-period balances per account code +
16017                // account name from the entries slice.
16018                let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
16019                    BTreeMap::new();
16020                for je in entries {
16021                    if je.header.company_code != company.code {
16022                        continue;
16023                    }
16024                    for line in &je.lines {
16025                        let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
16026                            (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
16027                        });
16028                        entry.1 += line.debit_amount - line.credit_amount;
16029                    }
16030                }
16031                let current: Vec<(String, String, rust_decimal::Decimal)> = balances
16032                    .into_iter()
16033                    .filter(|(_, (_, bal))| !bal.is_zero())
16034                    .map(|(code, (name, bal))| (code, name, bal))
16035                    .collect();
16036                if !current.is_empty() {
16037                    let comparatives =
16038                        gen.generate_comparatives(&company.code, fiscal_year, &current);
16039                    snap.prior_year_comparatives.extend(comparatives);
16040                }
16041            }
16042            info!(
16043                "v3.3.0 analytics: {} prior-year comparatives across {} companies",
16044                snap.prior_year_comparatives.len(),
16045                self.config.companies.len()
16046            );
16047        }
16048
16049        // ---- 2. Industry benchmarks ----
16050        if cfg.industry_benchmark {
16051            use datasynth_core::models::IndustrySector;
16052            let industry = match self.config.global.industry {
16053                IndustrySector::Manufacturing => "manufacturing",
16054                IndustrySector::Retail => "retail",
16055                IndustrySector::FinancialServices => "financial_services",
16056                IndustrySector::Technology => "technology",
16057                IndustrySector::Healthcare => "healthcare",
16058                _ => "other",
16059            };
16060            let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
16061            let benchmarks = gen.generate(industry, fiscal_year);
16062            info!(
16063                "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
16064                benchmarks.len()
16065            );
16066            snap.industry_benchmarks = benchmarks;
16067        }
16068
16069        // ---- 3. Management reports ----
16070        if cfg.management_reports {
16071            let mut gen = ManagementReportGenerator::new(self.seed + 9300);
16072            let period_months = self.config.global.period_months;
16073            for company in &self.config.companies {
16074                let reports =
16075                    gen.generate_reports(&company.code, fiscal_year as u32, period_months);
16076                snap.management_reports.extend(reports);
16077            }
16078            info!(
16079                "v3.3.0 analytics: {} management reports across {} companies",
16080                snap.management_reports.len(),
16081                self.config.companies.len()
16082            );
16083        }
16084
16085        // ---- 4. Drift-event labels ----
16086        if cfg.drift_events {
16087            let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
16088                .expect("hardcoded NaiveDate 2025-01-01 is valid");
16089            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
16090                .unwrap_or(fallback_start);
16091            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
16092            let mut gen = DriftEventGenerator::new(self.seed + 9400);
16093            let drifts = gen.generate_standalone_drifts(start_date, end_date);
16094            info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
16095            snap.drift_events = drifts;
16096        }
16097        // `entries` parameter reserved for future JE-aware drift detection
16098        let _ = entries;
16099
16100        Ok(snap)
16101    }
16102}
16103
16104/// Get the directory name for a graph export format.
16105fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
16106    match format {
16107        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
16108        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
16109        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
16110        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
16111        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
16112    }
16113}
16114
16115/// Aggregate journal entry lines into per-account trial balance rows.
16116///
16117/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
16118/// debit/credit totals and a net balance (debit minus credit).
16119fn compute_trial_balance_entries(
16120    entries: &[JournalEntry],
16121    entity_code: &str,
16122    fiscal_year: i32,
16123    coa: Option<&ChartOfAccounts>,
16124) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
16125    use std::collections::BTreeMap;
16126
16127    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
16128        BTreeMap::new();
16129
16130    for je in entries {
16131        for line in &je.lines {
16132            let entry = balances.entry(line.account_code.clone()).or_default();
16133            entry.0 += line.debit_amount;
16134            entry.1 += line.credit_amount;
16135        }
16136    }
16137
16138    balances
16139        .into_iter()
16140        .map(
16141            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
16142                account_description: coa
16143                    .and_then(|c| c.get_account(&account_code))
16144                    .map(|a| a.description().to_string())
16145                    .unwrap_or_else(|| account_code.clone()),
16146                account_code,
16147                debit_balance: debit,
16148                credit_balance: credit,
16149                net_balance: debit - credit,
16150                entity_code: entity_code.to_string(),
16151                period: format!("FY{}", fiscal_year),
16152            },
16153        )
16154        .collect()
16155}
16156
16157#[cfg(test)]
16158mod tests {
16159    use super::*;
16160    use datasynth_config::schema::*;
16161
16162    fn create_test_config() -> GeneratorConfig {
16163        GeneratorConfig {
16164            global: GlobalConfig {
16165                industry: IndustrySector::Manufacturing,
16166                start_date: "2024-01-01".to_string(),
16167                period_months: 1,
16168                seed: Some(42),
16169                parallel: false,
16170                group_currency: "USD".to_string(),
16171                presentation_currency: None,
16172                worker_threads: 0,
16173                memory_limit_mb: 0,
16174                fiscal_year_months: None,
16175            },
16176            companies: vec![CompanyConfig {
16177                code: "1000".to_string(),
16178                name: "Test Company".to_string(),
16179                currency: "USD".to_string(),
16180                functional_currency: None,
16181                country: "US".to_string(),
16182                annual_transaction_volume: TransactionVolume::TenK,
16183                volume_weight: 1.0,
16184                fiscal_year_variant: "K4".to_string(),
16185            }],
16186            chart_of_accounts: ChartOfAccountsConfig {
16187                complexity: CoAComplexity::Small,
16188                industry_specific: true,
16189                custom_accounts: None,
16190                min_hierarchy_depth: 2,
16191                max_hierarchy_depth: 4,
16192                expand_industry_subaccounts: false,
16193            },
16194            transactions: TransactionConfig::default(),
16195            output: OutputConfig::default(),
16196            fraud: FraudConfig::default(),
16197            internal_controls: InternalControlsConfig::default(),
16198            business_processes: BusinessProcessConfig::default(),
16199            user_personas: UserPersonaConfig::default(),
16200            templates: TemplateConfig::default(),
16201            approval: ApprovalConfig::default(),
16202            departments: DepartmentConfig::default(),
16203            master_data: MasterDataConfig::default(),
16204            document_flows: DocumentFlowConfig::default(),
16205            intercompany: IntercompanyConfig::default(),
16206            balance: BalanceConfig::default(),
16207            ocpm: OcpmConfig::default(),
16208            audit: AuditGenerationConfig::default(),
16209            banking: datasynth_banking::BankingConfig::default(),
16210            data_quality: DataQualitySchemaConfig::default(),
16211            scenario: ScenarioConfig::default(),
16212            temporal: TemporalDriftConfig::default(),
16213            graph_export: GraphExportConfig::default(),
16214            streaming: StreamingSchemaConfig::default(),
16215            rate_limit: RateLimitSchemaConfig::default(),
16216            temporal_attributes: TemporalAttributeSchemaConfig::default(),
16217            relationships: RelationshipSchemaConfig::default(),
16218            accounting_standards: AccountingStandardsConfig::default(),
16219            audit_standards: AuditStandardsConfig::default(),
16220            distributions: Default::default(),
16221            temporal_patterns: Default::default(),
16222            vendor_network: VendorNetworkSchemaConfig::default(),
16223            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
16224            relationship_strength: RelationshipStrengthSchemaConfig::default(),
16225            cross_process_links: CrossProcessLinksSchemaConfig::default(),
16226            organizational_events: OrganizationalEventsSchemaConfig::default(),
16227            behavioral_drift: BehavioralDriftSchemaConfig::default(),
16228            market_drift: MarketDriftSchemaConfig::default(),
16229            drift_labeling: DriftLabelingSchemaConfig::default(),
16230            anomaly_injection: Default::default(),
16231            industry_specific: Default::default(),
16232            fingerprint_privacy: Default::default(),
16233            quality_gates: Default::default(),
16234            compliance: Default::default(),
16235            webhooks: Default::default(),
16236            llm: Default::default(),
16237            diffusion: Default::default(),
16238            causal: Default::default(),
16239            source_to_pay: Default::default(),
16240            financial_reporting: Default::default(),
16241            hr: Default::default(),
16242            manufacturing: Default::default(),
16243            sales_quotes: Default::default(),
16244            tax: Default::default(),
16245            treasury: Default::default(),
16246            project_accounting: Default::default(),
16247            esg: Default::default(),
16248            country_packs: None,
16249            scenarios: Default::default(),
16250            session: Default::default(),
16251            compliance_regulations: Default::default(),
16252            analytics_metadata: Default::default(),
16253            concentration: Default::default(),
16254        }
16255    }
16256
/// Building an orchestrator with default phase settings from the baseline
/// test configuration must succeed.
#[test]
fn test_enhanced_orchestrator_creation() {
    let config = create_test_config();
    let orchestrator = EnhancedOrchestrator::with_defaults(config);

    // Report the underlying error on failure; a bare `is_ok()` assertion
    // would only say that the check was false.
    assert!(
        orchestrator.is_ok(),
        "EnhancedOrchestrator::with_defaults failed: {:?}",
        orchestrator.as_ref().err()
    );
}
16263
/// With only the journal-entry phase switched on, generation succeeds and
/// produces at least one journal entry.
#[test]
fn test_minimal_generation() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();

    // `expect` both asserts success and prints the error on failure, replacing
    // the previous `assert!(result.is_ok())` + `unwrap()` pair that hid it.
    let result = orchestrator
        .generate()
        .expect("journal-entry-only generation should succeed");

    assert!(!result.journal_entries.is_empty());
}
16283
/// With only the master-data phase enabled, vendors, customers and materials
/// are all populated in the result.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let master = &output.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
16308
/// Master data plus document flows (everything else off) yields P2P and O2C
/// chains as well as the flattened purchase-order and sales-order lists.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        validate_coa_coverage_strict: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();
    let flows = &output.document_flows;

    // Both chain kinds must have been produced.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened per-document collections must be filled as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
16343
/// Journal-entry generation with anomaly injection enabled produces entries
/// and an anomaly-label summary.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    // Journal entries are the input to anomaly injection, so they must exist.
    assert!(!output.journal_entries.is_empty());

    // With ~833 entries and a 2% rate some anomalies are expected, but the
    // injection is probabilistic, so only the presence of the summary
    // structure is asserted.
    assert!(output.anomaly_labels.summary.is_some());
}
16366
/// Runs master data, document flows, journal entries and balance validation
/// together and checks that each of them left results behind.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        validate_coa_coverage_strict: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    // Every enabled phase must have produced something.
    let master = &output.master_data;
    let flows = &output.document_flows;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());
    assert!(!output.journal_entries.is_empty());
    assert!(output.statistics.accounts_count > 0);

    // Subledger linking must have produced AP and AR invoices.
    let subledger = &output.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation must have executed over at least one entry.
    let validation = &output.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
16409
/// Document-flow invoices are linked one-to-one into the AP/AR subledger and
/// the statistics report the same counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        validate_coa_coverage_strict: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let flows = &output.document_flows;
    let subledger = &output.subledger;
    let stats = &output.statistics;

    // Invoices must exist on the document-flow side...
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // ...and on the subledger side that is derived from them.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Reported statistics must agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
16466
/// Balance validation runs over generated journal entries, finds no
/// unbalanced entry, and reports equal debit and credit totals.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        validate_coa_coverage_strict: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();
    let validation = &output.balance_validation;

    // The validation phase must have executed over at least one entry.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // No generated journal entry may be unbalanced.
    assert!(!validation.has_unbalanced_entries);

    // Debits and credits must net out overall.
    assert_eq!(validation.total_debits, validation.total_credits);
}
16497
/// The reported statistics match the sizes of the generated collections.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    let master = &output.master_data;

    // Each counter must equal the length of the collection it describes.
    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
16536
/// Pins the default `PhaseConfig`: core phases, balance validation and
/// progress display are on, anomaly injection is off, and the per-company
/// vendor/customer counts are positive.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Phases and switches enabled out of the box.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);

    // Anomaly injection is opt-in.
    assert!(!defaults.inject_anomalies);

    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Master-data volumes must be non-zero.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
16549
/// A freshly constructed orchestrator has no chart of accounts yet.
#[test]
fn test_get_coa_before_generation() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    // `generate()` has not been called, so `get_coa()` must return `None`.
    assert!(engine.get_coa().is_none());
}
16558
/// Once `generate()` has run, the orchestrator exposes a chart of accounts.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    // Only the orchestrator's state after the run matters here; the
    // generation result itself is discarded.
    let _ = engine.generate().unwrap();

    assert!(engine.get_coa().is_some());
}
16577
/// Generated master data is available on the result returned by `generate()`.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    // `generate()` moves the master data into its result, so that is where
    // the vendors are inspected.
    assert!(!output.master_data.vendors.is_empty());
}
16601
/// `with_progress(false)` switches off `show_progress` in the phase config.
#[test]
fn test_with_progress_builder() {
    let built = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet = built.with_progress(false);

    // The builder call must be reflected in the stored phase configuration.
    assert!(!quiet.phase_config.show_progress);
}
16612
/// Adding a second company to the configuration yields master data for both
/// companies and a company count of two.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // Master data is requested at 5 vendors/customers per company, so two
    // companies must yield at least 10 of each.
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);

    // `assert_eq!` prints the actual count on failure, which the previous
    // `assert!(.. == 2)` did not.
    assert_eq!(result.statistics.companies_count, 2);
}
16649
/// Requesting document flows without master data produces no P2P or O2C
/// chains instead of failing.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        // Master data is deliberately skipped...
        generate_master_data: false,
        // ...while document flows are still requested.
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    // With no master data to build on, both chain lists must stay empty.
    let flows = &output.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
16669
/// The `total_line_items` statistic equals the sum of `line_count()` over all
/// generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    // Recompute the line total independently of the statistics.
    let mut calculated_line_items: u64 = 0;
    for entry in &output.journal_entries {
        calculated_line_items += entry.line_count() as u64;
    }

    assert_eq!(output.statistics.total_line_items, calculated_line_items);
}
16693
/// With the audit phase enabled, the audit collections are populated and the
/// audit statistics match the lengths of those collections.
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let audit = &output.audit;
    let stats = &output.statistics;

    // Core audit artifacts: exactly the two requested engagements, and a
    // non-empty set of everything hanging off them.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // ISA-specific entity collections must be populated too. Samples are not
    // asserted here because they may or may not be generated depending on
    // the workpaper sampling methods.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Every audit counter must equal the length of its collection.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(stats.audit_procedure_step_count, audit.procedure_steps.len());
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(stats.audit_related_party_count, audit.related_parties.len());
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
16813
16814    #[test]
16815    fn test_new_phases_disabled_by_default() {
16816        let config = create_test_config();
16817        // Verify new config fields default to disabled
16818        assert!(!config.llm.enabled);
16819        assert!(!config.diffusion.enabled);
16820        assert!(!config.causal.enabled);
16821
16822        let phase_config = PhaseConfig {
16823            generate_master_data: false,
16824            generate_document_flows: false,
16825            generate_journal_entries: true,
16826            inject_anomalies: false,
16827            show_progress: false,
16828            ..Default::default()
16829        };
16830
16831        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16832        let result = orchestrator.generate().unwrap();
16833
16834        // All new phase statistics should be zero when disabled
16835        assert_eq!(result.statistics.llm_enrichment_ms, 0);
16836        assert_eq!(result.statistics.llm_vendors_enriched, 0);
16837        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
16838        assert_eq!(result.statistics.diffusion_samples_generated, 0);
16839        assert_eq!(result.statistics.causal_generation_ms, 0);
16840        assert_eq!(result.statistics.causal_samples_generated, 0);
16841        assert!(result.statistics.causal_validation_passed.is_none());
16842        assert_eq!(result.statistics.counterfactual_pair_count, 0);
16843        assert!(result.counterfactual_pairs.is_empty());
16844    }
16845
16846    #[test]
16847    fn test_counterfactual_generation_enabled() {
16848        let config = create_test_config();
16849        let phase_config = PhaseConfig {
16850            generate_master_data: false,
16851            generate_document_flows: false,
16852            generate_journal_entries: true,
16853            inject_anomalies: false,
16854            show_progress: false,
16855            generate_counterfactuals: true,
16856            generate_period_close: false, // Disable so entry count matches counterfactual pairs
16857            ..Default::default()
16858        };
16859
16860        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16861        let result = orchestrator.generate().unwrap();
16862
16863        // With JE generation enabled, counterfactual pairs should be generated
16864        if !result.journal_entries.is_empty() {
16865            assert_eq!(
16866                result.counterfactual_pairs.len(),
16867                result.journal_entries.len()
16868            );
16869            assert_eq!(
16870                result.statistics.counterfactual_pair_count,
16871                result.journal_entries.len()
16872            );
16873            // Each pair should have a distinct pair_id
16874            let ids: std::collections::HashSet<_> = result
16875                .counterfactual_pairs
16876                .iter()
16877                .map(|p| p.pair_id.clone())
16878                .collect();
16879            assert_eq!(ids.len(), result.counterfactual_pairs.len());
16880        }
16881    }
16882
16883    #[test]
16884    fn test_llm_enrichment_enabled() {
16885        let mut config = create_test_config();
16886        config.llm.enabled = true;
16887        config.llm.max_vendor_enrichments = 3;
16888
16889        let phase_config = PhaseConfig {
16890            generate_master_data: true,
16891            generate_document_flows: false,
16892            generate_journal_entries: false,
16893            inject_anomalies: false,
16894            show_progress: false,
16895            vendors_per_company: 5,
16896            customers_per_company: 3,
16897            materials_per_company: 3,
16898            assets_per_company: 3,
16899            employees_per_company: 3,
16900            ..Default::default()
16901        };
16902
16903        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16904        let result = orchestrator.generate().unwrap();
16905
16906        // LLM enrichment should have run
16907        assert!(result.statistics.llm_vendors_enriched > 0);
16908        assert!(result.statistics.llm_vendors_enriched <= 3);
16909    }
16910
16911    #[test]
16912    fn test_diffusion_enhancement_enabled() {
16913        let mut config = create_test_config();
16914        config.diffusion.enabled = true;
16915        config.diffusion.n_steps = 50;
16916        config.diffusion.sample_size = 20;
16917
16918        let phase_config = PhaseConfig {
16919            generate_master_data: false,
16920            generate_document_flows: false,
16921            generate_journal_entries: true,
16922            inject_anomalies: false,
16923            show_progress: false,
16924            ..Default::default()
16925        };
16926
16927        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16928        let result = orchestrator.generate().unwrap();
16929
16930        // Diffusion phase should have generated samples
16931        assert_eq!(result.statistics.diffusion_samples_generated, 20);
16932    }
16933
16934    #[test]
16935    fn test_causal_overlay_enabled() {
16936        let mut config = create_test_config();
16937        config.causal.enabled = true;
16938        config.causal.template = "fraud_detection".to_string();
16939        config.causal.sample_size = 100;
16940        config.causal.validate = true;
16941
16942        let phase_config = PhaseConfig {
16943            generate_master_data: false,
16944            generate_document_flows: false,
16945            generate_journal_entries: true,
16946            inject_anomalies: false,
16947            show_progress: false,
16948            ..Default::default()
16949        };
16950
16951        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16952        let result = orchestrator.generate().unwrap();
16953
16954        // Causal phase should have generated samples
16955        assert_eq!(result.statistics.causal_samples_generated, 100);
16956        // Validation should have run
16957        assert!(result.statistics.causal_validation_passed.is_some());
16958    }
16959
16960    #[test]
16961    fn test_causal_overlay_revenue_cycle_template() {
16962        let mut config = create_test_config();
16963        config.causal.enabled = true;
16964        config.causal.template = "revenue_cycle".to_string();
16965        config.causal.sample_size = 50;
16966        config.causal.validate = false;
16967
16968        let phase_config = PhaseConfig {
16969            generate_master_data: false,
16970            generate_document_flows: false,
16971            generate_journal_entries: true,
16972            inject_anomalies: false,
16973            show_progress: false,
16974            ..Default::default()
16975        };
16976
16977        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16978        let result = orchestrator.generate().unwrap();
16979
16980        // Causal phase should have generated samples
16981        assert_eq!(result.statistics.causal_samples_generated, 50);
16982        // Validation was disabled
16983        assert!(result.statistics.causal_validation_passed.is_none());
16984    }
16985
16986    #[test]
16987    fn test_all_new_phases_enabled_together() {
16988        let mut config = create_test_config();
16989        config.llm.enabled = true;
16990        config.llm.max_vendor_enrichments = 2;
16991        config.diffusion.enabled = true;
16992        config.diffusion.n_steps = 20;
16993        config.diffusion.sample_size = 10;
16994        config.causal.enabled = true;
16995        config.causal.sample_size = 50;
16996        config.causal.validate = true;
16997
16998        let phase_config = PhaseConfig {
16999            generate_master_data: true,
17000            generate_document_flows: false,
17001            generate_journal_entries: true,
17002            inject_anomalies: false,
17003            show_progress: false,
17004            vendors_per_company: 5,
17005            customers_per_company: 3,
17006            materials_per_company: 3,
17007            assets_per_company: 3,
17008            employees_per_company: 3,
17009            ..Default::default()
17010        };
17011
17012        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
17013        let result = orchestrator.generate().unwrap();
17014
17015        // All three phases should have run
17016        assert!(result.statistics.llm_vendors_enriched > 0);
17017        assert_eq!(result.statistics.diffusion_samples_generated, 10);
17018        assert_eq!(result.statistics.causal_samples_generated, 50);
17019        assert!(result.statistics.causal_validation_passed.is_some());
17020    }
17021
17022    #[test]
17023    fn test_statistics_serialization_with_new_fields() {
17024        let stats = EnhancedGenerationStatistics {
17025            total_entries: 100,
17026            total_line_items: 500,
17027            llm_enrichment_ms: 42,
17028            llm_vendors_enriched: 10,
17029            diffusion_enhancement_ms: 100,
17030            diffusion_samples_generated: 50,
17031            causal_generation_ms: 200,
17032            causal_samples_generated: 100,
17033            causal_validation_passed: Some(true),
17034            ..Default::default()
17035        };
17036
17037        let json = serde_json::to_string(&stats).unwrap();
17038        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
17039
17040        assert_eq!(deserialized.llm_enrichment_ms, 42);
17041        assert_eq!(deserialized.llm_vendors_enriched, 10);
17042        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
17043        assert_eq!(deserialized.diffusion_samples_generated, 50);
17044        assert_eq!(deserialized.causal_generation_ms, 200);
17045        assert_eq!(deserialized.causal_samples_generated, 100);
17046        assert_eq!(deserialized.causal_validation_passed, Some(true));
17047    }
17048
17049    #[test]
17050    fn test_statistics_backward_compat_deserialization() {
17051        // Old JSON without the new fields should still deserialize
17052        let old_json = r#"{
17053            "total_entries": 100,
17054            "total_line_items": 500,
17055            "accounts_count": 50,
17056            "companies_count": 1,
17057            "period_months": 12,
17058            "vendor_count": 10,
17059            "customer_count": 20,
17060            "material_count": 15,
17061            "asset_count": 5,
17062            "employee_count": 8,
17063            "p2p_chain_count": 5,
17064            "o2c_chain_count": 5,
17065            "ap_invoice_count": 5,
17066            "ar_invoice_count": 5,
17067            "ocpm_event_count": 0,
17068            "ocpm_object_count": 0,
17069            "ocpm_case_count": 0,
17070            "audit_engagement_count": 0,
17071            "audit_workpaper_count": 0,
17072            "audit_evidence_count": 0,
17073            "audit_risk_count": 0,
17074            "audit_finding_count": 0,
17075            "audit_judgment_count": 0,
17076            "anomalies_injected": 0,
17077            "data_quality_issues": 0,
17078            "banking_customer_count": 0,
17079            "banking_account_count": 0,
17080            "banking_transaction_count": 0,
17081            "banking_suspicious_count": 0,
17082            "graph_export_count": 0,
17083            "graph_node_count": 0,
17084            "graph_edge_count": 0
17085        }"#;
17086
17087        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
17088
17089        // New fields should default to 0 / None
17090        assert_eq!(stats.llm_enrichment_ms, 0);
17091        assert_eq!(stats.llm_vendors_enriched, 0);
17092        assert_eq!(stats.diffusion_enhancement_ms, 0);
17093        assert_eq!(stats.diffusion_samples_generated, 0);
17094        assert_eq!(stats.causal_generation_ms, 0);
17095        assert_eq!(stats.causal_samples_generated, 0);
17096        assert!(stats.causal_validation_passed.is_none());
17097    }
17098
17099    // ── v5.33 #162 — framework-aware TB classification ──────────────────────
17100
17101    #[test]
17102    fn category_from_account_code_us_gaap_unchanged() {
17103        // US-style numbering — same answers as the pre-v5.33 hard-coded table.
17104        assert_eq!(
17105            EnhancedOrchestrator::category_from_account_code("1000", "us_gaap"),
17106            "Cash"
17107        );
17108        assert_eq!(
17109            EnhancedOrchestrator::category_from_account_code("1500", "us_gaap"),
17110            "FixedAssets"
17111        );
17112        assert_eq!(
17113            EnhancedOrchestrator::category_from_account_code("4000", "us_gaap"),
17114            "Revenue"
17115        );
17116        assert_eq!(
17117            EnhancedOrchestrator::category_from_account_code("6000", "us_gaap"),
17118            "OperatingExpenses"
17119        );
17120    }
17121
17122    #[test]
17123    fn category_from_account_code_skr04_german() {
17124        // SKR04 (German GAAP): 0xxx = fixed assets, 4xxx = revenue,
17125        // 8xxx = tax/extraordinary expense — pre-v5.33 the US-only table
17126        // mis-classified 0xxx as OperatingExpenses (default arm), 4xxx as
17127        // Revenue (accidentally correct), and 8xxx as OtherExpenses.
17128        // Framework-aware version routes them correctly.
17129        assert_eq!(
17130            EnhancedOrchestrator::category_from_account_code("0010", "german_gaap"),
17131            "FixedAssets",
17132            "SKR 0xxx must be classified as fixed assets, not P&L"
17133        );
17134        assert_eq!(
17135            EnhancedOrchestrator::category_from_account_code("1000", "german_gaap"),
17136            "Cash"
17137        );
17138        assert_eq!(
17139            EnhancedOrchestrator::category_from_account_code("1300", "german_gaap"),
17140            "Receivables"
17141        );
17142        assert_eq!(
17143            EnhancedOrchestrator::category_from_account_code("2000", "german_gaap"),
17144            "Equity"
17145        );
17146        assert_eq!(
17147            EnhancedOrchestrator::category_from_account_code("3000", "german_gaap"),
17148            "Payables"
17149        );
17150        assert_eq!(
17151            EnhancedOrchestrator::category_from_account_code("4000", "german_gaap"),
17152            "Revenue"
17153        );
17154        assert_eq!(
17155            EnhancedOrchestrator::category_from_account_code("5000", "german_gaap"),
17156            "CostOfSales"
17157        );
17158        assert_eq!(
17159            EnhancedOrchestrator::category_from_account_code("8000", "german_gaap"),
17160            "OtherExpenses"
17161        );
17162    }
17163
17164    #[test]
17165    fn category_from_account_code_pcg_french() {
17166        // PCG (French GAAP): 2 = fixed assets, 5 = cash, 6 = expenses,
17167        // 7 = revenue. Pre-v5.33 these all hit the wrong US-prefix arms.
17168        assert_eq!(
17169            EnhancedOrchestrator::category_from_account_code("210000", "french_gaap"),
17170            "FixedAssets"
17171        );
17172        assert_eq!(
17173            EnhancedOrchestrator::category_from_account_code("411000", "french_gaap"),
17174            "Receivables"
17175        );
17176        assert_eq!(
17177            EnhancedOrchestrator::category_from_account_code("401000", "french_gaap"),
17178            "Payables"
17179        );
17180        assert_eq!(
17181            EnhancedOrchestrator::category_from_account_code("512000", "french_gaap"),
17182            "Cash"
17183        );
17184        assert_eq!(
17185            EnhancedOrchestrator::category_from_account_code("603000", "french_gaap"),
17186            "OperatingExpenses"
17187        );
17188        assert_eq!(
17189            EnhancedOrchestrator::category_from_account_code("707000", "french_gaap"),
17190            "Revenue"
17191        );
17192        assert_eq!(
17193            EnhancedOrchestrator::category_from_account_code("101000", "french_gaap"),
17194            "Equity"
17195        );
17196    }
17197
17198    #[test]
17199    fn is_balance_sheet_account_routes_skr_correctly() {
17200        // SKR04: 0xxx fixed assets, 1xxx current assets, 2xxx equity,
17201        // 3xxx liabilities → all BS.  4xxx revenue, 5-6 expenses → P&L.
17202        assert!(EnhancedOrchestrator::is_balance_sheet_account(
17203            "0010",
17204            "german_gaap"
17205        ));
17206        assert!(EnhancedOrchestrator::is_balance_sheet_account(
17207            "1200",
17208            "german_gaap"
17209        ));
17210        assert!(EnhancedOrchestrator::is_balance_sheet_account(
17211            "2000",
17212            "german_gaap"
17213        ));
17214        assert!(EnhancedOrchestrator::is_balance_sheet_account(
17215            "3000",
17216            "german_gaap"
17217        ));
17218        assert!(!EnhancedOrchestrator::is_balance_sheet_account(
17219            "4000",
17220            "german_gaap"
17221        ));
17222        assert!(!EnhancedOrchestrator::is_balance_sheet_account(
17223            "6000",
17224            "german_gaap"
17225        ));
17226    }
17227
17228    #[test]
17229    fn period_trial_balance_into_canonical_account_type_is_framework_aware() {
17230        // Defect C regression test — every TB line was hard-coded
17231        // `account_type: Asset` regardless of the underlying code. With
17232        // the framework-aware classifier wired in, the same SKR codes
17233        // resolve to their proper sides.
17234        use datasynth_generators::TrialBalanceEntry;
17235        let entries = vec![
17236            TrialBalanceEntry {
17237                account_code: "0010".to_string(), // SKR fixed asset
17238                account_name: "Land".to_string(),
17239                category: "FixedAssets".to_string(),
17240                debit_balance: rust_decimal::Decimal::new(1_000_000, 0),
17241                credit_balance: rust_decimal::Decimal::ZERO,
17242            },
17243            TrialBalanceEntry {
17244                account_code: "3000".to_string(), // SKR liability
17245                account_name: "Trade payables".to_string(),
17246                category: "Payables".to_string(),
17247                debit_balance: rust_decimal::Decimal::ZERO,
17248                credit_balance: rust_decimal::Decimal::new(500_000, 0),
17249            },
17250            TrialBalanceEntry {
17251                account_code: "4000".to_string(), // SKR revenue
17252                account_name: "Sales".to_string(),
17253                category: "Revenue".to_string(),
17254                debit_balance: rust_decimal::Decimal::ZERO,
17255                credit_balance: rust_decimal::Decimal::new(2_000_000, 0),
17256            },
17257            TrialBalanceEntry {
17258                account_code: "6000".to_string(), // SKR expense
17259                account_name: "Personnel cost".to_string(),
17260                category: "OperatingExpenses".to_string(),
17261                debit_balance: rust_decimal::Decimal::new(800_000, 0),
17262                credit_balance: rust_decimal::Decimal::ZERO,
17263            },
17264        ];
17265        let ptb = PeriodTrialBalance {
17266            fiscal_year: 2024,
17267            fiscal_period: 12,
17268            period_start: chrono::NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
17269            period_end: chrono::NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
17270            entries,
17271            framework: "german_gaap".to_string(),
17272        };
17273        let tb = ptb.into_canonical("ACME_EU", "EUR");
17274        // Line account_types are no longer all-Asset.
17275        let types: Vec<AccountType> = tb.lines.iter().map(|l| l.account_type).collect();
17276        assert_eq!(types[0], AccountType::Asset, "0010 → Asset");
17277        assert_eq!(types[1], AccountType::Liability, "3000 → Liability");
17278        assert_eq!(types[2], AccountType::Revenue, "4000 → Revenue");
17279        assert_eq!(types[3], AccountType::Expense, "6000 → Expense");
17280        // is_balanced is now an unconditional truth claim — the
17281        // underlying JE-balance invariant is the only one we guarantee.
17282        assert!(tb.is_balanced);
17283        assert!(tb.is_equation_valid);
17284        assert_eq!(tb.out_of_balance, rust_decimal::Decimal::ZERO);
17285        assert_eq!(tb.equation_difference, rust_decimal::Decimal::ZERO);
17286    }
17287
17288    #[test]
17289    fn period_trial_balance_deserialises_legacy_snapshot_without_framework_field() {
17290        // Old in-memory snapshots (pre-v5.33) didn't carry the framework
17291        // field. Serde `#[serde(default)]` must let them round-trip with
17292        // a `"us_gaap"` fallback so older saved sessions keep working.
17293        let legacy_json = r#"{
17294            "fiscal_year": 2024,
17295            "fiscal_period": 12,
17296            "period_start": "2024-01-01",
17297            "period_end": "2024-12-31",
17298            "entries": []
17299        }"#;
17300        let ptb: PeriodTrialBalance = serde_json::from_str(legacy_json).unwrap();
17301        assert_eq!(ptb.framework, "us_gaap");
17302    }
17303}