Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{
164    AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
165    TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
166};
167use datasynth_core::models::documents::PaymentMethod;
168use datasynth_core::models::IndustrySector;
169use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
170use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
171use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
172use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
173use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
174use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
175use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
176use datasynth_generators::audit::sample_generator::SampleGenerator;
177use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
178use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
179use datasynth_generators::coa_generator::CoAFramework;
180use rayon::prelude::*;
181use rust_decimal::Decimal;
182
183// ============================================================================
184// Configuration Conversion Functions
185// ============================================================================
186
187/// Convert P2P flow config from schema to generator config.
188/// v4.4.1 — build a `DataQualityStats` with only `total_records`
189/// populated to `n_entries`. Used when the data-quality phase is
190/// skipped (by config or resource pressure) so downstream consumers
191/// can still see the denominator. Before v4.4.1 the writer emitted
192/// `total_records: 0` in those cases, which the SDK team flagged as
193/// indistinguishable from "ran but processed nothing".
194fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
195    #[allow(clippy::field_reassign_with_default)]
196    {
197        let mut s = DataQualityStats::default();
198        s.total_records = n_entries;
199        s.missing_values.total_records = n_entries;
200        s.format_variations.total_processed = n_entries;
201        s.duplicates.total_processed = n_entries;
202        s
203    }
204}
205
206fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
207    let payment_behavior = &schema_config.payment_behavior;
208    let late_dist = &payment_behavior.late_payment_days_distribution;
209
210    P2PGeneratorConfig {
211        three_way_match_rate: schema_config.three_way_match_rate,
212        partial_delivery_rate: schema_config.partial_delivery_rate,
213        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
214        price_variance_rate: schema_config.price_variance_rate,
215        max_price_variance_percent: schema_config.max_price_variance_percent,
216        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
217        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
218        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
219        payment_method_distribution: vec![
220            (PaymentMethod::BankTransfer, 0.60),
221            (PaymentMethod::Check, 0.25),
222            (PaymentMethod::Wire, 0.10),
223            (PaymentMethod::CreditCard, 0.05),
224        ],
225        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
226        payment_behavior: P2PPaymentBehavior {
227            late_payment_rate: payment_behavior.late_payment_rate,
228            late_payment_distribution: LatePaymentDistribution {
229                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
230                late_8_to_14: late_dist.late_8_to_14,
231                very_late_15_to_30: late_dist.very_late_15_to_30,
232                severely_late_31_to_60: late_dist.severely_late_31_to_60,
233                extremely_late_over_60: late_dist.extremely_late_over_60,
234            },
235            partial_payment_rate: payment_behavior.partial_payment_rate,
236            payment_correction_rate: payment_behavior.payment_correction_rate,
237            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
238        },
239    }
240}
241
242/// Convert O2C flow config from schema to generator config.
243fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
244    let payment_behavior = &schema_config.payment_behavior;
245
246    O2CGeneratorConfig {
247        credit_check_failure_rate: schema_config.credit_check_failure_rate,
248        partial_shipment_rate: schema_config.partial_shipment_rate,
249        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
250        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
251        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
252        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
253        bad_debt_rate: schema_config.bad_debt_rate,
254        returns_rate: schema_config.return_rate,
255        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
256        payment_method_distribution: vec![
257            (PaymentMethod::BankTransfer, 0.50),
258            (PaymentMethod::Check, 0.30),
259            (PaymentMethod::Wire, 0.15),
260            (PaymentMethod::CreditCard, 0.05),
261        ],
262        payment_behavior: O2CPaymentBehavior {
263            partial_payment_rate: payment_behavior.partial_payments.rate,
264            short_payment_rate: payment_behavior.short_payments.rate,
265            max_short_percent: payment_behavior.short_payments.max_short_percent,
266            on_account_rate: payment_behavior.on_account_payments.rate,
267            payment_correction_rate: payment_behavior.payment_corrections.rate,
268            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
269        },
270    }
271}
272
/// Configuration for which generation phases to run.
///
/// The `bool` fields switch individual phases, injections and validations
/// on or off; the `usize` fields set how many entities, chains or audit
/// artifacts to generate. Instances are built either through `Default` or
/// through [`PhaseConfig::from_config`], which derives the feature-gated
/// flags from a generator config.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Generate master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Generate document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Generate OCPM events from document flows.
    pub generate_ocpm_events: bool,
    /// Generate journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Validate balance sheet equation after generation.
    pub validate_balances: bool,
    /// Validate that every `gl_account` referenced in generated JEs exists
    /// in the chart of accounts. Off by default (a soft warning is emitted
    /// instead). Set true to fail the run on any orphan account.
    pub validate_coa_coverage_strict: bool,
    /// Show progress bars.
    pub show_progress: bool,
    /// Number of vendors to generate per company.
    pub vendors_per_company: usize,
    /// Number of customers to generate per company.
    pub customers_per_company: usize,
    /// Number of materials to generate per company.
    pub materials_per_company: usize,
    /// Number of assets to generate per company.
    pub assets_per_company: usize,
    /// Number of employees to generate per company.
    pub employees_per_company: usize,
    /// Number of P2P chains to generate.
    pub p2p_chains: usize,
    /// Number of O2C chains to generate.
    pub o2c_chains: usize,
    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// Number of audit engagements to generate.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Generate banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Generate graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Generate S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Generate bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Generate accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Generate sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Generate tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Generate ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Generate intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Generate process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Generate counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Generate compliance regulations data (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Generate period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Generate HR data (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Generate treasury data (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Generate project accounting data (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
    /// v3.3.0: generate legal documents per engagement (engagement letters,
    /// management rep letters, legal opinions, regulatory filings,
    /// board resolutions). Gated by `compliance_regulations.legal_documents.enabled`.
    pub generate_legal_documents: bool,
    /// v3.3.0: generate IT general controls (access logs, change
    /// management records) per audit engagement. Gated by
    /// `audit.it_controls.enabled`.
    pub generate_it_controls: bool,
    /// v3.3.0: run the analytics-metadata phase after all JE-adding
    /// phases. Wires PriorYearGenerator / IndustryBenchmarkGenerator /
    /// ManagementReportGenerator / DriftEventGenerator. Gated by the
    /// top-level `analytics_metadata.enabled` config flag.
    pub generate_analytics_metadata: bool,
}
374
375impl Default for PhaseConfig {
376    fn default() -> Self {
377        Self {
378            generate_master_data: true,
379            generate_document_flows: true,
380            generate_ocpm_events: false, // Off by default
381            generate_journal_entries: true,
382            inject_anomalies: false,
383            inject_data_quality: false, // Off by default (to preserve clean test data)
384            validate_balances: true,
385            validate_coa_coverage_strict: false,
386            show_progress: true,
387            vendors_per_company: 50,
388            customers_per_company: 100,
389            materials_per_company: 200,
390            assets_per_company: 50,
391            employees_per_company: 100,
392            p2p_chains: 100,
393            o2c_chains: 100,
394            generate_audit: false, // Off by default
395            audit_engagements: 5,
396            workpapers_per_engagement: 20,
397            evidence_per_workpaper: 5,
398            risks_per_engagement: 15,
399            findings_per_engagement: 8,
400            judgments_per_engagement: 10,
401            generate_banking: false,                // Off by default
402            generate_graph_export: false,           // Off by default
403            generate_sourcing: false,               // Off by default
404            generate_bank_reconciliation: false,    // Off by default
405            generate_financial_statements: false,   // Off by default
406            generate_accounting_standards: false,   // Off by default
407            generate_manufacturing: false,          // Off by default
408            generate_sales_kpi_budgets: false,      // Off by default
409            generate_tax: false,                    // Off by default
410            generate_esg: false,                    // Off by default
411            generate_intercompany: false,           // Off by default
412            generate_evolution_events: true,        // On by default
413            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
414            generate_compliance_regulations: false, // Off by default
415            generate_period_close: true,            // On by default
416            generate_hr: false,                     // Off by default
417            generate_treasury: false,               // Off by default
418            generate_project_accounting: false,     // Off by default
419            generate_legal_documents: false,        // v3.3.0 — off by default
420            generate_it_controls: false,            // v3.3.0 — off by default
421            generate_analytics_metadata: false,     // v3.3.0 — off by default
422        }
423    }
424}
425
426impl PhaseConfig {
427    /// Derive phase flags from [`GeneratorConfig`].
428    ///
429    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
430    /// CLI flags can override individual fields after calling this method.
431    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
432        Self {
433            // Always-on phases
434            generate_master_data: true,
435            generate_document_flows: true,
436            generate_journal_entries: true,
437            validate_balances: true,
438            validate_coa_coverage_strict: false,
439            generate_period_close: true,
440            generate_evolution_events: true,
441            show_progress: true,
442
443            // Feature-gated phases — derived from config sections
444            generate_audit: cfg.audit.enabled,
445            generate_banking: cfg.banking.enabled,
446            generate_graph_export: cfg.graph_export.enabled,
447            generate_sourcing: cfg.source_to_pay.enabled,
448            generate_intercompany: cfg.intercompany.enabled,
449            generate_financial_statements: cfg.financial_reporting.enabled,
450            generate_bank_reconciliation: cfg.financial_reporting.enabled,
451            generate_accounting_standards: cfg.accounting_standards.enabled,
452            generate_manufacturing: cfg.manufacturing.enabled,
453            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
454            generate_tax: cfg.tax.enabled,
455            generate_esg: cfg.esg.enabled,
456            generate_ocpm_events: cfg.ocpm.enabled,
457            generate_compliance_regulations: cfg.compliance_regulations.enabled,
458            generate_hr: cfg.hr.enabled,
459            generate_treasury: cfg.treasury.enabled,
460            generate_project_accounting: cfg.project_accounting.enabled,
461
462            // v3.3.0: L1 generator wiring
463            // Legal documents emitted when compliance_regulations is enabled
464            // and the nested legal_documents.enabled flag is set.
465            generate_legal_documents: cfg.compliance_regulations.enabled
466                && cfg.compliance_regulations.legal_documents.enabled,
467            // IT general controls emitted when audit is enabled and the
468            // nested it_controls.enabled flag is set.
469            generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
470            // Analytics metadata phase (prior-year, industry benchmarks,
471            // management reports, drift events).
472            generate_analytics_metadata: cfg.analytics_metadata.enabled,
473
474            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
475            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
476
477            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
478            inject_data_quality: cfg.data_quality.enabled,
479
480            // Count defaults (CLI can override after calling this method)
481            vendors_per_company: 50,
482            customers_per_company: 100,
483            materials_per_company: 200,
484            assets_per_company: 50,
485            employees_per_company: 100,
486            p2p_chains: 100,
487            o2c_chains: 100,
488            audit_engagements: 5,
489            workpapers_per_engagement: 20,
490            evidence_per_workpaper: 5,
491            risks_per_engagement: 15,
492            findings_per_engagement: 8,
493            judgments_per_engagement: 10,
494        }
495    }
496}
497
/// Master data snapshot containing all generated entities.
///
/// Derives `Default`, so a fresh snapshot has every collection empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Generated vendors.
    pub vendors: Vec<Vendor>,
    /// Generated customers.
    pub customers: Vec<Customer>,
    /// Generated materials.
    pub materials: Vec<Material>,
    /// Generated fixed assets.
    pub assets: Vec<FixedAsset>,
    /// Generated employees.
    pub employees: Vec<Employee>,
    /// Generated cost center hierarchy (two-level: departments + sub-departments).
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// v5.1: Generated profit centre hierarchy (two-level: top-level
    /// segment / region / product-group nodes + sub-units).  Emits to
    /// SAP CEPC alongside `cost_centers` → CSKS.
    pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
    /// v3.3.0+: organizational profiles (one per company) with
    /// industry / geography / structure / complexity metadata. Emitted
    /// alongside master data when `generate_master_data = true`.
    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
}
524
/// Info about a completed hypergraph export.
///
/// Holds the exported element counts and the output location only; the
/// exported graph data itself is not stored here.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes exported.
    pub node_count: usize,
    /// Number of pairwise edges exported.
    pub edge_count: usize,
    /// Number of hyperedges exported.
    pub hyperedge_count: usize,
    /// Output directory path.
    pub output_path: PathBuf,
}
537
/// Document flow snapshot containing all generated document chains.
///
/// Offers two views: the P2P / O2C chains, and flattened vectors holding
/// one document type each. Derives `Default`, so a fresh snapshot has
/// every collection empty.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// All purchase orders (flattened).
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// All goods receipts (flattened).
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// All vendor invoices (flattened).
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// All sales orders (flattened).
    pub sales_orders: Vec<documents::SalesOrder>,
    /// All deliveries (flattened).
    pub deliveries: Vec<documents::Delivery>,
    /// All customer invoices (flattened).
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// All payments (flattened).
    pub payments: Vec<documents::Payment>,
    /// Cross-document references collected from all document headers
    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
    pub document_references: Vec<documents::DocumentReference>,
}
563
/// Subledger snapshot containing generated subledger records.
///
/// Groups AP, AR, fixed-asset and inventory records together with the
/// reports and runs derived from them (aging, depreciation, valuation,
/// dunning). Derives `Default`, so a fresh snapshot has every collection
/// empty.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices linked from document flow vendor invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices linked from document flow customer invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// FA subledger records (asset acquisitions from FA generator).
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions from inventory generator.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements from inventory generator.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports, one per company, computed after payment settlement.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports, one per company, computed after payment settlement.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs executed after AR aging (one per company per dunning cycle).
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters generated across all dunning runs.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
590
/// OCPM snapshot containing generated OCPM event log data.
///
/// Derives `Default`: a fresh snapshot has no event log and all counts at
/// zero.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// OCPM event log; `None` if no log was generated.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events generated.
    pub event_count: usize,
    /// Number of objects generated.
    pub object_count: usize,
    /// Number of cases generated.
    pub case_count: usize,
}
603
604/// Audit data snapshot containing all generated audit-related entities.
605#[derive(Debug, Clone, Default)]
606pub struct AuditSnapshot {
607    /// Audit engagements per ISA 210/220.
608    pub engagements: Vec<AuditEngagement>,
609    /// Workpapers per ISA 230.
610    pub workpapers: Vec<Workpaper>,
611    /// Audit evidence per ISA 500.
612    pub evidence: Vec<AuditEvidence>,
613    /// Risk assessments per ISA 315/330.
614    pub risk_assessments: Vec<RiskAssessment>,
615    /// Audit findings per ISA 265.
616    pub findings: Vec<AuditFinding>,
617    /// Professional judgments per ISA 200.
618    pub judgments: Vec<ProfessionalJudgment>,
619    /// External confirmations per ISA 505.
620    pub confirmations: Vec<ExternalConfirmation>,
621    /// Confirmation responses per ISA 505.
622    pub confirmation_responses: Vec<ConfirmationResponse>,
623    /// Audit procedure steps per ISA 330/530.
624    pub procedure_steps: Vec<AuditProcedureStep>,
625    /// Audit samples per ISA 530.
626    pub samples: Vec<AuditSample>,
627    /// Analytical procedure results per ISA 520.
628    pub analytical_results: Vec<AnalyticalProcedureResult>,
629    /// Internal audit functions per ISA 610.
630    pub ia_functions: Vec<InternalAuditFunction>,
631    /// Internal audit reports per ISA 610.
632    pub ia_reports: Vec<InternalAuditReport>,
633    /// Related parties per ISA 550.
634    pub related_parties: Vec<RelatedParty>,
635    /// Related party transactions per ISA 550.
636    pub related_party_transactions: Vec<RelatedPartyTransaction>,
637    // ---- ISA 600: Group Audits ----
638    /// Component auditors assigned by jurisdiction (ISA 600).
639    pub component_auditors: Vec<ComponentAuditor>,
640    /// Group audit plan with materiality allocations (ISA 600).
641    pub group_audit_plan: Option<GroupAuditPlan>,
642    /// Component instructions issued to component auditors (ISA 600).
643    pub component_instructions: Vec<ComponentInstruction>,
644    /// Reports received from component auditors (ISA 600).
645    pub component_reports: Vec<ComponentAuditorReport>,
646    // ---- ISA 210: Engagement Letters ----
647    /// Engagement letters per ISA 210.
648    pub engagement_letters: Vec<EngagementLetter>,
649    // ---- ISA 560 / IAS 10: Subsequent Events ----
650    /// Subsequent events per ISA 560 / IAS 10.
651    pub subsequent_events: Vec<SubsequentEvent>,
652    // ---- ISA 402: Service Organization Controls ----
653    /// Service organizations identified per ISA 402.
654    pub service_organizations: Vec<ServiceOrganization>,
655    /// SOC reports obtained per ISA 402.
656    pub soc_reports: Vec<SocReport>,
657    /// User entity controls documented per ISA 402.
658    pub user_entity_controls: Vec<UserEntityControl>,
659    // ---- ISA 570: Going Concern ----
660    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
661    pub going_concern_assessments:
662        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
663    // ---- ISA 540: Accounting Estimates ----
664    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
665    pub accounting_estimates:
666        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
667    // ---- ISA 700/701/705/706: Audit Opinions ----
668    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
669    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
670    /// Key Audit Matters per ISA 701 (flattened across all opinions).
671    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
672    // ---- SOX 302 / 404 ----
673    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
674    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
675    /// SOX Section 404 ICFR assessments (one per entity per year).
676    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
677    // ---- ISA 320: Materiality ----
678    /// Materiality calculations per entity per period (ISA 320).
679    pub materiality_calculations:
680        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
681    // ---- ISA 315: Combined Risk Assessments ----
682    /// Combined Risk Assessments per account area / assertion (ISA 315).
683    pub combined_risk_assessments:
684        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
685    // ---- ISA 530: Sampling Plans ----
686    /// Sampling plans per CRA at Moderate or higher (ISA 530).
687    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
688    /// Individual sampled items (key items + representative items) per ISA 530.
689    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
690    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
691    /// Significant classes of transactions per ISA 315 (one set per entity).
692    pub significant_transaction_classes:
693        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
694    // ---- ISA 520: Unusual Item Markers ----
695    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
696    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
697    // ---- ISA 520: Analytical Relationships ----
698    /// Analytical relationships (ratios, trends, correlations) per entity.
699    pub analytical_relationships:
700        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
701    // ---- PCAOB-ISA Cross-Reference ----
702    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
703    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
704    // ---- ISA Standard Reference ----
705    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
706    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
707    // ---- ISA 220 / ISA 300: Audit Scopes ----
708    /// Audit scope records (one per engagement) describing the audit boundary.
709    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
710    // ---- FSM Event Trail ----
711    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
712    /// Contains the ordered sequence of state-transition and procedure-step events
713    /// generated by the audit FSM engine.
714    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
715    // ---- v3.3.0: L1 generator wiring ----
716    /// Legal documents (engagement letters, management reps, legal
717    /// opinions, regulatory filings, board resolutions) per entity.
718    /// Emitted by `LegalDocumentGenerator` when
719    /// `compliance_regulations.legal_documents.enabled = true`.
720    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
721    /// IT general controls — access logs (login/privileged action
722    /// audit trail). Emitted by `ItControlsGenerator` when
723    /// `audit.it_controls.enabled = true`.
724    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
725    /// IT general controls — change management records (code deploys,
726    /// config changes, patches). Emitted by `ItControlsGenerator`.
727    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
728}
729
730/// Banking KYC/AML data snapshot containing all generated banking entities.
731#[derive(Debug, Clone, Default)]
732pub struct BankingSnapshot {
733    /// Banking customers (retail, business, trust).
734    pub customers: Vec<BankingCustomer>,
735    /// Bank accounts.
736    pub accounts: Vec<BankAccount>,
737    /// Bank transactions with AML labels.
738    pub transactions: Vec<BankTransaction>,
739    /// Transaction-level AML labels with features.
740    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
741    /// Customer-level AML labels.
742    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
743    /// Account-level AML labels.
744    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
745    /// Relationship-level AML labels.
746    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
747    /// Case narratives for AML scenarios.
748    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
749    /// Number of suspicious transactions.
750    pub suspicious_count: usize,
751    /// Number of AML scenarios generated.
752    pub scenario_count: usize,
753}
754
755/// Graph export snapshot containing exported graph metadata.
756#[derive(Debug, Clone, Default, Serialize)]
757pub struct GraphExportSnapshot {
758    /// Whether graph export was performed.
759    pub exported: bool,
760    /// Number of graphs exported.
761    pub graph_count: usize,
762    /// Exported graph metadata (by format name).
763    pub exports: HashMap<String, GraphExportInfo>,
764}
765
766/// Information about an exported graph.
767#[derive(Debug, Clone, Serialize)]
768pub struct GraphExportInfo {
769    /// Graph name.
770    pub name: String,
771    /// Export format (pytorch_geometric, neo4j, dgl).
772    pub format: String,
773    /// Output directory path.
774    pub output_path: PathBuf,
775    /// Number of nodes.
776    pub node_count: usize,
777    /// Number of edges.
778    pub edge_count: usize,
779}
780
781/// S2C sourcing data snapshot.
782#[derive(Debug, Clone, Default)]
783pub struct SourcingSnapshot {
784    /// Spend analyses.
785    pub spend_analyses: Vec<SpendAnalysis>,
786    /// Sourcing projects.
787    pub sourcing_projects: Vec<SourcingProject>,
788    /// Supplier qualifications.
789    pub qualifications: Vec<SupplierQualification>,
790    /// RFx events (RFI, RFP, RFQ).
791    pub rfx_events: Vec<RfxEvent>,
792    /// Supplier bids.
793    pub bids: Vec<SupplierBid>,
794    /// Bid evaluations.
795    pub bid_evaluations: Vec<BidEvaluation>,
796    /// Procurement contracts.
797    pub contracts: Vec<ProcurementContract>,
798    /// Catalog items.
799    pub catalog_items: Vec<CatalogItem>,
800    /// Supplier scorecards.
801    pub scorecards: Vec<SupplierScorecard>,
802}
803
804/// A single period's trial balance with metadata.
805///
806/// Used as the orchestrator's in-memory representation while it
807/// builds per-period FS / CF artefacts.  At write time the runtime
808/// converts each `PeriodTrialBalance` to the canonical
809/// [`datasynth_core::models::balance::TrialBalance`] shape via
810/// [`PeriodTrialBalance::into_canonical`] so the on-disk
811/// `period_close/trial_balances.json` matches what the group
812/// aggregate phase loads — see
813/// `crate::output_writer::write_outputs`.
814#[derive(Debug, Clone, Serialize, Deserialize)]
815pub struct PeriodTrialBalance {
816    /// Fiscal year.
817    pub fiscal_year: u16,
818    /// Fiscal period (1-12).
819    pub fiscal_period: u8,
820    /// Period start date.
821    pub period_start: NaiveDate,
822    /// Period end date.
823    pub period_end: NaiveDate,
824    /// Trial balance entries for this period.
825    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
826    /// Framework string for classifier dispatch in
827    /// [`PeriodTrialBalance::into_canonical`] (`"us_gaap"` / `"ifrs"` /
828    /// `"french_gaap"` / `"german_gaap"` / `"dual_reporting"`). Set by
829    /// the orchestrator at TB-emit time; defaults to `"us_gaap"` when
830    /// constructed by ad-hoc callers (e.g. test fixtures).
831    #[serde(default = "default_framework")]
832    pub framework: String,
833}
834
/// Serde fallback for [`PeriodTrialBalance::framework`]: yields `"us_gaap"`
/// when the field is absent from the deserialized input.
fn default_framework() -> String {
    String::from("us_gaap")
}
838
839impl PeriodTrialBalance {
840    /// Convert this in-memory period TB into the canonical
841    /// [`datasynth_core::models::balance::TrialBalance`] shape used
842    /// for the on-disk artefact.
843    ///
844    /// v5.1: the on-disk shape is now canonical end-to-end.  Group
845    /// aggregate's `tb_loader` consumes the canonical type directly,
846    /// dropping the v5.0 dual-shape detection that converted from
847    /// `PeriodTrialBalance` JSON on the fly.
848    ///
849    /// v5.33: framework-aware classification — `category` and
850    /// `account_type` are now resolved via
851    /// [`datasynth_core::framework_accounts::FrameworkAccounts`] for the
852    /// framework recorded on `self.framework`, fixing the v5.32-and-prior
853    /// regression where every line was stamped `AccountType::Asset`
854    /// regardless of code (Defect C in the 3-year medium-chain
855    /// FINDINGS doc).
856    ///
857    /// The `is_balanced` / `is_equation_valid` flags are now set to
858    /// `true` with `out_of_balance` / `equation_difference` clamped to
859    /// zero. The interim-TB shape this writer produces is "cumulative
860    /// BS positions + period-only P&L", which is the standard adjusted
861    /// TB layout but has no `Σ debits == Σ credits` invariant — that
862    /// comparison is meaningful only for a gross-flow TB built from
863    /// fully-balanced JEs over a single time window. The integrity that
864    /// IS guaranteed is the underlying per-JE balance invariant
865    /// enforced by [`datasynth_core::models::journal_entry::JournalEntry::new`].
866    /// Downstream consumers that need a real signed-equation check
867    /// (`Σ A = Σ L + Σ E + NI`) should derive it from opening balances
868    /// plus the period-only P&L lines, not from the raw debit/credit
869    /// totals stamped here.
870    pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
871        let framework = &self.framework;
872        let fa = datasynth_core::framework_accounts::FrameworkAccounts::for_framework(framework);
873        let mut total_debits = Decimal::ZERO;
874        let mut total_credits = Decimal::ZERO;
875        let lines: Vec<TrialBalanceLine> = self
876            .entries
877            .into_iter()
878            .map(|e| {
879                total_debits += e.debit_balance;
880                total_credits += e.credit_balance;
881                let category =
882                    AccountCategory::from_account_code_with_framework(&e.account_code, framework);
883                let account_type = fa.classify_account_type(&e.account_code);
884                TrialBalanceLine {
885                    account_code: e.account_code,
886                    account_description: e.account_name,
887                    category,
888                    account_type,
889                    opening_balance: Decimal::ZERO,
890                    period_debits: e.debit_balance,
891                    period_credits: e.credit_balance,
892                    closing_balance: e.debit_balance - e.credit_balance,
893                    debit_balance: e.debit_balance,
894                    credit_balance: e.credit_balance,
895                    cost_center: None,
896                    profit_center: None,
897                }
898            })
899            .collect();
900        TrialBalance {
901            trial_balance_id: format!(
902                "{company_code}-{:04}{:02}",
903                self.fiscal_year, self.fiscal_period
904            ),
905            company_code: company_code.to_string(),
906            company_name: None,
907            as_of_date: self.period_end,
908            fiscal_year: self.fiscal_year as i32,
909            fiscal_period: self.fiscal_period as u32,
910            currency: currency.to_string(),
911            balance_type: TrialBalanceType::Adjusted,
912            lines,
913            total_debits,
914            total_credits,
915            is_balanced: true,
916            out_of_balance: Decimal::ZERO,
917            is_equation_valid: true,
918            equation_difference: Decimal::ZERO,
919            category_summary: std::collections::HashMap::new(),
920            created_at: self
921                .period_start
922                .and_hms_opt(0, 0, 0)
923                .expect("midnight is a valid time"),
924            created_by: "ORCHESTRATOR".to_string(),
925            approved_by: None,
926            approved_at: None,
927            status: TrialBalanceStatus::Final,
928        }
929    }
930}
931
932/// Financial reporting snapshot (financial statements + bank reconciliations).
933#[derive(Debug, Clone, Default)]
934pub struct FinancialReportingSnapshot {
935    /// Financial statements (balance sheet, income statement, cash flow).
936    /// For multi-entity configs this includes all standalone statements.
937    pub financial_statements: Vec<FinancialStatement>,
938    /// Standalone financial statements keyed by entity code.
939    /// Each entity has its own slice of statements.
940    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
941    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
942    pub consolidated_statements: Vec<FinancialStatement>,
943    /// Consolidation schedules (one per period) showing pre/post elimination detail.
944    pub consolidation_schedules: Vec<ConsolidationSchedule>,
945    /// Bank reconciliations.
946    pub bank_reconciliations: Vec<BankReconciliation>,
947    /// Period-close trial balances (one per period).
948    pub trial_balances: Vec<PeriodTrialBalance>,
949    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
950    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
951    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
952    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
953    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
954    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
955}
956
957/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
958#[derive(Debug, Clone, Default)]
959pub struct HrSnapshot {
960    /// Payroll runs (actual data).
961    pub payroll_runs: Vec<PayrollRun>,
962    /// Payroll line items (actual data).
963    pub payroll_line_items: Vec<PayrollLineItem>,
964    /// Time entries (actual data).
965    pub time_entries: Vec<TimeEntry>,
966    /// Expense reports (actual data).
967    pub expense_reports: Vec<ExpenseReport>,
968    /// Benefit enrollments (actual data).
969    pub benefit_enrollments: Vec<BenefitEnrollment>,
970    /// Defined benefit pension plans (IAS 19 / ASC 715).
971    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
972    /// Pension obligation (DBO) roll-forwards.
973    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
974    /// Plan asset roll-forwards.
975    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
976    /// Pension disclosures.
977    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
978    /// Journal entries generated from pension expense and OCI remeasurements.
979    pub pension_journal_entries: Vec<JournalEntry>,
980    /// Stock grants (ASC 718 / IFRS 2).
981    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
982    /// Stock-based compensation period expense records.
983    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
984    /// Journal entries generated from stock-based compensation expense.
985    pub stock_comp_journal_entries: Vec<JournalEntry>,
986    /// Payroll runs.
987    pub payroll_run_count: usize,
988    /// Payroll line item count.
989    pub payroll_line_item_count: usize,
990    /// Time entry count.
991    pub time_entry_count: usize,
992    /// Expense report count.
993    pub expense_report_count: usize,
994    /// Benefit enrollment count.
995    pub benefit_enrollment_count: usize,
996    /// Pension plan count.
997    pub pension_plan_count: usize,
998    /// Stock grant count.
999    pub stock_grant_count: usize,
1000}
1001
1002/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
1003#[derive(Debug, Clone, Default)]
1004pub struct AccountingStandardsSnapshot {
1005    /// Revenue recognition contracts (actual data).
1006    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
1007    /// Impairment tests (actual data).
1008    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
1009    /// Business combinations (IFRS 3 / ASC 805).
1010    pub business_combinations:
1011        Vec<datasynth_core::models::business_combination::BusinessCombination>,
1012    /// Journal entries generated from business combinations (Day 1 + amortization).
1013    pub business_combination_journal_entries: Vec<JournalEntry>,
1014    /// ECL models (IFRS 9 / ASC 326).
1015    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
1016    /// ECL provision movements.
1017    pub ecl_provision_movements:
1018        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
1019    /// Journal entries from ECL provision.
1020    pub ecl_journal_entries: Vec<JournalEntry>,
1021    /// Provisions (IAS 37 / ASC 450).
1022    pub provisions: Vec<datasynth_core::models::provision::Provision>,
1023    /// Provision movement roll-forwards (IAS 37 / ASC 450).
1024    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
1025    /// Contingent liabilities (IAS 37 / ASC 450).
1026    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
1027    /// Journal entries from provisions.
1028    pub provision_journal_entries: Vec<JournalEntry>,
1029    /// IAS 21 functional currency translation results (one per entity per period).
1030    pub currency_translation_results:
1031        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
1032    /// Revenue recognition contract count.
1033    pub revenue_contract_count: usize,
1034    /// Impairment test count.
1035    pub impairment_test_count: usize,
1036    /// Business combination count.
1037    pub business_combination_count: usize,
1038    /// ECL model count.
1039    pub ecl_model_count: usize,
1040    /// Provision count.
1041    pub provision_count: usize,
1042    /// Currency translation result count (IAS 21).
1043    pub currency_translation_count: usize,
1044    // ---- v3.3.1: Lease / FairValue / FrameworkReconciliation ----
1045    /// Lease contracts (IFRS 16 / ASC 842). Each entry carries its own
1046    /// ROU asset + lease liability details.
1047    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
1048    /// Fair value measurements (IFRS 13 / ASC 820) across Level 1/2/3.
1049    pub fair_value_measurements:
1050        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
1051    /// Framework difference records (dual-reporting only).
1052    pub framework_differences:
1053        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
1054    /// Per-entity framework reconciliation (dual-reporting only).
1055    pub framework_reconciliations:
1056        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
1057    /// Counts for stats logging.
1058    pub lease_count: usize,
1059    pub fair_value_measurement_count: usize,
1060    pub framework_difference_count: usize,
1061}
1062
1063/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
1064#[derive(Debug, Clone, Default)]
1065pub struct ComplianceRegulationsSnapshot {
1066    /// Flattened standard records for output.
1067    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
1068    /// Cross-reference records.
1069    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
1070    /// Jurisdiction profile records.
1071    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
1072    /// Generated audit procedures.
1073    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
1074    /// Generated compliance findings.
1075    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
1076    /// Generated regulatory filings.
1077    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
1078    /// Compliance graph (if graph integration enabled).
1079    pub compliance_graph: Option<datasynth_graph::Graph>,
1080}
1081
1082/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
1083#[derive(Debug, Clone, Default)]
1084pub struct ManufacturingSnapshot {
1085    /// Production orders (actual data).
1086    pub production_orders: Vec<ProductionOrder>,
1087    /// Quality inspections (actual data).
1088    pub quality_inspections: Vec<QualityInspection>,
1089    /// Cycle counts (actual data).
1090    pub cycle_counts: Vec<CycleCount>,
1091    /// BOM components (actual data).
1092    pub bom_components: Vec<BomComponent>,
1093    /// Inventory movements (actual data).
1094    pub inventory_movements: Vec<InventoryMovement>,
1095    /// Production order count.
1096    pub production_order_count: usize,
1097    /// Quality inspection count.
1098    pub quality_inspection_count: usize,
1099    /// Cycle count count.
1100    pub cycle_count_count: usize,
1101    /// BOM component count.
1102    pub bom_component_count: usize,
1103    /// Inventory movement count.
1104    pub inventory_movement_count: usize,
1105}
1106
1107/// Sales, KPI, and budget data snapshot.
1108#[derive(Debug, Clone, Default)]
1109pub struct SalesKpiBudgetsSnapshot {
1110    /// Sales quotes (actual data).
1111    pub sales_quotes: Vec<SalesQuote>,
1112    /// Management KPIs (actual data).
1113    pub kpis: Vec<ManagementKpi>,
1114    /// Budgets (actual data).
1115    pub budgets: Vec<Budget>,
1116    /// Sales quote count.
1117    pub sales_quote_count: usize,
1118    /// Management KPI count.
1119    pub kpi_count: usize,
1120    /// Budget line count.
1121    pub budget_line_count: usize,
1122}
1123
1124/// Anomaly labels generated during injection.
1125#[derive(Debug, Clone, Default)]
1126pub struct AnomalyLabels {
1127    /// All anomaly labels.
1128    pub labels: Vec<LabeledAnomaly>,
1129    /// Summary statistics.
1130    pub summary: Option<AnomalySummary>,
1131    /// Count by anomaly type.
1132    pub by_type: HashMap<String, usize>,
1133}
1134
1135/// Balance validation results from running balance tracker.
1136#[derive(Debug, Clone, Default)]
1137pub struct BalanceValidationResult {
1138    /// Whether validation was performed.
1139    pub validated: bool,
1140    /// Whether balance sheet equation is satisfied.
1141    pub is_balanced: bool,
1142    /// Number of entries processed.
1143    pub entries_processed: u64,
1144    /// Total debits across all entries.
1145    pub total_debits: rust_decimal::Decimal,
1146    /// Total credits across all entries.
1147    pub total_credits: rust_decimal::Decimal,
1148    /// Number of accounts tracked.
1149    pub accounts_tracked: usize,
1150    /// Number of companies tracked.
1151    pub companies_tracked: usize,
1152    /// Validation errors encountered.
1153    pub validation_errors: Vec<ValidationError>,
1154    /// Whether any unbalanced entries were found.
1155    pub has_unbalanced_entries: bool,
1156}
1157
1158/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
1159#[derive(Debug, Clone, Default)]
1160pub struct TaxSnapshot {
1161    /// Tax jurisdictions.
1162    pub jurisdictions: Vec<TaxJurisdiction>,
1163    /// Tax codes.
1164    pub codes: Vec<TaxCode>,
1165    /// Tax lines computed on documents.
1166    pub tax_lines: Vec<TaxLine>,
1167    /// Tax returns filed per period.
1168    pub tax_returns: Vec<TaxReturn>,
1169    /// Tax provisions.
1170    pub tax_provisions: Vec<TaxProvision>,
1171    /// Withholding tax records.
1172    pub withholding_records: Vec<WithholdingTaxRecord>,
1173    /// Tax anomaly labels.
1174    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1175    /// Jurisdiction count.
1176    pub jurisdiction_count: usize,
1177    /// Code count.
1178    pub code_count: usize,
1179    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
1180    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1181    /// Journal entries posting tax payable/receivable from computed tax lines.
1182    pub tax_posting_journal_entries: Vec<JournalEntry>,
1183}
1184
1185/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1186#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1187pub struct IntercompanySnapshot {
1188    /// Group ownership structure (parent/subsidiary/associate relationships).
1189    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1190    /// IC matched pairs (transaction pairs between related entities).
1191    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1192    /// IC journal entries generated from matched pairs (seller side).
1193    pub seller_journal_entries: Vec<JournalEntry>,
1194    /// IC journal entries generated from matched pairs (buyer side).
1195    pub buyer_journal_entries: Vec<JournalEntry>,
1196    /// Elimination entries for consolidation.
1197    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1198    /// NCI measurements derived from group structure ownership percentages.
1199    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1200    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
1201    #[serde(skip)]
1202    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1203    /// IC matched pair count.
1204    pub matched_pair_count: usize,
1205    /// IC elimination entry count.
1206    pub elimination_entry_count: usize,
1207    /// IC matching rate (0.0 to 1.0).
1208    pub match_rate: f64,
1209}
1210
1211/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
1212#[derive(Debug, Clone, Default)]
1213pub struct EsgSnapshot {
1214    /// Emission records (scope 1, 2, 3).
1215    pub emissions: Vec<EmissionRecord>,
1216    /// Energy consumption records.
1217    pub energy: Vec<EnergyConsumption>,
1218    /// Water usage records.
1219    pub water: Vec<WaterUsage>,
1220    /// Waste records.
1221    pub waste: Vec<WasteRecord>,
1222    /// Workforce diversity metrics.
1223    pub diversity: Vec<WorkforceDiversityMetric>,
1224    /// Pay equity metrics.
1225    pub pay_equity: Vec<PayEquityMetric>,
1226    /// Safety incidents.
1227    pub safety_incidents: Vec<SafetyIncident>,
1228    /// Safety metrics.
1229    pub safety_metrics: Vec<SafetyMetric>,
1230    /// Governance metrics.
1231    pub governance: Vec<GovernanceMetric>,
1232    /// Supplier ESG assessments.
1233    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1234    /// Materiality assessments.
1235    pub materiality: Vec<MaterialityAssessment>,
1236    /// ESG disclosures.
1237    pub disclosures: Vec<EsgDisclosure>,
1238    /// Climate scenarios.
1239    pub climate_scenarios: Vec<ClimateScenario>,
1240    /// ESG anomaly labels.
1241    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1242    /// Total emission record count.
1243    pub emission_count: usize,
1244    /// Total disclosure count.
1245    pub disclosure_count: usize,
1246}
1247
1248/// Treasury data snapshot (cash management, hedging, debt, pooling).
1249#[derive(Debug, Clone, Default)]
1250pub struct TreasurySnapshot {
1251    /// Cash positions (daily balances per account).
1252    pub cash_positions: Vec<CashPosition>,
1253    /// Cash forecasts.
1254    pub cash_forecasts: Vec<CashForecast>,
1255    /// Cash pools.
1256    pub cash_pools: Vec<CashPool>,
1257    /// Cash pool sweep transactions.
1258    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1259    /// Hedging instruments.
1260    pub hedging_instruments: Vec<HedgingInstrument>,
1261    /// Hedge relationships (ASC 815/IFRS 9 designations).
1262    pub hedge_relationships: Vec<HedgeRelationship>,
1263    /// Debt instruments.
1264    pub debt_instruments: Vec<DebtInstrument>,
1265    /// Bank guarantees and letters of credit.
1266    pub bank_guarantees: Vec<BankGuarantee>,
1267    /// Intercompany netting runs.
1268    pub netting_runs: Vec<NettingRun>,
1269    /// Treasury anomaly labels.
1270    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1271    /// Journal entries generated from treasury instruments (debt interest accruals,
1272    /// hedge MTM, cash pool sweeps).
1273    pub journal_entries: Vec<JournalEntry>,
1274}
1275
1276/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
1277#[derive(Debug, Clone, Default)]
1278pub struct ProjectAccountingSnapshot {
1279    /// Projects with WBS hierarchies.
1280    pub projects: Vec<Project>,
1281    /// Project cost lines (linked from source documents).
1282    pub cost_lines: Vec<ProjectCostLine>,
1283    /// Revenue recognition records.
1284    pub revenue_records: Vec<ProjectRevenue>,
1285    /// Earned value metrics.
1286    pub earned_value_metrics: Vec<EarnedValueMetric>,
1287    /// Change orders.
1288    pub change_orders: Vec<ChangeOrder>,
1289    /// Project milestones.
1290    pub milestones: Vec<ProjectMilestone>,
1291}
1292
1293/// Complete result of an enhanced generation run: one field per generated dataset or snapshot, plus run statistics. Derives `Debug` and `Default` only (no `Clone`).
1294#[derive(Debug, Default)]
1295pub struct EnhancedGenerationResult {
1296    /// Generated chart of accounts.
1297    pub chart_of_accounts: ChartOfAccounts,
1298    /// Master data snapshot.
1299    pub master_data: MasterDataSnapshot,
1300    /// Document flow snapshot.
1301    pub document_flows: DocumentFlowSnapshot,
1302    /// Subledger snapshot (linked from document flows).
1303    pub subledger: SubledgerSnapshot,
1304    /// OCPM event log snapshot (if OCPM generation enabled).
1305    pub ocpm: OcpmSnapshot,
1306    /// Audit data snapshot (if audit generation enabled).
1307    pub audit: AuditSnapshot,
1308    /// Banking KYC/AML data snapshot (if banking generation enabled).
1309    pub banking: BankingSnapshot,
1310    /// Graph export snapshot (if graph export enabled).
1311    pub graph_export: GraphExportSnapshot,
1312    /// S2C sourcing data snapshot (if sourcing generation enabled).
1313    pub sourcing: SourcingSnapshot,
1314    /// Financial reporting snapshot (financial statements + bank reconciliations).
1315    pub financial_reporting: FinancialReportingSnapshot,
1316    /// HR data snapshot (payroll, time entries, expenses).
1317    pub hr: HrSnapshot,
1318    /// Accounting standards snapshot (revenue recognition, impairment).
1319    pub accounting_standards: AccountingStandardsSnapshot,
1320    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1321    pub manufacturing: ManufacturingSnapshot,
1322    /// Sales, KPI, and budget snapshot.
1323    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1324    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1325    pub tax: TaxSnapshot,
1326    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1327    pub esg: EsgSnapshot,
1328    /// Treasury data snapshot (cash management, hedging, debt).
1329    pub treasury: TreasurySnapshot,
1330    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1331    pub project_accounting: ProjectAccountingSnapshot,
1332    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1333    pub process_evolution: Vec<ProcessEvolutionEvent>,
1334    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1335    pub organizational_events: Vec<OrganizationalEvent>,
1336    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1337    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1338    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1339    pub intercompany: IntercompanySnapshot,
1340    /// Generated journal entries.
1341    pub journal_entries: Vec<JournalEntry>,
1342    /// Anomaly labels (if injection enabled).
1343    pub anomaly_labels: AnomalyLabels,
1344    /// Balance validation results (if validation enabled).
1345    pub balance_validation: BalanceValidationResult,
1346    /// Data quality statistics (if injection enabled).
1347    pub data_quality_stats: DataQualityStats,
1348    /// Data quality issue records (if injection enabled).
1349    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1350    /// Generation statistics (counters and timings for the run).
1351    pub statistics: EnhancedGenerationStatistics,
1352    /// Data lineage graph (if tracking enabled).
1353    pub lineage: Option<super::lineage::LineageGraph>,
1354    /// Quality gate evaluation result (optional).
1355    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1356    /// Internal controls (if controls generation enabled).
1357    pub internal_controls: Vec<InternalControl>,
1358    /// SoD (Segregation of Duties) violations identified during control application.
1359    ///
1360    /// Each record corresponds to a journal entry where `sod_violation == true`.
1361    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1362    /// Opening balances (if opening balance generation enabled).
1363    pub opening_balances: Vec<GeneratedOpeningBalance>,
1364    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1365    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1366    /// Counterfactual (original, mutated) JE pairs for ML training.
1367    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1368    /// Fraud red-flag indicators on P2P/O2C documents.
1369    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1370    /// Collusion rings (coordinated fraud networks).
1371    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1372    /// Bi-temporal version chains for vendor entities.
1373    pub temporal_vendor_chains:
1374        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1375    /// Entity relationship graph (nodes + edges with strength scores); optional.
1376    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1377    /// Cross-process links (P2P ↔ O2C via inventory movements).
1378    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1379    /// Industry-specific GL accounts and metadata (optional).
1380    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1381    /// SP5.2 — CoA semantic prior snapshot. When `Some`, `write_journal_entries_csv`
1382    /// builds a secondary lookup from the prior's 3,123 corpus accounts and uses
1383    /// it as a fallback when the synthetic CoA index misses a line's `gl_account`
1384    /// (common when SP3.7's per-source attribute conditional emits corpus account
1385    /// numbers that differ from the synthetic CoA master table's number set).
1386    pub coa_semantic_prior:
1387        Option<datasynth_core::distributions::behavioral_priors::CoaSemanticPrior>,
1388    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1389    pub compliance_regulations: ComplianceRegulationsSnapshot,
1390    /// v3.3.0: analytics-metadata snapshot (prior-year comparatives,
1391    /// industry benchmarks, management reports, drift events). Empty
1392    /// when `analytics_metadata.enabled = false`.
1393    pub analytics_metadata: AnalyticsMetadataSnapshot,
1394    /// v3.5.1+: statistical validation report (Benford, chi-squared,
1395    /// KS) over the generated amount distribution.  `None` when
1396    /// `distributions.validation.enabled = false`.
1397    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1398    /// v4.1.3+: interconnectivity snapshot — vendor tier assignments,
1399    /// customer value-segment labels, and industry-specific metadata
1400    /// populated from the previously-inert `vendor_network`,
1401    /// `customer_segmentation`, and `industry_specific` schema
1402    /// sections. Empty when those sections are disabled.
1403    pub interconnectivity: InterconnectivitySnapshot,
1404}
1405
1406/// v4.1.3+: interconnectivity snapshot. Populated when
1407/// `vendor_network.enabled` / `customer_segmentation.enabled` /
1408/// `industry_specific.enabled` are set. Holds tier / segment / industry
1409/// labels for generated entities so downstream tooling (graph export,
1410/// risk models) can consume them without re-deriving them. All lists are empty in the `Default` value.
1411#[derive(Debug, Clone, Default)]
1412pub struct InterconnectivitySnapshot {
1413    /// `(vendor_id, tier)` pairs. Tier 1 = strategic / primary; Tier 2
1414    /// = sub-tier suppliers to tier 1; Tier 3 = sub-sub-tier.
1415    pub vendor_tiers: Vec<(String, u8)>,
1416    /// `(vendor_id, cluster_label)` pairs where cluster_label is one of
1417    /// `"reliable_strategic" / "standard_operational" / "transactional"
1418    /// / "problematic"`.
1419    pub vendor_clusters: Vec<(String, String)>,
1420    /// `(customer_id, value_segment)` pairs where value_segment is one
1421    /// of `"enterprise" / "mid_market" / "smb" / "consumer"`.
1422    pub customer_value_segments: Vec<(String, String)>,
1423    /// `(customer_id, lifecycle_stage)` pairs where stage is one of
1424    /// `"prospect" / "new" / "growth" / "mature" / "at_risk" /
1425    /// "churned" / "won_back"`.
1426    pub customer_lifecycle_stages: Vec<(String, String)>,
1427    /// Summary strings, one per industry-specific knob applied, if any
1428    /// (e.g. `"manufacturing.bom_depth=3"`).
1429    pub industry_metadata: Vec<String>,
1430}
1431
1432/// v3.3.0: snapshot of the analytics-metadata phase output. Derives `Default`, so every list starts empty.
1433#[derive(Debug, Clone, Default)]
1434pub struct AnalyticsMetadataSnapshot {
1435    /// Prior-year comparative balances per account, per entity.
1436    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
1437    /// Industry benchmarks for the configured industry.
1438    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
1439    /// Management-report artefacts (dashboards, MDA sections).
1440    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
1441    /// Labeled drift events emitted from the post-generation sweep.
1442    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
1443}
1444
1445/// Counters and timings summarising one enhanced generation run. Serde-serializable; fields marked `#[serde(default)]` may be absent from the input when deserializing.
1446#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1447pub struct EnhancedGenerationStatistics {
1448    /// Total journal entries generated.
1449    pub total_entries: u64,
1450    /// Total line items generated.
1451    pub total_line_items: u64,
1452    /// Number of accounts in CoA.
1453    pub accounts_count: usize,
1454    /// Number of companies.
1455    pub companies_count: usize,
1456    /// Period in months.
1457    pub period_months: u32,
1458    /// Master data count: vendors (customer, material, asset, and employee counts follow).
1459    pub vendor_count: usize,
1460    pub customer_count: usize,
1461    pub material_count: usize,
1462    pub asset_count: usize,
1463    pub employee_count: usize,
1464    /// Document flow count: P2P chains (the O2C chain count follows).
1465    pub p2p_chain_count: usize,
1466    pub o2c_chain_count: usize,
1467    /// Subledger count: AP invoices (the AR invoice count follows).
1468    pub ap_invoice_count: usize,
1469    pub ar_invoice_count: usize,
1470    /// OCPM count: events (object and case counts follow).
1471    pub ocpm_event_count: usize,
1472    pub ocpm_object_count: usize,
1473    pub ocpm_case_count: usize,
1474    /// Audit count: engagements (workpaper, evidence, risk, finding, and judgment counts follow).
1475    pub audit_engagement_count: usize,
1476    pub audit_workpaper_count: usize,
1477    pub audit_evidence_count: usize,
1478    pub audit_risk_count: usize,
1479    pub audit_finding_count: usize,
1480    pub audit_judgment_count: usize,
1481    /// ISA 505 confirmation count (the confirmation response count follows).
1482    #[serde(default)]
1483    pub audit_confirmation_count: usize,
1484    #[serde(default)]
1485    pub audit_confirmation_response_count: usize,
1486    /// ISA 330/530 procedure step count (the sample count follows).
1487    #[serde(default)]
1488    pub audit_procedure_step_count: usize,
1489    #[serde(default)]
1490    pub audit_sample_count: usize,
1491    /// ISA 520 analytical procedure result count.
1492    #[serde(default)]
1493    pub audit_analytical_result_count: usize,
1494    /// ISA 610 internal audit function count (the internal audit report count follows).
1495    #[serde(default)]
1496    pub audit_ia_function_count: usize,
1497    #[serde(default)]
1498    pub audit_ia_report_count: usize,
1499    /// ISA 550 related party count (the related party transaction count follows).
1500    #[serde(default)]
1501    pub audit_related_party_count: usize,
1502    #[serde(default)]
1503    pub audit_related_party_transaction_count: usize,
1504    /// Number of anomalies injected.
1505    pub anomalies_injected: usize,
1506    /// Number of data quality issues.
1507    pub data_quality_issues: usize,
1508    /// Banking count: customers (account, transaction, and suspicious counts follow).
1509    pub banking_customer_count: usize,
1510    pub banking_account_count: usize,
1511    pub banking_transaction_count: usize,
1512    pub banking_suspicious_count: usize,
1513    /// Graph export count (graph node and edge counts follow).
1514    pub graph_export_count: usize,
1515    pub graph_node_count: usize,
1516    pub graph_edge_count: usize,
1517    /// LLM enrichment timing (milliseconds).
1518    #[serde(default)]
1519    pub llm_enrichment_ms: u64,
1520    /// Number of vendor names enriched by LLM.
1521    #[serde(default)]
1522    pub llm_vendors_enriched: usize,
1523    /// v4.1.1+: number of customer names enriched by LLM.
1524    #[serde(default)]
1525    pub llm_customers_enriched: usize,
1526    /// v4.1.1+: number of material descriptions enriched by LLM.
1527    #[serde(default)]
1528    pub llm_materials_enriched: usize,
1529    /// v4.1.1+: number of audit finding titles enriched by LLM.
1530    #[serde(default)]
1531    pub llm_findings_enriched: usize,
1532    /// Diffusion enhancement timing (milliseconds).
1533    #[serde(default)]
1534    pub diffusion_enhancement_ms: u64,
1535    /// Number of diffusion samples generated.
1536    #[serde(default)]
1537    pub diffusion_samples_generated: usize,
1538    /// Hybrid-diffusion blend weight actually applied (after clamp to \[0,1\]).
1539    /// `None` when the neural/hybrid backend is not active; omitted from serialized output when `None`.
1540    #[serde(default, skip_serializing_if = "Option::is_none")]
1541    pub neural_hybrid_weight: Option<f64>,
1542    /// Hybrid-diffusion strategy applied (weighted_average / column_select / threshold); omitted from serialized output when `None`.
1543    #[serde(default, skip_serializing_if = "Option::is_none")]
1544    pub neural_hybrid_strategy: Option<String>,
1545    /// How many columns were routed through the neural backend; omitted from serialized output when `None`.
1546    #[serde(default, skip_serializing_if = "Option::is_none")]
1547    pub neural_routed_column_count: Option<usize>,
1548    /// Causal generation timing (milliseconds).
1549    #[serde(default)]
1550    pub causal_generation_ms: u64,
1551    /// Number of causal samples generated.
1552    #[serde(default)]
1553    pub causal_samples_generated: usize,
1554    /// Whether causal validation passed (optional; unlike the other `Option` fields it is still serialized when `None`).
1555    #[serde(default)]
1556    pub causal_validation_passed: Option<bool>,
1557    /// S2C sourcing count: projects (RFx event, bid, contract, catalog item, and scorecard counts follow).
1558    #[serde(default)]
1559    pub sourcing_project_count: usize,
1560    #[serde(default)]
1561    pub rfx_event_count: usize,
1562    #[serde(default)]
1563    pub bid_count: usize,
1564    #[serde(default)]
1565    pub contract_count: usize,
1566    #[serde(default)]
1567    pub catalog_item_count: usize,
1568    #[serde(default)]
1569    pub scorecard_count: usize,
1570    /// Financial reporting count: financial statements (the bank reconciliation count follows).
1571    #[serde(default)]
1572    pub financial_statement_count: usize,
1573    #[serde(default)]
1574    pub bank_reconciliation_count: usize,
1575    /// HR count: payroll runs (time entry, expense report, benefit enrollment, pension plan, and stock grant counts follow).
1576    #[serde(default)]
1577    pub payroll_run_count: usize,
1578    #[serde(default)]
1579    pub time_entry_count: usize,
1580    #[serde(default)]
1581    pub expense_report_count: usize,
1582    #[serde(default)]
1583    pub benefit_enrollment_count: usize,
1584    #[serde(default)]
1585    pub pension_plan_count: usize,
1586    #[serde(default)]
1587    pub stock_grant_count: usize,
1588    /// Accounting standards count: revenue contracts (impairment test, business combination, ECL model, and provision counts follow).
1589    #[serde(default)]
1590    pub revenue_contract_count: usize,
1591    #[serde(default)]
1592    pub impairment_test_count: usize,
1593    #[serde(default)]
1594    pub business_combination_count: usize,
1595    #[serde(default)]
1596    pub ecl_model_count: usize,
1597    #[serde(default)]
1598    pub provision_count: usize,
1599    /// Manufacturing count: production orders (quality inspection, cycle count, BOM component, and inventory movement counts follow).
1600    #[serde(default)]
1601    pub production_order_count: usize,
1602    #[serde(default)]
1603    pub quality_inspection_count: usize,
1604    #[serde(default)]
1605    pub cycle_count_count: usize,
1606    #[serde(default)]
1607    pub bom_component_count: usize,
1608    #[serde(default)]
1609    pub inventory_movement_count: usize,
1610    /// Sales & reporting count: sales quotes (KPI and budget line counts follow).
1611    #[serde(default)]
1612    pub sales_quote_count: usize,
1613    #[serde(default)]
1614    pub kpi_count: usize,
1615    #[serde(default)]
1616    pub budget_line_count: usize,
1617    /// Tax count: jurisdictions (the tax code count follows).
1618    #[serde(default)]
1619    pub tax_jurisdiction_count: usize,
1620    #[serde(default)]
1621    pub tax_code_count: usize,
1622    /// ESG count: emissions (the disclosure count follows).
1623    #[serde(default)]
1624    pub esg_emission_count: usize,
1625    #[serde(default)]
1626    pub esg_disclosure_count: usize,
1627    /// Intercompany count: matched pairs (the elimination count follows).
1628    #[serde(default)]
1629    pub ic_matched_pair_count: usize,
1630    #[serde(default)]
1631    pub ic_elimination_count: usize,
1632    /// Number of intercompany journal entries (seller + buyer side).
1633    #[serde(default)]
1634    pub ic_transaction_count: usize,
1635    /// Number of fixed asset subledger records.
1636    #[serde(default)]
1637    pub fa_subledger_count: usize,
1638    /// Number of inventory subledger records.
1639    #[serde(default)]
1640    pub inventory_subledger_count: usize,
1641    /// Treasury debt instrument count.
1642    #[serde(default)]
1643    pub treasury_debt_instrument_count: usize,
1644    /// Treasury hedging instrument count.
1645    #[serde(default)]
1646    pub treasury_hedging_instrument_count: usize,
1647    /// Project accounting project count.
1648    #[serde(default)]
1649    pub project_count: usize,
1650    /// Project accounting change order count.
1651    #[serde(default)]
1652    pub project_change_order_count: usize,
1653    /// Tax provision count.
1654    #[serde(default)]
1655    pub tax_provision_count: usize,
1656    /// Opening balance count.
1657    #[serde(default)]
1658    pub opening_balance_count: usize,
1659    /// Subledger reconciliation count.
1660    #[serde(default)]
1661    pub subledger_reconciliation_count: usize,
1662    /// Tax line count.
1663    #[serde(default)]
1664    pub tax_line_count: usize,
1665    /// Project cost line count.
1666    #[serde(default)]
1667    pub project_cost_line_count: usize,
1668    /// Cash position count.
1669    #[serde(default)]
1670    pub cash_position_count: usize,
1671    /// Cash forecast count.
1672    #[serde(default)]
1673    pub cash_forecast_count: usize,
1674    /// Cash pool count.
1675    #[serde(default)]
1676    pub cash_pool_count: usize,
1677    /// Process evolution event count.
1678    #[serde(default)]
1679    pub process_evolution_event_count: usize,
1680    /// Organizational event count.
1681    #[serde(default)]
1682    pub organizational_event_count: usize,
1683    /// Counterfactual pair count.
1684    #[serde(default)]
1685    pub counterfactual_pair_count: usize,
1686    /// Number of fraud red-flag indicators generated.
1687    #[serde(default)]
1688    pub red_flag_count: usize,
1689    /// Number of collusion rings generated.
1690    #[serde(default)]
1691    pub collusion_ring_count: usize,
1692    /// Number of bi-temporal vendor version chains generated.
1693    #[serde(default)]
1694    pub temporal_version_chain_count: usize,
1695    /// Number of nodes in the entity relationship graph.
1696    #[serde(default)]
1697    pub entity_relationship_node_count: usize,
1698    /// Number of edges in the entity relationship graph.
1699    #[serde(default)]
1700    pub entity_relationship_edge_count: usize,
1701    /// Number of cross-process links generated.
1702    #[serde(default)]
1703    pub cross_process_link_count: usize,
1704    /// Number of disruption events generated.
1705    #[serde(default)]
1706    pub disruption_event_count: usize,
1707    /// Number of industry-specific GL accounts generated.
1708    #[serde(default)]
1709    pub industry_gl_account_count: usize,
1710    /// Number of period-close journal entries generated (tax provision + closing entries).
1711    #[serde(default)]
1712    pub period_close_je_count: usize,
1713}
1714
1715/// Enhanced orchestrator with full feature integration. Built via `new` / `with_defaults` and customised with the `with_*` / `set_*` methods.
1716pub struct EnhancedOrchestrator {
1717    config: GeneratorConfig, // checked by `validate_config` in `new` before construction
1718    phase_config: PhaseConfig, // `show_progress` is overwritten by `with_progress`
1719    coa: Option<Arc<ChartOfAccounts>>, // `None` at construction
1720    master_data: MasterDataSnapshot, // starts as `MasterDataSnapshot::default()`
1721    seed: u64, // `config.global.seed`, or a random value when that is unset
1722    multi_progress: Option<MultiProgress>, // `None` at construction; created by `with_progress(true)`
1723    /// Resource guard for memory, disk, and CPU monitoring (rebuilt by `with_output_path`)
1724    resource_guard: ResourceGuard,
1725    /// Output path for disk space monitoring (`None` until `with_output_path` is called)
1726    output_path: Option<PathBuf>,
1727    /// Copula generators for preserving correlations (from fingerprint); empty at construction
1728    copula_generators: Vec<CopulaGeneratorSpec>,
1729    /// Country pack registry for localized data generation
1730    country_pack_registry: datasynth_core::CountryPackRegistry,
1731    /// Optional streaming sink for phase-by-phase output
1732    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1733    /// Shared template provider for user-supplied template packs.
1734    ///
1735    /// Constructed from `config.templates.path` at orchestrator creation
1736    /// time. When the path is `None`, this is still populated with an
1737    /// embedded-only provider so generators can always call trait methods
1738    /// without an `Option<…>` guard. v3.2.0+.
1739    template_provider: datasynth_core::templates::SharedTemplateProvider,
1740    /// v3.4.1+ temporal context for business-day / holiday awareness.
1741    ///
1742    /// Populated only when `temporal_patterns.business_days.enabled`. When
1743    /// `None`, document-flow / HR / treasury / period-close generators keep
1744    /// their legacy raw-RNG date-offset behaviour (byte-identical to v3.4.0
1745    /// for the same seed).
1746    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
1747    /// Optional shard-mode context (set by group-engine shard runners via `set_shard_context`).
1748    /// `None` preserves byte-for-byte pre-v5.0 single-entity behavior.
1749    shard_context: Option<crate::shard_context::ShardContext>,
1750    /// SP3.12 — cached priors, shared between `generate_journal_entries` (which
1751    /// loads them) and `generate_jes_from_document_flows` (which applies padding).
1752    /// Set once after the SP3 opt-in block in `generate_journal_entries`; `None` at construction.
1753    cached_priors: Option<std::sync::Arc<datasynth_generators::priors_loader::LoadedPriors>>,
1754}
1755
1756impl EnhancedOrchestrator {
1757    /// Create a new enhanced orchestrator.
1758    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1759        datasynth_config::validate_config(&config)?;
1760
1761        let seed = config.global.seed.unwrap_or_else(rand::random);
1762
1763        // Build resource guard from config
1764        let resource_guard = Self::build_resource_guard(&config, None);
1765
1766        // Build country pack registry from config
1767        let country_pack_registry = match &config.country_packs {
1768            Some(cp) => {
1769                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1770                    .map_err(|e| SynthError::config(e.to_string()))?
1771            }
1772            None => datasynth_core::CountryPackRegistry::builtin_only()
1773                .map_err(|e| SynthError::config(e.to_string()))?,
1774        };
1775
1776        // Build the shared template provider from config.templates.path.
1777        // `None` → embedded-only provider (byte-identical pre-v3.2.0 output).
1778        // `Some(path)` → load file/dir and honour `merge_strategy`.
1779        let template_provider = Self::build_template_provider(&config)?;
1780
1781        // v3.4.1: build a shared temporal context when
1782        // `temporal_patterns.business_days.enabled`. `None` preserves the
1783        // raw-RNG date-offset behaviour per-generator.
1784        let temporal_context = Self::build_temporal_context(&config)?;
1785
1786        Ok(Self {
1787            config,
1788            phase_config,
1789            coa: None,
1790            master_data: MasterDataSnapshot::default(),
1791            seed,
1792            multi_progress: None,
1793            resource_guard,
1794            output_path: None,
1795            copula_generators: Vec::new(),
1796            country_pack_registry,
1797            phase_sink: None,
1798            template_provider,
1799            temporal_context,
1800            shard_context: None,
1801            cached_priors: None,
1802        })
1803    }
1804
1805    /// Install shard-mode context.  Called by the group shard runner
1806    /// before [`EnhancedOrchestrator::generate`] (or the equivalent
1807    /// entry point).  Has no effect on single-entity runs.
1808    ///
1809    /// See [`crate::shard_context::ShardContext`] for rationale.
1810    pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1811        self.shard_context = Some(ctx);
1812    }
1813
1814    /// Build the shared [`TemporalContext`] from `config.temporal_patterns`.
1815    ///
1816    /// Returns `Ok(None)` when temporal-pattern features are disabled — the
1817    /// caller keeps its legacy raw-RNG path. Returns `Ok(Some(arc))` when
1818    /// enabled. Returns `Err` only for unrecoverable config errors.
1819    fn build_temporal_context(
1820        config: &GeneratorConfig,
1821    ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1822        use datasynth_core::distributions::{parse_region_code, TemporalContext};
1823
1824        let tp = &config.temporal_patterns;
1825        if !tp.enabled || !tp.business_days.enabled {
1826            return Ok(None);
1827        }
1828
1829        let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1830            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1831        let end_date = start_date + chrono::Months::new(config.global.period_months);
1832
1833        let region_code = tp
1834            .calendars
1835            .regions
1836            .first()
1837            .cloned()
1838            .unwrap_or_else(|| "US".to_string());
1839        let region = parse_region_code(&region_code);
1840
1841        Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1842    }
1843
1844    /// Build the shared template provider from `config.templates`.
1845    ///
1846    /// Always returns a provider — falls back to embedded-only when
1847    /// `config.templates.path` is `None`. The merge-strategy from config
1848    /// maps onto the loader's [`MergeStrategy`] enum. Load failures at
1849    /// orchestrator-construction time are fatal (preferable to silently
1850    /// using embedded pools when the user supplied a bad path).
1851    fn build_template_provider(
1852        config: &GeneratorConfig,
1853    ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1854        use datasynth_core::templates::{
1855            loader::{MergeStrategy, TemplateLoader},
1856            DefaultTemplateProvider,
1857        };
1858        use std::sync::Arc;
1859
1860        let provider = match &config.templates.path {
1861            None => DefaultTemplateProvider::new(),
1862            Some(path) => {
1863                let data = if path.is_dir() {
1864                    TemplateLoader::load_from_directory(path)
1865                } else {
1866                    TemplateLoader::load_from_file(path)
1867                }
1868                .map_err(|e| {
1869                    SynthError::config(format!(
1870                        "Failed to load templates from {}: {e}",
1871                        path.display()
1872                    ))
1873                })?;
1874                let strategy = match config.templates.merge_strategy {
1875                    datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1876                    datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1877                    datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1878                        MergeStrategy::MergePreferFile
1879                    }
1880                };
1881                DefaultTemplateProvider::with_templates(data, strategy)
1882            }
1883        };
1884        Ok(Arc::new(provider))
1885    }
1886
1887    /// Create with default phase config.
1888    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1889        Self::new(config, PhaseConfig::default())
1890    }
1891
1892    /// Set a streaming phase sink for real-time output (builder pattern).
1893    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1894        self.phase_sink = Some(sink);
1895        self
1896    }
1897
1898    /// Set a streaming phase sink on an existing orchestrator.
1899    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1900        self.phase_sink = Some(sink);
1901    }
1902
1903    /// Emit a batch of items to the phase sink (if configured).
1904    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1905        if let Some(ref sink) = self.phase_sink {
1906            for item in items {
1907                if let Ok(value) = serde_json::to_value(item) {
1908                    if let Err(e) = sink.emit(phase, type_name, &value) {
1909                        warn!(
1910                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1911                        );
1912                    }
1913                }
1914            }
1915            if let Err(e) = sink.phase_complete(phase) {
1916                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1917            }
1918        }
1919    }
1920
1921    /// Enable/disable progress bars.
1922    pub fn with_progress(mut self, show: bool) -> Self {
1923        self.phase_config.show_progress = show;
1924        if show {
1925            self.multi_progress = Some(MultiProgress::new());
1926        }
1927        self
1928    }
1929
1930    /// Set the output path for disk space monitoring.
1931    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1932        let path = path.into();
1933        self.output_path = Some(path.clone());
1934        // Rebuild resource guard with the output path
1935        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1936        self
1937    }
1938
1939    /// Access the country pack registry.
1940    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1941        &self.country_pack_registry
1942    }
1943
1944    /// Look up a country pack by country code string.
1945    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1946        self.country_pack_registry.get_by_str(country)
1947    }
1948
1949    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1950    /// company, defaulting to `"US"` if no companies are configured.
1951    fn primary_country_code(&self) -> &str {
1952        self.config
1953            .companies
1954            .first()
1955            .map(|c| c.country.as_str())
1956            .unwrap_or("US")
1957    }
1958
1959    /// Resolve the country pack for the primary (first) company.
1960    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1961        self.country_pack_for(self.primary_country_code())
1962    }
1963
1964    /// Resolve the CoA framework from config/country-pack.
1965    fn resolve_coa_framework(&self) -> CoAFramework {
1966        if self.config.accounting_standards.enabled {
1967            match self.config.accounting_standards.framework {
1968                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1969                    return CoAFramework::FrenchPcg;
1970                }
1971                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1972                    return CoAFramework::GermanSkr04;
1973                }
1974                _ => {}
1975            }
1976        }
1977        // Fallback: derive from country pack
1978        let pack = self.primary_pack();
1979        match pack.accounting.framework.as_str() {
1980            "french_gaap" => CoAFramework::FrenchPcg,
1981            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1982            _ => CoAFramework::UsGaap,
1983        }
1984    }
1985
1986    /// Resolve the framework string consumed by
1987    /// [`datasynth_core::framework_accounts::FrameworkAccounts::for_framework`].
1988    ///
1989    /// Mirrors [`Self::resolve_coa_framework`] but returns the snake_case
1990    /// label (`"us_gaap"`, `"ifrs"`, `"french_gaap"`, `"german_gaap"`,
1991    /// `"dual_reporting"`) that the framework-aware account classifier
1992    /// expects. Country drives selection because the country pack's CoA
1993    /// loader is what actually picks the numbering convention (SKR04 for
1994    /// DE, PCG for FR) — the entity's `accounting_framework` label can
1995    /// disagree with the chart it's posted against (e.g. a DE entity
1996    /// flagged `accounting_framework: ifrs` still gets SKR04 codes from
1997    /// its country pack).
1998    fn resolve_framework_str(&self) -> &'static str {
1999        // Country first — the chart of accounts loaded for this company
2000        // is keyed by country pack, so the code numbering convention
2001        // follows country, not the framework label.
2002        match self.primary_country_code().to_ascii_uppercase().as_str() {
2003            "DE" | "AT" => "german_gaap",
2004            "FR" | "BE" | "LU" => "french_gaap",
2005            _ => {
2006                // No country override → take the framework label.
2007                if self.config.accounting_standards.enabled {
2008                    match self.config.accounting_standards.framework {
2009                        Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
2010                            return "french_gaap";
2011                        }
2012                        Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
2013                            return "german_gaap";
2014                        }
2015                        Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
2016                            return "ifrs";
2017                        }
2018                        Some(
2019                            datasynth_config::schema::AccountingFrameworkConfig::DualReporting,
2020                        ) => {
2021                            return "dual_reporting";
2022                        }
2023                        Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap)
2024                        | None => {}
2025                    }
2026                }
2027                "us_gaap"
2028            }
2029        }
2030    }
2031
2032    /// Check if copula generators are available.
2033    ///
2034    /// Returns true if the orchestrator has copula generators for preserving
2035    /// correlations (typically from fingerprint-based generation).
2036    pub fn has_copulas(&self) -> bool {
2037        !self.copula_generators.is_empty()
2038    }
2039
2040    /// Get the copula generators.
2041    ///
2042    /// Returns a reference to the copula generators for use during generation.
2043    /// These can be used to generate correlated samples that preserve the
2044    /// statistical relationships from the source data.
2045    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
2046        &self.copula_generators
2047    }
2048
2049    /// Get a mutable reference to the copula generators.
2050    ///
2051    /// Allows generators to sample from copulas during data generation.
2052    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
2053        &mut self.copula_generators
2054    }
2055
2056    /// Sample correlated values from a named copula.
2057    ///
2058    /// Returns None if the copula doesn't exist.
2059    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
2060        self.copula_generators
2061            .iter_mut()
2062            .find(|c| c.name == copula_name)
2063            .map(|c| c.generator.sample())
2064    }
2065
2066    /// Create an orchestrator from a fingerprint file.
2067    ///
2068    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
2069    /// and creates an orchestrator configured to generate data matching
2070    /// the statistical properties of the original data.
2071    ///
2072    /// # Arguments
2073    /// * `fingerprint_path` - Path to the .dsf fingerprint file
2074    /// * `phase_config` - Phase configuration for generation
2075    /// * `scale` - Scale factor for row counts (1.0 = same as original)
2076    ///
2077    /// # Example
2078    /// ```no_run
2079    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
2080    /// use std::path::Path;
2081    ///
2082    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
2083    ///     Path::new("fingerprint.dsf"),
2084    ///     PhaseConfig::default(),
2085    ///     1.0,
2086    /// ).unwrap();
2087    /// ```
2088    pub fn from_fingerprint(
2089        fingerprint_path: &std::path::Path,
2090        phase_config: PhaseConfig,
2091        scale: f64,
2092    ) -> SynthResult<Self> {
2093        info!("Loading fingerprint from: {}", fingerprint_path.display());
2094
2095        // Read the fingerprint
2096        let reader = FingerprintReader::new();
2097        let fingerprint = reader
2098            .read_from_file(fingerprint_path)
2099            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2100
2101        Self::from_fingerprint_data(fingerprint, phase_config, scale)
2102    }
2103
2104    /// Create an orchestrator from a loaded fingerprint.
2105    ///
2106    /// # Arguments
2107    /// * `fingerprint` - The loaded fingerprint
2108    /// * `phase_config` - Phase configuration for generation
2109    /// * `scale` - Scale factor for row counts (1.0 = same as original)
2110    pub fn from_fingerprint_data(
2111        fingerprint: Fingerprint,
2112        phase_config: PhaseConfig,
2113        scale: f64,
2114    ) -> SynthResult<Self> {
2115        info!(
2116            "Synthesizing config from fingerprint (version: {}, tables: {})",
2117            fingerprint.manifest.version,
2118            fingerprint.schema.tables.len()
2119        );
2120
2121        // Generate a seed for the synthesis
2122        let seed: u64 = rand::random();
2123        info!("Fingerprint synthesis seed: {}", seed);
2124
2125        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
2126        let options = SynthesisOptions {
2127            scale,
2128            seed: Some(seed),
2129            preserve_correlations: true,
2130            inject_anomalies: true,
2131        };
2132        let synthesizer = ConfigSynthesizer::with_options(options);
2133
2134        // Synthesize full result including copula generators
2135        let synthesis_result = synthesizer
2136            .synthesize_full(&fingerprint, seed)
2137            .map_err(|e| {
2138                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2139            })?;
2140
2141        // Start with a base config from the fingerprint's industry if available
2142        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2143            Self::base_config_for_industry(industry)
2144        } else {
2145            Self::base_config_for_industry("manufacturing")
2146        };
2147
2148        // Apply the synthesized patches
2149        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2150
2151        // Log synthesis results
2152        info!(
2153            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2154            fingerprint.schema.tables.len(),
2155            scale,
2156            synthesis_result.copula_generators.len()
2157        );
2158
2159        if !synthesis_result.copula_generators.is_empty() {
2160            for spec in &synthesis_result.copula_generators {
2161                info!(
2162                    "  Copula '{}' for table '{}': {} columns",
2163                    spec.name,
2164                    spec.table,
2165                    spec.columns.len()
2166                );
2167            }
2168        }
2169
2170        // Create the orchestrator with the synthesized config
2171        let mut orchestrator = Self::new(config, phase_config)?;
2172
2173        // Store copula generators for use during generation
2174        orchestrator.copula_generators = synthesis_result.copula_generators;
2175
2176        Ok(orchestrator)
2177    }
2178
2179    /// Create a base config for a given industry.
2180    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2181        use datasynth_config::presets::create_preset;
2182        use datasynth_config::TransactionVolume;
2183        use datasynth_core::models::{CoAComplexity, IndustrySector};
2184
2185        let sector = match industry.to_lowercase().as_str() {
2186            "manufacturing" => IndustrySector::Manufacturing,
2187            "retail" => IndustrySector::Retail,
2188            "financial" | "financial_services" => IndustrySector::FinancialServices,
2189            "healthcare" => IndustrySector::Healthcare,
2190            "technology" | "tech" => IndustrySector::Technology,
2191            _ => IndustrySector::Manufacturing,
2192        };
2193
2194        // Create a preset with reasonable defaults
2195        create_preset(
2196            sector,
2197            1,  // company count
2198            12, // period months
2199            CoAComplexity::Medium,
2200            TransactionVolume::TenK,
2201        )
2202    }
2203
2204    /// Apply a config patch to a GeneratorConfig.
2205    fn apply_config_patch(
2206        mut config: GeneratorConfig,
2207        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2208    ) -> GeneratorConfig {
2209        use datasynth_fingerprint::synthesis::ConfigValue;
2210
2211        for (key, value) in patch.values() {
2212            match (key.as_str(), value) {
2213                // Transaction count is handled via TransactionVolume enum on companies
2214                // Log it but cannot directly set it (would need to modify company volumes)
2215                ("transactions.count", ConfigValue::Integer(n)) => {
2216                    info!(
2217                        "Fingerprint suggests {} transactions (apply via company volumes)",
2218                        n
2219                    );
2220                }
2221                ("global.period_months", ConfigValue::Integer(n)) => {
2222                    config.global.period_months = (*n).clamp(1, 120) as u32;
2223                }
2224                ("global.start_date", ConfigValue::String(s)) => {
2225                    config.global.start_date = s.clone();
2226                }
2227                ("global.seed", ConfigValue::Integer(n)) => {
2228                    config.global.seed = Some(*n as u64);
2229                }
2230                ("fraud.enabled", ConfigValue::Bool(b)) => {
2231                    config.fraud.enabled = *b;
2232                }
2233                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2234                    config.fraud.fraud_rate = *f;
2235                }
2236                ("data_quality.enabled", ConfigValue::Bool(b)) => {
2237                    config.data_quality.enabled = *b;
2238                }
2239                // Handle anomaly injection paths (mapped to fraud config)
2240                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2241                    config.fraud.enabled = *b;
2242                }
2243                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2244                    config.fraud.fraud_rate = *f;
2245                }
2246                _ => {
2247                    debug!("Ignoring unknown config patch key: {}", key);
2248                }
2249            }
2250        }
2251
2252        config
2253    }
2254
2255    /// Build a resource guard from the configuration.
2256    fn build_resource_guard(
2257        config: &GeneratorConfig,
2258        output_path: Option<PathBuf>,
2259    ) -> ResourceGuard {
2260        let mut builder = ResourceGuardBuilder::new();
2261
2262        // Configure memory limit if set
2263        if config.global.memory_limit_mb > 0 {
2264            builder = builder.memory_limit(config.global.memory_limit_mb);
2265        }
2266
2267        // Configure disk monitoring for output path
2268        if let Some(path) = output_path {
2269            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
2270        }
2271
2272        // Use conservative degradation settings for production safety
2273        builder = builder.conservative();
2274
2275        builder.build()
2276    }
2277
2278    /// Check resources (memory, disk, CPU) and return degradation level.
2279    ///
2280    /// Returns an error if hard limits are exceeded.
2281    /// Returns Ok(DegradationLevel) indicating current resource state.
2282    fn check_resources(&self) -> SynthResult<DegradationLevel> {
2283        self.resource_guard.check()
2284    }
2285
2286    /// Check resources with logging.
2287    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2288        let level = self.resource_guard.check()?;
2289
2290        if level != DegradationLevel::Normal {
2291            warn!(
2292                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2293                phase,
2294                level,
2295                self.resource_guard.current_memory_mb(),
2296                self.resource_guard.available_disk_mb()
2297            );
2298        }
2299
2300        Ok(level)
2301    }
2302
2303    /// Get current degradation actions based on resource state.
2304    fn get_degradation_actions(&self) -> DegradationActions {
2305        self.resource_guard.get_actions()
2306    }
2307
2308    /// Legacy method for backwards compatibility - now uses ResourceGuard.
2309    fn check_memory_limit(&self) -> SynthResult<()> {
2310        self.check_resources()?;
2311        Ok(())
2312    }
2313
2314    /// Run the complete generation workflow.
2315    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2316        info!("Starting enhanced generation workflow");
2317        info!(
2318            "Config: industry={:?}, period_months={}, companies={}",
2319            self.config.global.industry,
2320            self.config.global.period_months,
2321            self.config.companies.len()
2322        );
2323
2324        // Set decimal serialization mode (thread-local, affects JSON output).
2325        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
2326        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2327        datasynth_core::serde_decimal::set_numeric_native(is_native);
2328        struct NumericModeGuard;
2329        impl Drop for NumericModeGuard {
2330            fn drop(&mut self) {
2331                datasynth_core::serde_decimal::set_numeric_native(false);
2332            }
2333        }
2334        let _numeric_guard = if is_native {
2335            Some(NumericModeGuard)
2336        } else {
2337            None
2338        };
2339
2340        // Initial resource check before starting
2341        let initial_level = self.check_resources_with_log("initial")?;
2342        if initial_level == DegradationLevel::Emergency {
2343            return Err(SynthError::resource(
2344                "Insufficient resources to start generation",
2345            ));
2346        }
2347
2348        let mut stats = EnhancedGenerationStatistics {
2349            companies_count: self.config.companies.len(),
2350            period_months: self.config.global.period_months,
2351            ..Default::default()
2352        };
2353
2354        // Phase 1: Chart of Accounts
2355        let coa = self.phase_chart_of_accounts(&mut stats)?;
2356
2357        // Phase 2: Master Data
2358        self.phase_master_data(&mut stats)?;
2359
2360        // Emit master data to stream sink
2361        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2362        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2363        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2364
2365        // Phase 3: Document Flows + Subledger Linking
2366        let (mut document_flows, mut subledger, fa_journal_entries) =
2367            self.phase_document_flows(&mut stats)?;
2368
2369        // Emit document flows to stream sink
2370        self.emit_phase_items(
2371            "document_flows",
2372            "PurchaseOrder",
2373            &document_flows.purchase_orders,
2374        );
2375        self.emit_phase_items(
2376            "document_flows",
2377            "GoodsReceipt",
2378            &document_flows.goods_receipts,
2379        );
2380        self.emit_phase_items(
2381            "document_flows",
2382            "VendorInvoice",
2383            &document_flows.vendor_invoices,
2384        );
2385        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2386        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2387
2388        // Phase 3b: Opening Balances (before JE generation)
2389        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2390
2391        // Phase 3c: Convert opening balances to journal entries and prepend them.
2392        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
2393        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
2394        // balance map type.
2395        let opening_balance_jes: Vec<JournalEntry> = opening_balances
2396            .iter()
2397            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2398            .collect();
2399        if !opening_balance_jes.is_empty() {
2400            debug!(
2401                "Prepending {} opening balance JEs to entries",
2402                opening_balance_jes.len()
2403            );
2404        }
2405
2406        // Phase 4: Journal Entries
2407        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2408
2409        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
2410        // starts from the correct initial state.
2411        if !opening_balance_jes.is_empty() {
2412            let mut combined = opening_balance_jes;
2413            combined.extend(entries);
2414            entries = combined;
2415        }
2416
2417        // Phase 4c: Append FA acquisition journal entries to main entries
2418        if !fa_journal_entries.is_empty() {
2419            debug!(
2420                "Appending {} FA acquisition JEs to main entries",
2421                fa_journal_entries.len()
2422            );
2423            entries.extend(fa_journal_entries);
2424        }
2425
2426        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
2427        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2428
2429        // Get current degradation actions for optional phases
2430        let actions = self.get_degradation_actions();
2431
2432        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
2433        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2434
2435        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
2436        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
2437        if !sourcing.contracts.is_empty() {
2438            let mut linked_count = 0usize;
2439            // Collect (vendor_id, po_id) pairs from P2P chains
2440            let po_vendor_pairs: Vec<(String, String)> = document_flows
2441                .p2p_chains
2442                .iter()
2443                .map(|chain| {
2444                    (
2445                        chain.purchase_order.vendor_id.clone(),
2446                        chain.purchase_order.header.document_id.clone(),
2447                    )
2448                })
2449                .collect();
2450
2451            for chain in &mut document_flows.p2p_chains {
2452                if chain.purchase_order.contract_id.is_none() {
2453                    if let Some(contract) = sourcing
2454                        .contracts
2455                        .iter()
2456                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2457                    {
2458                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2459                        linked_count += 1;
2460                    }
2461                }
2462            }
2463
2464            // Populate reverse FK: purchase_order_ids on each contract
2465            for contract in &mut sourcing.contracts {
2466                let po_ids: Vec<String> = po_vendor_pairs
2467                    .iter()
2468                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2469                    .map(|(_, po_id)| po_id.clone())
2470                    .collect();
2471                if !po_ids.is_empty() {
2472                    contract.purchase_order_ids = po_ids;
2473                }
2474            }
2475
2476            if linked_count > 0 {
2477                debug!(
2478                    "Linked {} purchase orders to S2C contracts by vendor match",
2479                    linked_count
2480                );
2481            }
2482        }
2483
2484        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2485        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2486
2487        // Phase 5c: Append IC journal entries to main entries
2488        if !intercompany.seller_journal_entries.is_empty()
2489            || !intercompany.buyer_journal_entries.is_empty()
2490        {
2491            let ic_je_count = intercompany.seller_journal_entries.len()
2492                + intercompany.buyer_journal_entries.len();
2493            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2494            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2495            debug!(
2496                "Appended {} IC journal entries to main entries",
2497                ic_je_count
2498            );
2499        }
2500
2501        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2502        if !intercompany.elimination_entries.is_empty() {
2503            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2504                &intercompany.elimination_entries,
2505            );
2506            if !elim_jes.is_empty() {
2507                debug!(
2508                    "Appended {} elimination journal entries to main entries",
2509                    elim_jes.len()
2510                );
2511                // IC elimination net-zero assertion (v2.5 hardening)
2512                let elim_debit: rust_decimal::Decimal =
2513                    elim_jes.iter().map(|je| je.total_debit()).sum();
2514                let elim_credit: rust_decimal::Decimal =
2515                    elim_jes.iter().map(|je| je.total_credit()).sum();
2516                let elim_diff = (elim_debit - elim_credit).abs();
2517                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2518                if elim_diff > tolerance {
2519                    return Err(datasynth_core::error::SynthError::generation(format!(
2520                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2521                        elim_debit, elim_credit, elim_diff, tolerance
2522                    )));
2523                }
2524                debug!(
2525                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2526                    elim_debit, elim_credit, elim_diff
2527                );
2528                entries.extend(elim_jes);
2529            }
2530        }
2531
2532        // Phase 5e: Wire IC source documents into document flow snapshot
2533        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2534            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2535                document_flows
2536                    .customer_invoices
2537                    .extend(ic_docs.seller_invoices.iter().cloned());
2538                document_flows
2539                    .purchase_orders
2540                    .extend(ic_docs.buyer_orders.iter().cloned());
2541                document_flows
2542                    .goods_receipts
2543                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2544                document_flows
2545                    .vendor_invoices
2546                    .extend(ic_docs.buyer_invoices.iter().cloned());
2547                debug!(
2548                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2549                    ic_docs.seller_invoices.len(),
2550                    ic_docs.buyer_orders.len(),
2551                    ic_docs.buyer_goods_receipts.len(),
2552                    ic_docs.buyer_invoices.len(),
2553                );
2554            }
2555        }
2556
2557        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2558        let hr = self.phase_hr_data(&mut stats)?;
2559
2560        // Phase 6b: Generate JEs from payroll runs
2561        if !hr.payroll_runs.is_empty() {
2562            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2563            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2564            entries.extend(payroll_jes);
2565        }
2566
2567        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2568        if !hr.pension_journal_entries.is_empty() {
2569            debug!(
2570                "Generated {} JEs from pension plans",
2571                hr.pension_journal_entries.len()
2572            );
2573            entries.extend(hr.pension_journal_entries.iter().cloned());
2574        }
2575
2576        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2577        if !hr.stock_comp_journal_entries.is_empty() {
2578            debug!(
2579                "Generated {} JEs from stock-based compensation",
2580                hr.stock_comp_journal_entries.len()
2581            );
2582            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2583        }
2584
2585        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2586        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2587
2588        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2589        if !manufacturing_snap.production_orders.is_empty() {
2590            let currency = self
2591                .config
2592                .companies
2593                .first()
2594                .map(|c| c.currency.as_str())
2595                .unwrap_or("USD");
2596            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2597                &manufacturing_snap.production_orders,
2598                &manufacturing_snap.quality_inspections,
2599                currency,
2600            );
2601            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2602            entries.extend(mfg_jes);
2603        }
2604
2605        // Phase 7a-warranty: Generate warranty provisions per company
2606        if !manufacturing_snap.quality_inspections.is_empty() {
2607            let framework = match self.config.accounting_standards.framework {
2608                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2609                _ => "US_GAAP",
2610            };
2611            for company in &self.config.companies {
2612                let company_orders: Vec<_> = manufacturing_snap
2613                    .production_orders
2614                    .iter()
2615                    .filter(|o| o.company_code == company.code)
2616                    .cloned()
2617                    .collect();
2618                let company_inspections: Vec<_> = manufacturing_snap
2619                    .quality_inspections
2620                    .iter()
2621                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2622                    .cloned()
2623                    .collect();
2624                if company_inspections.is_empty() {
2625                    continue;
2626                }
2627                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2628                let warranty_result = warranty_gen.generate(
2629                    &company.code,
2630                    &company_orders,
2631                    &company_inspections,
2632                    &company.currency,
2633                    framework,
2634                );
2635                if !warranty_result.journal_entries.is_empty() {
2636                    debug!(
2637                        "Generated {} warranty provision JEs for {}",
2638                        warranty_result.journal_entries.len(),
2639                        company.code
2640                    );
2641                    entries.extend(warranty_result.journal_entries);
2642                }
2643            }
2644        }
2645
2646        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2647        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2648        {
2649            let cogs_currency = self
2650                .config
2651                .companies
2652                .first()
2653                .map(|c| c.currency.as_str())
2654                .unwrap_or("USD");
2655            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2656                &document_flows.deliveries,
2657                &manufacturing_snap.production_orders,
2658                cogs_currency,
2659            );
2660            if !cogs_jes.is_empty() {
2661                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2662                entries.extend(cogs_jes);
2663            }
2664        }
2665
2666        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2667        //
2668        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2669        // subledger inventory positions.  Here we reconcile them so that position balances
2670        // reflect the actual stock movements within the generation period.
2671        if !manufacturing_snap.inventory_movements.is_empty()
2672            && !subledger.inventory_positions.is_empty()
2673        {
2674            use datasynth_core::models::MovementType as MfgMovementType;
2675            let mut receipt_count = 0usize;
2676            let mut issue_count = 0usize;
2677            for movement in &manufacturing_snap.inventory_movements {
2678                // Find a matching position by material code and company
2679                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2680                    p.material_id == movement.material_code
2681                        && p.company_code == movement.entity_code
2682                }) {
2683                    match movement.movement_type {
2684                        MfgMovementType::GoodsReceipt => {
2685                            // Increase stock and update weighted-average cost
2686                            pos.add_quantity(
2687                                movement.quantity,
2688                                movement.value,
2689                                movement.movement_date,
2690                            );
2691                            receipt_count += 1;
2692                        }
2693                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2694                            // Decrease stock (best-effort; silently skip if insufficient)
2695                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2696                            issue_count += 1;
2697                        }
2698                        _ => {}
2699                    }
2700                }
2701            }
2702            debug!(
2703                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2704                manufacturing_snap.inventory_movements.len(),
2705                receipt_count,
2706                issue_count,
2707            );
2708        }
2709
2710        // Update final entry/line-item stats after all JE-generating phases
2711        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2712        if !entries.is_empty() {
2713            stats.total_entries = entries.len() as u64;
2714            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2715            debug!(
2716                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2717                stats.total_entries, stats.total_line_items
2718            );
2719        }
2720
2721        // Phase 7b: Apply internal controls to journal entries
2722        if self.config.internal_controls.enabled && !entries.is_empty() {
2723            info!("Phase 7b: Applying internal controls to journal entries");
2724            let control_config = ControlGeneratorConfig {
2725                exception_rate: self.config.internal_controls.exception_rate,
2726                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2727                enable_sox_marking: true,
2728                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2729                    self.config.internal_controls.sox_materiality_threshold,
2730                )
2731                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2732                ..Default::default()
2733            };
2734            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2735            for entry in &mut entries {
2736                control_gen.apply_controls(entry, &coa);
2737            }
2738            let with_controls = entries
2739                .iter()
2740                .filter(|e| !e.header.control_ids.is_empty())
2741                .count();
2742            info!(
2743                "Applied controls to {} entries ({} with control IDs assigned)",
2744                entries.len(),
2745                with_controls
2746            );
2747        }
2748
2749        // Phase 7c: Extract SoD violations from annotated journal entries.
2750        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2751        // Here we materialise those flags into standalone SodViolation records.
2752        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2753            .iter()
2754            .filter(|e| e.header.sod_violation)
2755            .filter_map(|e| {
2756                e.header.sod_conflict_type.map(|ct| {
2757                    use datasynth_core::models::{RiskLevel, SodViolation};
2758                    let severity = match ct {
2759                        datasynth_core::models::SodConflictType::PaymentReleaser
2760                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2761                            RiskLevel::Critical
2762                        }
2763                        datasynth_core::models::SodConflictType::PreparerApprover
2764                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2765                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2766                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2767                            RiskLevel::High
2768                        }
2769                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2770                            RiskLevel::Medium
2771                        }
2772                    };
2773                    let action = format!(
2774                        "SoD conflict {:?} on entry {} ({})",
2775                        ct, e.header.document_id, e.header.company_code
2776                    );
2777                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2778                })
2779            })
2780            .collect();
2781        if !sod_violations.is_empty() {
2782            info!(
2783                "Phase 7c: Extracted {} SoD violations from {} entries",
2784                sod_violations.len(),
2785                entries.len()
2786            );
2787        }
2788
2789        // Emit journal entries to stream sink (after all JE-generating phases)
2790        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2791
2792        // Phase 7d: Document-level fraud injection + propagation to derived JEs.
2793        //
2794        // This runs BEFORE line-level anomaly injection so that JEs tagged by
2795        // document-level fraud are exempt from subsequent line-level flag
2796        // overwrites, and so downstream consumers see a coherent picture.
2797        //
2798        // Gated by `fraud.document_fraud_rate` — `None` or `0.0` is a no-op.
2799        {
2800            let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2801            if self.config.fraud.enabled && doc_rate > 0.0 {
2802                use datasynth_core::fraud_propagation::{
2803                    inject_document_fraud, propagate_documents_to_entries,
2804                };
2805                use datasynth_core::utils::weighted_select;
2806                use datasynth_core::FraudType;
2807                use rand_chacha::rand_core::SeedableRng;
2808
2809                let dist = &self.config.fraud.fraud_type_distribution;
2810                let fraud_type_weights: [(FraudType, f64); 8] = [
2811                    (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2812                    (FraudType::FictitiousEntry, dist.fictitious_transaction),
2813                    (FraudType::RevenueManipulation, dist.revenue_manipulation),
2814                    (
2815                        FraudType::ImproperCapitalization,
2816                        dist.expense_capitalization,
2817                    ),
2818                    (FraudType::SplitTransaction, dist.split_transaction),
2819                    (FraudType::TimingAnomaly, dist.timing_anomaly),
2820                    (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2821                    (FraudType::DuplicatePayment, dist.duplicate_payment),
2822                ];
2823                let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2824                let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2825                    if weights_sum <= 0.0 {
2826                        FraudType::FictitiousEntry
2827                    } else {
2828                        *weighted_select(rng, &fraud_type_weights)
2829                    }
2830                };
2831
2832                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2833                let mut doc_tagged = 0usize;
2834                macro_rules! inject_into {
2835                    ($collection:expr) => {{
2836                        let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2837                            $collection.iter_mut().map(|d| &mut d.header).collect();
2838                        doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2839                    }};
2840                }
2841                inject_into!(document_flows.purchase_orders);
2842                inject_into!(document_flows.goods_receipts);
2843                inject_into!(document_flows.vendor_invoices);
2844                inject_into!(document_flows.payments);
2845                inject_into!(document_flows.sales_orders);
2846                inject_into!(document_flows.deliveries);
2847                inject_into!(document_flows.customer_invoices);
2848                if doc_tagged > 0 {
2849                    info!(
2850                        "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2851                    );
2852                }
2853
2854                if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2855                    let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2856                        Vec::new();
2857                    headers.extend(
2858                        document_flows
2859                            .purchase_orders
2860                            .iter()
2861                            .map(|d| d.header.clone()),
2862                    );
2863                    headers.extend(
2864                        document_flows
2865                            .goods_receipts
2866                            .iter()
2867                            .map(|d| d.header.clone()),
2868                    );
2869                    headers.extend(
2870                        document_flows
2871                            .vendor_invoices
2872                            .iter()
2873                            .map(|d| d.header.clone()),
2874                    );
2875                    headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2876                    headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2877                    headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2878                    headers.extend(
2879                        document_flows
2880                            .customer_invoices
2881                            .iter()
2882                            .map(|d| d.header.clone()),
2883                    );
2884                    let propagated = propagate_documents_to_entries(&headers, &mut entries);
2885                    if propagated > 0 {
2886                        info!(
2887                            "Propagated document-level fraud to {propagated} derived journal entries"
2888                        );
2889                    }
2890                }
2891            }
2892        }
2893
2894        // Phase 8: Anomaly Injection (after all JE-generating phases)
2895        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2896
2897        // Phase 8b: Apply behavioral biases to fraud entries that did NOT go
2898        // through the anomaly injector.
2899        //
2900        // Three paths set `is_fraud = true` without touching `is_anomaly`:
2901        //   - je_generator::determine_fraud (intrinsic fraud during JE generation)
2902        //   - fraud_propagation::propagate_documents_to_entries (doc-level cascade)
2903        //   - Any external mutation that sets is_fraud after the fact
2904        //
2905        // The anomaly injector already applies the same bias inline when it
2906        // tags an entry as fraud (and sets is_anomaly=true in the same step),
2907        // so gating this sweep on `!is_anomaly` avoids double-application.
2908        //
2909        // Without this sweep, fraud entries from these paths show 0 lift on
2910        // the canonical forensic signals (is_round_1000, is_off_hours,
2911        // is_weekend, is_post_close), which is exactly what the SDK-side
2912        // evaluator caught in v3.1 — fraud features had worse lift than
2913        // baseline. See DS-3.1 post-deploy feedback.
2914        {
2915            use datasynth_core::fraud_bias::{
2916                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2917            };
2918            use rand_chacha::rand_core::SeedableRng;
2919            let cfg = FraudBehavioralBiasConfig::default();
2920            if cfg.enabled {
2921                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2922                let mut swept = 0usize;
2923                for entry in entries.iter_mut() {
2924                    if entry.header.is_fraud && !entry.header.is_anomaly {
2925                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2926                        swept += 1;
2927                    }
2928                }
2929                if swept > 0 {
2930                    info!(
2931                        "Applied behavioral biases to {swept} non-anomaly fraud entries \
2932                         (doc-propagated + je_generator intrinsic fraud)"
2933                    );
2934                }
2935            }
2936        }
2937
2938        // Emit anomaly labels to stream sink
2939        self.emit_phase_items(
2940            "anomaly_injection",
2941            "LabeledAnomaly",
2942            &anomaly_labels.labels,
2943        );
2944
2945        // Propagate fraud labels from journal entries to source documents.
2946        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2947        // instead of tracing through document_references.json.
2948        //
2949        // Gated by `fraud.propagate_to_document` (default true) — disable when
2950        // downstream consumers want document fraud flags to reflect only
2951        // document-level injection, not line-level.
2952        if self.config.fraud.propagate_to_document {
2953            use std::collections::HashMap;
2954            // Build a map from document_id -> (is_fraud, fraud_type) from fraudulent JEs.
2955            //
2956            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2957            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2958            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2959            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2960            // we register BOTH the prefixed form (raw reference) AND the bare form
2961            // (post-colon portion) in the map. Also register the JE's document_id
2962            // UUID so documents that set `journal_entry_id` match via that path.
2963            //
2964            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2965            // looked up "foo", silently producing 0 propagations.
2966            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2967            for je in &entries {
2968                if je.header.is_fraud {
2969                    if let Some(ref fraud_type) = je.header.fraud_type {
2970                        if let Some(ref reference) = je.header.reference {
2971                            // Register the full reference ("GR:PO-2024-000001")
2972                            fraud_map.insert(reference.clone(), *fraud_type);
2973                            // Also register the bare document ID ("PO-2024-000001")
2974                            // by stripping the "PREFIX:" if present.
2975                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2976                                if !bare.is_empty() {
2977                                    fraud_map.insert(bare.to_string(), *fraud_type);
2978                                }
2979                            }
2980                        }
2981                        // Also tag via journal_entry_id on document headers
2982                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2983                    }
2984                }
2985            }
2986            if !fraud_map.is_empty() {
2987                let mut propagated = 0usize;
2988                // Use DocumentHeader::propagate_fraud method for each doc type
2989                macro_rules! propagate_to {
2990                    ($collection:expr) => {
2991                        for doc in &mut $collection {
2992                            if doc.header.propagate_fraud(&fraud_map) {
2993                                propagated += 1;
2994                            }
2995                        }
2996                    };
2997                }
2998                propagate_to!(document_flows.purchase_orders);
2999                propagate_to!(document_flows.goods_receipts);
3000                propagate_to!(document_flows.vendor_invoices);
3001                propagate_to!(document_flows.payments);
3002                propagate_to!(document_flows.sales_orders);
3003                propagate_to!(document_flows.deliveries);
3004                propagate_to!(document_flows.customer_invoices);
3005                if propagated > 0 {
3006                    info!(
3007                        "Propagated fraud labels to {} document flow records",
3008                        propagated
3009                    );
3010                }
3011            }
3012        }
3013
3014        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
3015        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
3016
3017        // Emit red flags to stream sink
3018        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
3019
3020        // Phase 26b: Collusion Ring Generation (after red flags)
3021        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
3022
3023        // Emit collusion rings to stream sink
3024        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
3025
3026        // Phase 8d: W8.1 — TB drift-correction pass.  When a TB anchor prior is
3027        // loaded (industry bundle with real per-account targets), emit balanced
3028        // "SA" adjustment JEs to nudge the synthetic balance sheet toward the
3029        // corpus-median shape before final balance validation runs.
3030        self.phase_tb_drift_correction(&mut entries)?;
3031
3032        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
3033        let balance_validation = self.phase_balance_validation(&entries)?;
3034
3035        // Phase 9a: COA coverage — every gl_account in JEs must exist in the
3036        // chart of accounts. Soft warning by default; hard fail when the
3037        // user passes --validate-coa-coverage / sets the strict flag.
3038        self.validate_coa_coverage(&entries, coa.as_ref())?;
3039
3040        // Phase 9b: GL-to-Subledger Reconciliation
3041        let subledger_reconciliation =
3042            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
3043
3044        // Phase 10: Data Quality Injection
3045        let (data_quality_stats, quality_issues) =
3046            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
3047
3048        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
3049        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
3050
3051        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
3052        {
3053            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
3054
3055            // Assert 1: Every non-anomaly JE must individually balance (debits = credits).
3056            // Anomaly-injected JEs are excluded since they are intentionally unbalanced (fraud).
3057            let mut unbalanced_clean = 0usize;
3058            for je in &entries {
3059                if je.header.is_fraud || je.header.is_anomaly {
3060                    continue;
3061                }
3062                let diff = (je.total_debit() - je.total_credit()).abs();
3063                if diff > tolerance {
3064                    unbalanced_clean += 1;
3065                    if unbalanced_clean <= 3 {
3066                        warn!(
3067                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
3068                            je.header.document_id,
3069                            je.total_debit(),
3070                            je.total_credit(),
3071                            diff
3072                        );
3073                    }
3074                }
3075            }
3076            if unbalanced_clean > 0 {
3077                return Err(datasynth_core::error::SynthError::generation(format!(
3078                    "{} non-anomaly JEs are unbalanced (debits != credits). \
3079                     First few logged above. Tolerance={}",
3080                    unbalanced_clean, tolerance
3081                )));
3082            }
3083            debug!(
3084                "Phase 10c: All {} non-anomaly JEs individually balanced",
3085                entries
3086                    .iter()
3087                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
3088                    .count()
3089            );
3090
3091            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
3092            let company_codes: Vec<String> = self
3093                .config
3094                .companies
3095                .iter()
3096                .map(|c| c.code.clone())
3097                .collect();
3098            for company_code in &company_codes {
3099                let mut assets = rust_decimal::Decimal::ZERO;
3100                let mut liab_equity = rust_decimal::Decimal::ZERO;
3101
3102                for entry in &entries {
3103                    if entry.header.company_code != *company_code {
3104                        continue;
3105                    }
3106                    for line in &entry.lines {
3107                        let acct = &line.gl_account;
3108                        let net = line.debit_amount - line.credit_amount;
3109                        // Asset accounts (1xxx): normal debit balance
3110                        if acct.starts_with('1') {
3111                            assets += net;
3112                        }
3113                        // Liability (2xxx) + Equity (3xxx): normal credit balance
3114                        else if acct.starts_with('2') || acct.starts_with('3') {
3115                            liab_equity -= net; // credit-normal, so negate debit-net
3116                        }
3117                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
3118                        // so they net to zero after closing entries
3119                    }
3120                }
3121
3122                let bs_diff = (assets - liab_equity).abs();
3123                if bs_diff > tolerance {
3124                    warn!(
3125                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3126                         revenue/expense closing entries may not fully offset",
3127                        company_code, assets, liab_equity, bs_diff
3128                    );
3129                    // Warn rather than error: multi-period datasets may have timing
3130                    // differences from accruals/deferrals that resolve in later periods.
3131                    // The per-JE balance check (Assert 1) is the hard gate.
3132                } else {
3133                    debug!(
3134                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3135                        company_code, assets, liab_equity, bs_diff
3136                    );
3137                }
3138            }
3139
3140            info!("Phase 10c: All generation-time accounting assertions passed");
3141        }
3142
3143        // Phase 11: Audit Data
3144        let audit = self.phase_audit_data(&entries, &mut stats)?;
3145
3146        // Phase 12: Banking KYC/AML Data
3147        let mut banking = self.phase_banking_data(&mut stats)?;
3148
3149        // Phase 12.5: Bridge document-flow Payments → BankTransactions
3150        // Creates coherence between the accounting layer (payments, JEs) and the
3151        // banking layer (bank transactions). A vendor invoice payment now appears
3152        // on both sides with cross-references and fraud labels propagated.
3153        if self.phase_config.generate_banking
3154            && !document_flows.payments.is_empty()
3155            && !banking.accounts.is_empty()
3156        {
3157            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3158            if bridge_rate > 0.0 {
3159                let mut bridge =
3160                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3161                        self.seed,
3162                    );
3163                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3164                    &document_flows.payments,
3165                    &banking.customers,
3166                    &banking.accounts,
3167                    bridge_rate,
3168                );
3169                info!(
3170                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3171                    bridge_stats.bridged_count,
3172                    bridge_stats.transactions_emitted,
3173                    bridge_stats.fraud_propagated,
3174                );
3175                let bridged_count = bridged_txns.len();
3176                banking.transactions.extend(bridged_txns);
3177
3178                // Re-run velocity computation so bridged txns also get features
3179                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
3180                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3181                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
3182                        &mut banking.transactions,
3183                    );
3184                }
3185
3186                // Recompute suspicious count after bridging
3187                banking.suspicious_count = banking
3188                    .transactions
3189                    .iter()
3190                    .filter(|t| t.is_suspicious)
3191                    .count();
3192                stats.banking_transaction_count = banking.transactions.len();
3193                stats.banking_suspicious_count = banking.suspicious_count;
3194            }
3195        }
3196
3197        // Phase 13: Graph Export
3198        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3199
3200        // Phase 14: LLM Enrichment
3201        self.phase_llm_enrichment(&mut stats);
3202
3203        // Phase 15: Diffusion Enhancement
3204        self.phase_diffusion_enhancement(&entries, &mut stats);
3205
3206        // Phase 16: Causal Overlay
3207        self.phase_causal_overlay(&mut stats);
3208
3209        // Phase 17: Bank Reconciliation + Financial Statements
3210        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
3211        // provision data (from accounting_standards / tax snapshots) can be wired in.
3212        let mut financial_reporting = self.phase_financial_reporting(
3213            &document_flows,
3214            &entries,
3215            &coa,
3216            &hr,
3217            &audit,
3218            &mut stats,
3219        )?;
3220
3221        // BS coherence check: assets = liabilities + equity
3222        {
3223            use datasynth_core::models::StatementType;
3224            for stmt in &financial_reporting.consolidated_statements {
3225                if stmt.statement_type == StatementType::BalanceSheet {
3226                    let total_assets: rust_decimal::Decimal = stmt
3227                        .line_items
3228                        .iter()
3229                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
3230                        .map(|li| li.amount)
3231                        .sum();
3232                    let total_le: rust_decimal::Decimal = stmt
3233                        .line_items
3234                        .iter()
3235                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3236                        .map(|li| li.amount)
3237                        .sum();
3238                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3239                        warn!(
3240                            "BS equation imbalance: assets={}, L+E={}",
3241                            total_assets, total_le
3242                        );
3243                    }
3244                }
3245            }
3246        }
3247
3248        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
3249        let accounting_standards =
3250            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3251
3252        // Phase 18a: Merge ECL journal entries into main GL
3253        if !accounting_standards.ecl_journal_entries.is_empty() {
3254            debug!(
3255                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3256                accounting_standards.ecl_journal_entries.len()
3257            );
3258            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3259        }
3260
3261        // Phase 18a: Merge provision journal entries into main GL
3262        if !accounting_standards.provision_journal_entries.is_empty() {
3263            debug!(
3264                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3265                accounting_standards.provision_journal_entries.len()
3266            );
3267            entries.extend(
3268                accounting_standards
3269                    .provision_journal_entries
3270                    .iter()
3271                    .cloned(),
3272            );
3273        }
3274
3275        // Phase 18b: OCPM Events (after all process data is available)
3276        let mut ocpm = self.phase_ocpm_events(
3277            &document_flows,
3278            &sourcing,
3279            &hr,
3280            &manufacturing_snap,
3281            &banking,
3282            &audit,
3283            &financial_reporting,
3284            &mut stats,
3285        )?;
3286
3287        // Emit OCPM events to stream sink
3288        if let Some(ref event_log) = ocpm.event_log {
3289            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3290        }
3291
3292        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
3293        if let Some(ref event_log) = ocpm.event_log {
3294            // Build reverse index: document_ref → indices of matching events in `event_log.events`
3295            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3296                std::collections::HashMap::new();
3297            for (idx, event) in event_log.events.iter().enumerate() {
3298                if let Some(ref doc_ref) = event.document_ref {
3299                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3300                }
3301            }
3302
3303            if !doc_index.is_empty() {
3304                let mut annotated = 0usize;
3305                for entry in &mut entries {
3306                    let doc_id_str = entry.header.document_id.to_string();
3307                    // Collect matching event indices from document_id and reference
3308                    let mut matched_indices: Vec<usize> = Vec::new();
3309                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3310                        matched_indices.extend(indices);
3311                    }
3312                    if let Some(ref reference) = entry.header.reference {
3313                        let bare_ref = reference
3314                            .find(':')
3315                            .map(|i| &reference[i + 1..])
3316                            .unwrap_or(reference.as_str());
3317                        if let Some(indices) = doc_index.get(bare_ref) {
3318                            for &idx in indices {
3319                                if !matched_indices.contains(&idx) {
3320                                    matched_indices.push(idx);
3321                                }
3322                            }
3323                        }
3324                    }
3325                    // Apply matches to JE header
3326                    if !matched_indices.is_empty() {
3327                        for &idx in &matched_indices {
3328                            let event = &event_log.events[idx];
3329                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3330                                entry.header.ocpm_event_ids.push(event.event_id);
3331                            }
3332                            for obj_ref in &event.object_refs {
3333                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3334                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
3335                                }
3336                            }
3337                            if entry.header.ocpm_case_id.is_none() {
3338                                entry.header.ocpm_case_id = event.case_id;
3339                            }
3340                        }
3341                        annotated += 1;
3342                    }
3343                }
3344                debug!(
3345                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3346                    annotated
3347                );
3348            }
3349        }
3350
3351        // Phase 18d: Synthesize OCPM events for orphan JEs (period-close,
3352        // IC eliminations, opening balances, standards-driven entries) so
3353        // every JournalEntry carries at least one `ocpm_event_ids` link.
3354        if let Some(ref mut event_log) = ocpm.event_log {
3355            let synthesized =
3356                datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3357            if synthesized > 0 {
3358                info!(
3359                    "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3360                );
3361            }
3362
3363            // Phase 18e: Mirror JE anomaly / fraud flags onto the linked OCEL
3364            // events and their owning CaseTrace. Without this, every exported
3365            // OCEL event has `is_anomaly = false` even when the underlying JE
3366            // was flagged.
3367            let anomaly_events =
3368                datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3369            if anomaly_events > 0 {
3370                info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3371            }
3372
3373            // Phase 18f: Inject process-variant imperfections (rework, skipped
3374            // steps, out-of-order events) so conformance checkers see
3375            // realistic variant counts and fitness < 1.0. Uses the P2P
3376            // process rates as the single source of truth.
3377            let p2p_cfg = &self.config.ocpm.p2p_process;
3378            let any_imperfection = p2p_cfg.rework_probability > 0.0
3379                || p2p_cfg.skip_step_probability > 0.0
3380                || p2p_cfg.out_of_order_probability > 0.0;
3381            if any_imperfection {
3382                use rand_chacha::rand_core::SeedableRng;
3383                let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3384                    rework_rate: p2p_cfg.rework_probability,
3385                    skip_rate: p2p_cfg.skip_step_probability,
3386                    out_of_order_rate: p2p_cfg.out_of_order_probability,
3387                };
3388                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3389                let stats =
3390                    datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3391                if stats.rework + stats.skipped + stats.out_of_order > 0 {
3392                    info!(
3393                        "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3394                        stats.rework, stats.skipped, stats.out_of_order
3395                    );
3396                }
3397            }
3398        }
3399
3400        // Phase 19: Sales Quotes, Management KPIs, Budgets
3401        let sales_kpi_budgets =
3402            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3403
3404        // Phase 22: Treasury Data Generation
3405        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
3406        // are included in the pre-tax income used by phase_tax_generation.
3407        let treasury =
3408            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3409
3410        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
3411        if !treasury.journal_entries.is_empty() {
3412            debug!(
3413                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3414                treasury.journal_entries.len()
3415            );
3416            entries.extend(treasury.journal_entries.iter().cloned());
3417        }
3418
3419        // Phase 20: Tax Generation
3420        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3421
3422        // Phase 20 JEs: Merge tax posting journal entries into main GL
3423        if !tax.tax_posting_journal_entries.is_empty() {
3424            debug!(
3425                "Merging {} tax posting JEs into GL",
3426                tax.tax_posting_journal_entries.len()
3427            );
3428            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3429        }
3430
3431        // Phase 20b: FINAL fraud behavioral bias sweep.
3432        //
3433        // Many phases AFTER Phase 8b (ECL / provisions / treasury / tax /
3434        // period close) extend `entries` with new journal entries that may
3435        // carry `is_fraud = true` (e.g. tax-provision entries derived from
3436        // already-fraudulent transactions). Those late additions miss the
3437        // Phase 8b sweep and ship without bias applied — which is exactly
3438        // why SDK-team production jobs kept reporting `off_hours 0× lift`
3439        // even after v3.1.1 closed the per-phase gap for early-added JEs.
3440        //
3441        // Running the sweep one more time here guarantees every is_fraud
3442        // entry — regardless of which phase added it — has bias applied.
3443        // `!is_anomaly` gates out anomaly-injector entries (which already
3444        // got biased inline); the sweep is otherwise idempotent-ish:
3445        // weekend / off_hours re-fire to another valid weekend / off-hour,
3446        // post_close is guarded by `!is_post_close`, and round-dollar
3447        // rescaling on an already-round amount is a no-op (ratio = 1).
3448        {
3449            use datasynth_core::fraud_bias::{
3450                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3451            };
3452            use rand_chacha::rand_core::SeedableRng;
3453            let cfg = FraudBehavioralBiasConfig::default();
3454            if cfg.enabled {
3455                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3456                let mut swept = 0usize;
3457                for entry in entries.iter_mut() {
3458                    if entry.header.is_fraud && !entry.header.is_anomaly {
3459                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3460                        swept += 1;
3461                    }
3462                }
3463                if swept > 0 {
3464                    info!(
3465                        "Phase 20b: final behavioral-bias sweep applied to {swept} \
3466                         non-anomaly fraud entries (covers late-added JEs from \
3467                         ECL / provisions / treasury / tax / period-close)"
3468                    );
3469                }
3470            }
3471        }
3472
3473        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
3474        // Build supplementary cash flow items from upstream JE data (depreciation,
3475        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
3476        {
3477            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3478
3479            let framework_str = {
3480                use datasynth_config::schema::AccountingFrameworkConfig;
3481                match self
3482                    .config
3483                    .accounting_standards
3484                    .framework
3485                    .unwrap_or_default()
3486                {
3487                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3488                        "IFRS"
3489                    }
3490                    _ => "US_GAAP",
3491                }
3492            };
3493
3494            // Sum depreciation debits (account 6000) from close JEs
3495            let depreciation_total: rust_decimal::Decimal = entries
3496                .iter()
3497                .filter(|je| je.header.document_type == "CL")
3498                .flat_map(|je| je.lines.iter())
3499                .filter(|l| l.gl_account.starts_with("6000"))
3500                .map(|l| l.debit_amount)
3501                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3502
3503            // Sum interest expense debits (account 7100)
3504            let interest_paid: rust_decimal::Decimal = entries
3505                .iter()
3506                .flat_map(|je| je.lines.iter())
3507                .filter(|l| l.gl_account.starts_with("7100"))
3508                .map(|l| l.debit_amount)
3509                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3510
3511            // Sum tax expense debits (account 8000)
3512            let tax_paid: rust_decimal::Decimal = entries
3513                .iter()
3514                .flat_map(|je| je.lines.iter())
3515                .filter(|l| l.gl_account.starts_with("8000"))
3516                .map(|l| l.debit_amount)
3517                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3518
3519            // Sum capex debits on fixed assets (account 1500)
3520            let capex: rust_decimal::Decimal = entries
3521                .iter()
3522                .flat_map(|je| je.lines.iter())
3523                .filter(|l| l.gl_account.starts_with("1500"))
3524                .map(|l| l.debit_amount)
3525                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3526
3527            // Dividends paid: sum debits on dividends payable (account 2170) across all JEs (no document-type filter)
3528            let dividends_paid: rust_decimal::Decimal = entries
3529                .iter()
3530                .flat_map(|je| je.lines.iter())
3531                .filter(|l| l.gl_account == "2170")
3532                .map(|l| l.debit_amount)
3533                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3534
3535            let cf_data = CashFlowSourceData {
3536                depreciation_total,
3537                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
3538                delta_ar: rust_decimal::Decimal::ZERO,
3539                delta_ap: rust_decimal::Decimal::ZERO,
3540                delta_inventory: rust_decimal::Decimal::ZERO,
3541                capex,
3542                debt_issuance: rust_decimal::Decimal::ZERO,
3543                debt_repayment: rust_decimal::Decimal::ZERO,
3544                interest_paid,
3545                tax_paid,
3546                dividends_paid,
3547                framework: framework_str.to_string(),
3548            };
3549
3550            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3551            if !enhanced_cf_items.is_empty() {
3552                // Merge into ALL cash flow statements (standalone + consolidated)
3553                use datasynth_core::models::StatementType;
3554                let merge_count = enhanced_cf_items.len();
3555                for stmt in financial_reporting
3556                    .financial_statements
3557                    .iter_mut()
3558                    .chain(financial_reporting.consolidated_statements.iter_mut())
3559                    .chain(
3560                        financial_reporting
3561                            .standalone_statements
3562                            .values_mut()
3563                            .flat_map(|v| v.iter_mut()),
3564                    )
3565                {
3566                    if stmt.statement_type == StatementType::CashFlowStatement {
3567                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3568                    }
3569                }
3570                info!(
3571                    "Enhanced cash flow: {} supplementary items merged into CF statements",
3572                    merge_count
3573                );
3574            }
3575        }
3576
3577        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
3578        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
3579        self.generate_notes_to_financial_statements(
3580            &mut financial_reporting,
3581            &accounting_standards,
3582            &tax,
3583            &hr,
3584            &audit,
3585            &treasury,
3586        );
3587
3588        // Phase 20b: Supplement segment reports from real JEs (v2.4)
3589        // When we have 2+ companies, derive segment data from actual journal entries
3590        // to complement or replace the FS-generator-based segments.
3591        if self.config.companies.len() >= 2 && !entries.is_empty() {
3592            let companies: Vec<(String, String)> = self
3593                .config
3594                .companies
3595                .iter()
3596                .map(|c| (c.code.clone(), c.name.clone()))
3597                .collect();
3598            let ic_elim: rust_decimal::Decimal =
3599                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3600            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3601                .unwrap_or(NaiveDate::MIN);
3602            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3603            let period_label = format!(
3604                "{}-{:02}",
3605                end_date.year(),
3606                (end_date - chrono::Days::new(1)).month()
3607            );
3608
3609            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3610            let (je_segments, je_recon) =
3611                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3612            if !je_segments.is_empty() {
3613                info!(
3614                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3615                    je_segments.len(),
3616                    ic_elim,
3617                );
3618                // Replace if existing segment_reports were empty; otherwise supplement
3619                if financial_reporting.segment_reports.is_empty() {
3620                    financial_reporting.segment_reports = je_segments;
3621                    financial_reporting.segment_reconciliations = vec![je_recon];
3622                } else {
3623                    financial_reporting.segment_reports.extend(je_segments);
3624                    financial_reporting.segment_reconciliations.push(je_recon);
3625                }
3626            }
3627        }
3628
3629        // Phase 21: ESG Data Generation
3630        let esg_snap =
3631            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3632
3633        // Phase 23: Project Accounting Data Generation
3634        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3635
3636        // Phase 24: Process Evolution + Organizational Events
3637        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3638
3639        // Phase 24b: Disruption Events
3640        let disruption_events = self.phase_disruption_events(&mut stats)?;
3641
3642        // Phase 27: Bi-Temporal Vendor Version Chains
3643        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3644
3645        // Phase 28: Entity Relationship Graph + Cross-Process Links
3646        let (entity_relationship_graph, cross_process_links) =
3647            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3648
3649        // Phase 29: Industry-specific GL accounts
3650        let industry_output = self.phase_industry_data(&mut stats);
3651
3652        // Phase: Compliance regulations (must run before hypergraph so it can be included)
3653        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3654
3655        // Phase: Neural enhancement (config-acknowledged-only in v4.0).
3656        //
3657        // The neural / hybrid diffusion path was a documented L2 stub
3658        // in v3.x; actual neural-network training requires ML
3659        // infrastructure (PyTorch / candle bindings, GPU access,
3660        // training loops) that was never wired through the
3661        // orchestrator. Rather than keep a silently-no-op block that
3662        // misleads users into thinking neural training happens, v4.0
3663        // acknowledges the config — exposing stats so downstream
3664        // tooling can see the request — but emits a clear warning
3665        // when a non-statistical backend is requested. The statistical
3666        // diffusion backend continues to run via
3667        // `phase_diffusion_enhancement`.
3668        //
3669        // Users who need real neural diffusion: track the roadmap item
3670        // in the v4.x backlog and consider contributing the backend
3671        // (the `DiffusionBackend` trait is the integration point).
3672        if self.config.diffusion.enabled
3673            && (self.config.diffusion.backend == "neural"
3674                || self.config.diffusion.backend == "hybrid")
3675        {
3676            let neural = &self.config.diffusion.neural;
3677            let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3678            stats.neural_hybrid_weight = Some(weight);
3679            stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3680            stats.neural_routed_column_count = Some(neural.neural_columns.len());
3681            warn!(
3682                "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3683                 the neural/hybrid training path is not yet shipped. Config \
3684                 is captured in stats (weight={weight:.2}, strategy={}, \
3685                 columns={}) but no neural training runs. Statistical \
3686                 diffusion (backend='statistical') continues to work.",
3687                self.config.diffusion.backend,
3688                neural.hybrid_strategy,
3689                neural.neural_columns.len(),
3690            );
3691        }
3692
3693        // Phase 19b: Hypergraph Export (after all data is available)
3694        self.phase_hypergraph_export(
3695            &coa,
3696            &entries,
3697            &document_flows,
3698            &sourcing,
3699            &hr,
3700            &manufacturing_snap,
3701            &banking,
3702            &audit,
3703            &financial_reporting,
3704            &ocpm,
3705            &compliance_regulations,
3706            &mut stats,
3707        )?;
3708
3709        // Phase 10c: Additional graph builders (approval, entity, banking)
3710        // These run after all data is available since they need banking/IC data.
3711        if self.phase_config.generate_graph_export {
3712            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3713        }
3714
3715        // Log informational messages for config sections not yet fully wired
3716        if self.config.streaming.enabled {
3717            info!("Note: streaming config is enabled but batch mode does not use it");
3718        }
3719        if self.config.vendor_network.enabled {
3720            debug!("Vendor network config available; relationship graph generation is partial");
3721        }
3722        if self.config.customer_segmentation.enabled {
3723            debug!("Customer segmentation config available; segment-aware generation is partial");
3724        }
3725
3726        // Log final resource statistics
3727        let resource_stats = self.resource_guard.stats();
3728        info!(
3729            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3730            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3731            resource_stats.disk.estimated_bytes_written,
3732            resource_stats.degradation_level
3733        );
3734
3735        // Flush any remaining stream sink data
3736        if let Some(ref sink) = self.phase_sink {
3737            if let Err(e) = sink.flush() {
3738                warn!("Stream sink flush failed: {e}");
3739            }
3740        }
3741
3742        // Build data lineage graph
3743        let lineage = self.build_lineage_graph();
3744
3745        // Evaluate quality gates if enabled in config
3746        let gate_result = if self.config.quality_gates.enabled {
3747            let profile_name = &self.config.quality_gates.profile;
3748            match datasynth_eval::gates::get_profile(profile_name) {
3749                Some(profile) => {
3750                    // Build an evaluation populated with actual generation metrics.
3751                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3752
3753                    // Populate balance sheet evaluation from balance validation results
3754                    if balance_validation.validated {
3755                        eval.coherence.balance =
3756                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3757                                equation_balanced: balance_validation.is_balanced,
3758                                max_imbalance: (balance_validation.total_debits
3759                                    - balance_validation.total_credits)
3760                                    .abs(),
3761                                periods_evaluated: 1,
3762                                periods_imbalanced: if balance_validation.is_balanced {
3763                                    0
3764                                } else {
3765                                    1
3766                                },
3767                                period_results: Vec::new(),
3768                                companies_evaluated: self.config.companies.len(),
3769                            });
3770                    }
3771
3772                    // Set coherence passes based on balance validation
3773                    eval.coherence.passes = balance_validation.is_balanced;
3774                    if !balance_validation.is_balanced {
3775                        eval.coherence
3776                            .failures
3777                            .push("Balance sheet equation not satisfied".to_string());
3778                    }
3779
3780                    // Set statistical score based on entry count (basic sanity)
3781                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3782                    eval.statistical.passes = !entries.is_empty();
3783
3784                    // Quality score is a fixed placeholder here; it is not derived from data quality stats
3785                    eval.quality.overall_score = 0.9; // Default high for generated data
3786                    eval.quality.passes = true;
3787
3788                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3789                    info!(
3790                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3791                        profile_name, result.gates_passed, result.gates_total, result.summary
3792                    );
3793                    Some(result)
3794                }
3795                None => {
3796                    warn!(
3797                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3798                        profile_name
3799                    );
3800                    None
3801                }
3802            }
3803        } else {
3804            None
3805        };
3806
3807        // Generate internal controls if enabled
3808        let internal_controls = if self.config.internal_controls.enabled {
3809            InternalControl::standard_controls()
3810        } else {
3811            Vec::new()
3812        };
3813
3814        // v3.3.0: analytics-metadata phase. Runs AFTER all JE-adding
3815        // phases (including fraud-bias sweep at Phase 20b) so derived
3816        // outputs reflect final data.
3817        let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3818
3819        // v3.5.1: statistical validation over the final amount
3820        // distribution. Runs *after* all JE-adding phases so the report
3821        // reflects everything the user will see in the output. Returns
3822        // `None` unless `distributions.validation.enabled = true`.
3823        let statistical_validation = self.phase_statistical_validation(&entries)?;
3824
3825        // v4.1.3+: interconnectivity snapshot — tier assignments,
3826        // value-segment labels, industry-specific metadata. Runs after
3827        // master data is settled so it can index stable IDs.
3828        let interconnectivity = self.phase_interconnectivity();
3829
3830        // SP5.2 — snapshot the CoA semantic prior (if any) into the result so
3831        // output_writer can use it as a fallback index for account_description
3832        // resolution when the synthetic CoA index misses.
3833        let coa_semantic_prior = self
3834            .cached_priors
3835            .as_ref()
3836            .and_then(|p| p.coa_semantic.clone());
3837
3838        Ok(EnhancedGenerationResult {
3839            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3840            master_data: std::mem::take(&mut self.master_data),
3841            document_flows,
3842            subledger,
3843            ocpm,
3844            audit,
3845            banking,
3846            graph_export,
3847            sourcing,
3848            financial_reporting,
3849            hr,
3850            accounting_standards,
3851            manufacturing: manufacturing_snap,
3852            sales_kpi_budgets,
3853            tax,
3854            esg: esg_snap,
3855            treasury,
3856            project_accounting,
3857            process_evolution,
3858            organizational_events,
3859            disruption_events,
3860            intercompany,
3861            journal_entries: entries,
3862            anomaly_labels,
3863            balance_validation,
3864            data_quality_stats,
3865            quality_issues,
3866            statistics: stats,
3867            lineage: Some(lineage),
3868            gate_result,
3869            internal_controls,
3870            sod_violations,
3871            opening_balances,
3872            subledger_reconciliation,
3873            counterfactual_pairs,
3874            red_flags,
3875            collusion_rings,
3876            temporal_vendor_chains,
3877            entity_relationship_graph,
3878            cross_process_links,
3879            industry_output,
3880            coa_semantic_prior,
3881            compliance_regulations,
3882            analytics_metadata,
3883            statistical_validation,
3884            interconnectivity,
3885        })
3886    }
3887
3888    /// v4.1.3+: populate the interconnectivity snapshot from
3889    /// previously-inert schema sections. Empty when all sections are
3890    /// disabled.
3891    fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3892        use rand::{RngExt, SeedableRng};
3893        use rand_chacha::ChaCha8Rng;
3894
3895        let mut snap = InterconnectivitySnapshot::default();
3896        let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3897
3898        // --- Vendor network ---
3899        let vn = &self.config.vendor_network;
3900        if vn.enabled {
3901            let total = self.master_data.vendors.len();
3902            if total > 0 {
3903                let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3904                let remaining_after_t1 = total.saturating_sub(tier1_count);
3905                let depth = vn.depth.clamp(1, 3);
3906                let tier2_count = if depth >= 2 {
3907                    let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3908                    (tier1_count * avg).min(remaining_after_t1)
3909                } else {
3910                    0
3911                };
3912                let tier3_count = total
3913                    .saturating_sub(tier1_count)
3914                    .saturating_sub(tier2_count);
3915
3916                for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3917                    let tier = if idx < tier1_count {
3918                        1
3919                    } else if idx < tier1_count + tier2_count {
3920                        2
3921                    } else {
3922                        3
3923                    };
3924                    snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3925
3926                    // Cluster assignment via configured ratios.
3927                    let cl = &vn.clusters;
3928                    let roll: f64 = rng.random();
3929                    let cluster = if roll < cl.reliable_strategic {
3930                        "reliable_strategic"
3931                    } else if roll < cl.reliable_strategic + cl.standard_operational {
3932                        "standard_operational"
3933                    } else if roll
3934                        < cl.reliable_strategic + cl.standard_operational + cl.transactional
3935                    {
3936                        "transactional"
3937                    } else {
3938                        "problematic"
3939                    };
3940                    snap.vendor_clusters
3941                        .push((vendor.vendor_id.clone(), cluster.to_string()));
3942                }
3943                let _ = tier3_count; // retained for clarity; tier 3 bucket is the remainder
3944            }
3945        }
3946
3947        // --- Customer segmentation ---
3948        let cs = &self.config.customer_segmentation;
3949        if cs.enabled {
3950            let seg = &cs.value_segments;
3951            for customer in &self.master_data.customers {
3952                let roll: f64 = rng.random();
3953                let value_segment = if roll < seg.enterprise.customer_share {
3954                    "enterprise"
3955                } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3956                    "mid_market"
3957                } else if roll
3958                    < seg.enterprise.customer_share
3959                        + seg.mid_market.customer_share
3960                        + seg.smb.customer_share
3961                {
3962                    "smb"
3963                } else {
3964                    "consumer"
3965                };
3966                snap.customer_value_segments
3967                    .push((customer.customer_id.clone(), value_segment.to_string()));
3968
3969                let roll2: f64 = rng.random();
3970                let life = &cs.lifecycle;
3971                let lifecycle = if roll2 < life.prospect_rate {
3972                    "prospect"
3973                } else if roll2 < life.prospect_rate + life.new_rate {
3974                    "new"
3975                } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3976                    "growth"
3977                } else if roll2
3978                    < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3979                {
3980                    "mature"
3981                } else if roll2
3982                    < life.prospect_rate
3983                        + life.new_rate
3984                        + life.growth_rate
3985                        + life.mature_rate
3986                        + life.at_risk_rate
3987                {
3988                    "at_risk"
3989                } else if roll2
3990                    < life.prospect_rate
3991                        + life.new_rate
3992                        + life.growth_rate
3993                        + life.mature_rate
3994                        + life.at_risk_rate
3995                        + life.churned_rate
3996                {
3997                    "churned"
3998                } else {
3999                    "won_back"
4000                };
4001                snap.customer_lifecycle_stages
4002                    .push((customer.customer_id.clone(), lifecycle.to_string()));
4003            }
4004        }
4005
4006        // --- Industry-specific metadata (minimal) ---
4007        let is = &self.config.industry_specific;
4008        if is.enabled {
4009            snap.industry_metadata.push(format!(
4010                "industry_specific.enabled=true (industry={:?})",
4011                self.config.global.industry
4012            ));
4013        }
4014
4015        snap
4016    }
4017
4018    // ========================================================================
4019    // Generation Phase Methods
4020    // ========================================================================
4021
4022    /// Phase 1: Generate Chart of Accounts and update statistics.
4023    fn phase_chart_of_accounts(
4024        &mut self,
4025        stats: &mut EnhancedGenerationStatistics,
4026    ) -> SynthResult<Arc<ChartOfAccounts>> {
4027        info!("Phase 1: Generating Chart of Accounts");
4028        let coa = self.generate_coa()?;
4029        stats.accounts_count = coa.account_count();
4030        info!(
4031            "Chart of Accounts generated: {} accounts",
4032            stats.accounts_count
4033        );
4034        self.check_resources_with_log("post-coa")?;
4035        Ok(coa)
4036    }
4037
4038    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
4039    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
4040        if self.phase_config.generate_master_data {
4041            info!("Phase 2: Generating Master Data");
4042            self.generate_master_data()?;
4043            stats.vendor_count = self.master_data.vendors.len();
4044            stats.customer_count = self.master_data.customers.len();
4045            stats.material_count = self.master_data.materials.len();
4046            stats.asset_count = self.master_data.assets.len();
4047            stats.employee_count = self.master_data.employees.len();
4048            info!(
4049                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
4050                stats.vendor_count, stats.customer_count, stats.material_count,
4051                stats.asset_count, stats.employee_count
4052            );
4053            self.check_resources_with_log("post-master-data")?;
4054        } else {
4055            debug!("Phase 2: Skipped (master data generation disabled)");
4056        }
4057        Ok(())
4058    }
4059
4060    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
4061    fn phase_document_flows(
4062        &mut self,
4063        stats: &mut EnhancedGenerationStatistics,
4064    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
4065        let mut document_flows = DocumentFlowSnapshot::default();
4066        let mut subledger = SubledgerSnapshot::default();
4067        // Dunning JEs (interest + charges) accumulated here and merged into the
4068        // main FA-JE list below so they appear in the GL.
4069        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
4070
4071        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
4072            info!("Phase 3: Generating Document Flows");
4073            self.generate_document_flows(&mut document_flows)?;
4074            stats.p2p_chain_count = document_flows.p2p_chains.len();
4075            stats.o2c_chain_count = document_flows.o2c_chains.len();
4076            info!(
4077                "Document flows generated: {} P2P chains, {} O2C chains",
4078                stats.p2p_chain_count, stats.o2c_chain_count
4079            );
4080
4081            // Phase 3b: Link document flows to subledgers (for data coherence)
4082            debug!("Phase 3b: Linking document flows to subledgers");
4083            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
4084            stats.ap_invoice_count = subledger.ap_invoices.len();
4085            stats.ar_invoice_count = subledger.ar_invoices.len();
4086            debug!(
4087                "Subledgers linked: {} AP invoices, {} AR invoices",
4088                stats.ap_invoice_count, stats.ar_invoice_count
4089            );
4090
4091            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
4092            // Without this step the subledger is systematically overstated because
4093            // amount_remaining is set at invoice creation and never reduced by
4094            // the payments that were generated in the document-flow phase.
4095            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
4096            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
4097            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
4098            debug!("Payment settlements applied to AP and AR subledgers");
4099
4100            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
4101            // The as-of date is the last day of the configured period.
4102            if let Ok(start_date) =
4103                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4104            {
4105                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4106                    - chrono::Days::new(1);
4107                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4108                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
4109                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
4110                // derived from JE-level aggregation and will typically differ. This is a known
4111                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
4112                // generated independently. A future reconciliation phase should align them by
4113                // using subledger totals as the authoritative source for BS Receivables.
4114                for company in &self.config.companies {
4115                    let ar_report = ARAgingReport::from_invoices(
4116                        company.code.clone(),
4117                        &subledger.ar_invoices,
4118                        as_of_date,
4119                    );
4120                    subledger.ar_aging_reports.push(ar_report);
4121
4122                    let ap_report = APAgingReport::from_invoices(
4123                        company.code.clone(),
4124                        &subledger.ap_invoices,
4125                        as_of_date,
4126                    );
4127                    subledger.ap_aging_reports.push(ap_report);
4128                }
4129                debug!(
4130                    "AR/AP aging reports built: {} AR, {} AP",
4131                    subledger.ar_aging_reports.len(),
4132                    subledger.ap_aging_reports.len()
4133                );
4134
4135                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
4136                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4137                {
4138                    use datasynth_generators::DunningGenerator;
4139                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4140                    for company in &self.config.companies {
4141                        let currency = company.currency.as_str();
4142                        // Collect mutable references to AR invoices for this company
4143                        // (dunning generator updates dunning_info on invoices in-place).
4144                        let mut company_invoices: Vec<
4145                            datasynth_core::models::subledger::ar::ARInvoice,
4146                        > = subledger
4147                            .ar_invoices
4148                            .iter()
4149                            .filter(|inv| inv.company_code == company.code)
4150                            .cloned()
4151                            .collect();
4152
4153                        if company_invoices.is_empty() {
4154                            continue;
4155                        }
4156
4157                        let result = dunning_gen.execute_dunning_run(
4158                            &company.code,
4159                            as_of_date,
4160                            &mut company_invoices,
4161                            currency,
4162                        );
4163
4164                        // Write back updated dunning info to the main AR invoice list
4165                        for updated in &company_invoices {
4166                            if let Some(orig) = subledger
4167                                .ar_invoices
4168                                .iter_mut()
4169                                .find(|i| i.invoice_number == updated.invoice_number)
4170                            {
4171                                orig.dunning_info = updated.dunning_info.clone();
4172                            }
4173                        }
4174
4175                        subledger.dunning_runs.push(result.dunning_run);
4176                        subledger.dunning_letters.extend(result.letters);
4177                        // Dunning JEs (interest + charges) collected into local buffer.
4178                        dunning_journal_entries.extend(result.journal_entries);
4179                    }
4180                    debug!(
4181                        "Dunning runs complete: {} runs, {} letters",
4182                        subledger.dunning_runs.len(),
4183                        subledger.dunning_letters.len()
4184                    );
4185                }
4186            }
4187
4188            self.check_resources_with_log("post-document-flows")?;
4189        } else {
4190            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4191        }
4192
4193        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
4194        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4195        if !self.master_data.assets.is_empty() {
4196            debug!("Generating FA subledger records");
4197            let company_code = self
4198                .config
4199                .companies
4200                .first()
4201                .map(|c| c.code.as_str())
4202                .unwrap_or("1000");
4203            let currency = self
4204                .config
4205                .companies
4206                .first()
4207                .map(|c| c.currency.as_str())
4208                .unwrap_or("USD");
4209
4210            let mut fa_gen = datasynth_generators::FAGenerator::new(
4211                datasynth_generators::FAGeneratorConfig::default(),
4212                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4213            );
4214
4215            for asset in &self.master_data.assets {
4216                let (record, je) = fa_gen.generate_asset_acquisition(
4217                    company_code,
4218                    &format!("{:?}", asset.asset_class),
4219                    &asset.description,
4220                    asset.acquisition_date,
4221                    currency,
4222                    asset.cost_center.as_deref(),
4223                );
4224                subledger.fa_records.push(record);
4225                fa_journal_entries.push(je);
4226            }
4227
4228            stats.fa_subledger_count = subledger.fa_records.len();
4229            debug!(
4230                "FA subledger records generated: {} (with {} acquisition JEs)",
4231                stats.fa_subledger_count,
4232                fa_journal_entries.len()
4233            );
4234        }
4235
4236        // Generate Inventory subledger records from master data materials
4237        if !self.master_data.materials.is_empty() {
4238            debug!("Generating Inventory subledger records");
4239            let first_company = self.config.companies.first();
4240            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4241            let inv_currency = first_company
4242                .map(|c| c.currency.clone())
4243                .unwrap_or_else(|| "USD".to_string());
4244
4245            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4246                datasynth_generators::InventoryGeneratorConfig::default(),
4247                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4248                inv_currency.clone(),
4249            );
4250
4251            for (i, material) in self.master_data.materials.iter().enumerate() {
4252                let plant = format!("PLANT{:02}", (i % 3) + 1);
4253                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4254                let initial_qty = rust_decimal::Decimal::from(
4255                    material
4256                        .safety_stock
4257                        .to_string()
4258                        .parse::<i64>()
4259                        .unwrap_or(100),
4260                );
4261
4262                let position = inv_gen.generate_position(
4263                    company_code,
4264                    &plant,
4265                    &storage_loc,
4266                    &material.material_id,
4267                    &material.description,
4268                    initial_qty,
4269                    Some(material.standard_cost),
4270                    &inv_currency,
4271                );
4272                subledger.inventory_positions.push(position);
4273            }
4274
4275            stats.inventory_subledger_count = subledger.inventory_positions.len();
4276            debug!(
4277                "Inventory subledger records generated: {}",
4278                stats.inventory_subledger_count
4279            );
4280        }
4281
4282        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
4283        if !subledger.fa_records.is_empty() {
4284            if let Ok(start_date) =
4285                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4286            {
4287                let company_code = self
4288                    .config
4289                    .companies
4290                    .first()
4291                    .map(|c| c.code.as_str())
4292                    .unwrap_or("1000");
4293                let fiscal_year = start_date.year();
4294                let start_period = start_date.month();
4295                let end_period =
4296                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4297
4298                let depr_cfg = FaDepreciationScheduleConfig {
4299                    fiscal_year,
4300                    start_period,
4301                    end_period,
4302                    seed_offset: 800,
4303                };
4304                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4305                let runs = depr_gen.generate(company_code, &subledger.fa_records);
4306                let run_count = runs.len();
4307                subledger.depreciation_runs = runs;
4308                debug!(
4309                    "Depreciation runs generated: {} runs for {} periods",
4310                    run_count, self.config.global.period_months
4311                );
4312            }
4313        }
4314
4315        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
4316        if !subledger.inventory_positions.is_empty() {
4317            if let Ok(start_date) =
4318                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4319            {
4320                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4321                    - chrono::Days::new(1);
4322
4323                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4324                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4325
4326                for company in &self.config.companies {
4327                    let result = inv_val_gen.generate(
4328                        &company.code,
4329                        &subledger.inventory_positions,
4330                        as_of_date,
4331                    );
4332                    subledger.inventory_valuations.push(result);
4333                }
4334                debug!(
4335                    "Inventory valuations generated: {} company reports",
4336                    subledger.inventory_valuations.len()
4337                );
4338            }
4339        }
4340
4341        Ok((document_flows, subledger, fa_journal_entries))
4342    }
4343
4344    /// Phase 3c: Generate OCPM events from document flows.
4345    #[allow(clippy::too_many_arguments)]
4346    fn phase_ocpm_events(
4347        &mut self,
4348        document_flows: &DocumentFlowSnapshot,
4349        sourcing: &SourcingSnapshot,
4350        hr: &HrSnapshot,
4351        manufacturing: &ManufacturingSnapshot,
4352        banking: &BankingSnapshot,
4353        audit: &AuditSnapshot,
4354        financial_reporting: &FinancialReportingSnapshot,
4355        stats: &mut EnhancedGenerationStatistics,
4356    ) -> SynthResult<OcpmSnapshot> {
4357        let degradation = self.check_resources()?;
4358        if degradation >= DegradationLevel::Reduced {
4359            debug!(
4360                "Phase skipped due to resource pressure (degradation: {:?})",
4361                degradation
4362            );
4363            return Ok(OcpmSnapshot::default());
4364        }
4365        if self.phase_config.generate_ocpm_events {
4366            info!("Phase 3c: Generating OCPM Events");
4367            let ocpm_snapshot = self.generate_ocpm_events(
4368                document_flows,
4369                sourcing,
4370                hr,
4371                manufacturing,
4372                banking,
4373                audit,
4374                financial_reporting,
4375            )?;
4376            stats.ocpm_event_count = ocpm_snapshot.event_count;
4377            stats.ocpm_object_count = ocpm_snapshot.object_count;
4378            stats.ocpm_case_count = ocpm_snapshot.case_count;
4379            info!(
4380                "OCPM events generated: {} events, {} objects, {} cases",
4381                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4382            );
4383            self.check_resources_with_log("post-ocpm")?;
4384            Ok(ocpm_snapshot)
4385        } else {
4386            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4387            Ok(OcpmSnapshot::default())
4388        }
4389    }
4390
4391    /// Phase 4: Generate journal entries from document flows and standalone generation.
4392    fn phase_journal_entries(
4393        &mut self,
4394        coa: &Arc<ChartOfAccounts>,
4395        document_flows: &DocumentFlowSnapshot,
4396        _stats: &mut EnhancedGenerationStatistics,
4397    ) -> SynthResult<Vec<JournalEntry>> {
4398        let mut entries = Vec::new();
4399
4400        // Phase 4a: Generate JEs from document flows (for data coherence)
4401        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4402            debug!("Phase 4a: Generating JEs from document flows");
4403            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4404            debug!("Generated {} JEs from document flows", flow_entries.len());
4405            entries.extend(flow_entries);
4406        }
4407
4408        // Phase 4b: Generate standalone journal entries
4409        if self.phase_config.generate_journal_entries {
4410            info!("Phase 4: Generating Journal Entries");
4411            let je_entries = self.generate_journal_entries(coa)?;
4412            info!("Generated {} standalone journal entries", je_entries.len());
4413            entries.extend(je_entries);
4414        } else {
4415            debug!("Phase 4: Skipped (journal entry generation disabled)");
4416        }
4417
4418        // Phase 4c (shard mode): inject pre-built IC journal entries from
4419        // `ShardContext`. When running standalone (no group engine), this
4420        // is a no-op. See crate::shard_context::ShardContext for rationale.
4421        if let Some(ctx) = &self.shard_context {
4422            if !ctx.extra_journal_entries.is_empty() {
4423                debug!(
4424                    "Phase 4c: appending {} shard-mode IC journal entries",
4425                    ctx.extra_journal_entries.len()
4426                );
4427                entries.extend(ctx.extra_journal_entries.iter().cloned());
4428            }
4429        }
4430
4431        if !entries.is_empty() {
4432            // Note: stats.total_entries/total_line_items are set in generate()
4433            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
4434            self.check_resources_with_log("post-journal-entries")?;
4435        }
4436
4437        Ok(entries)
4438    }
4439
4440    /// Phase 5: Inject anomalies into journal entries.
4441    fn phase_anomaly_injection(
4442        &mut self,
4443        entries: &mut [JournalEntry],
4444        actions: &DegradationActions,
4445        stats: &mut EnhancedGenerationStatistics,
4446    ) -> SynthResult<AnomalyLabels> {
4447        if self.phase_config.inject_anomalies
4448            && !entries.is_empty()
4449            && !actions.skip_anomaly_injection
4450        {
4451            info!("Phase 5: Injecting Anomalies");
4452            let result = self.inject_anomalies(entries)?;
4453            stats.anomalies_injected = result.labels.len();
4454            info!("Injected {} anomalies", stats.anomalies_injected);
4455            self.check_resources_with_log("post-anomaly-injection")?;
4456            Ok(result)
4457        } else if actions.skip_anomaly_injection {
4458            warn!("Phase 5: Skipped due to resource degradation");
4459            Ok(AnomalyLabels::default())
4460        } else {
4461            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4462            Ok(AnomalyLabels::default())
4463        }
4464    }
4465
4466    /// Phase 8d (W8.1): TB drift-correction pass.
4467    ///
4468    /// Builds a `RunningBalanceTracker` over all JEs assembled so far, attaches
4469    /// the TB anchor prior (when available), and — if `drift_correction_needed()`
4470    /// fires for any company — emits one balanced "SA" adjustment JE per company
4471    /// to pull the synthetic balances toward the corpus-median targets.
4472    ///
4473    /// No-op when no TB anchor is loaded (backwards-compatible).
4474    fn phase_tb_drift_correction(&mut self, entries: &mut Vec<JournalEntry>) -> SynthResult<()> {
4475        // Only proceed when priors with a TB anchor are loaded.
4476        let tb_anchor = match &self.cached_priors {
4477            Some(priors) => match &priors.tb_anchor {
4478                Some(anchor) => anchor.clone(),
4479                None => return Ok(()),
4480            },
4481            None => return Ok(()),
4482        };
4483
4484        if !tb_anchor.has_data() {
4485            return Ok(());
4486        }
4487
4488        tracing::info!(
4489            target: "datasynth_runtime::tb_anchor",
4490            accounts = tb_anchor.per_account.len(),
4491            total_assets = tb_anchor.total_assets,
4492            "W8.1 — TB anchor loaded; running drift-correction pass"
4493        );
4494
4495        // Build a tracker over all current JEs.
4496        let tracker_config = BalanceTrackerConfig {
4497            validate_on_each_entry: false,
4498            track_history: false,
4499            fail_on_validation_error: false,
4500            ..Default::default()
4501        };
4502        let currency = self
4503            .config
4504            .companies
4505            .first()
4506            .map(|c| c.currency.clone())
4507            .unwrap_or_else(|| "USD".to_string());
4508
4509        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, currency);
4510        tracker.set_tb_anchor(tb_anchor.clone());
4511        let _ = tracker.apply_entries(entries);
4512
4513        // SP5.1 — Diagnostic: log the number of accounts being tracked vs in the
4514        // anchor, plus the top-5 most-drifted accounts for each company so we
4515        // can distinguish "no drift" from "drift below threshold" at a glance.
4516        for company in &self.config.companies {
4517            let code = &company.code;
4518            let drifts = tracker.account_drift(code);
4519            let mut sorted_drifts = drifts.clone();
4520            sorted_drifts.sort_by(|a, b| {
4521                b.1.abs()
4522                    .partial_cmp(&a.1.abs())
4523                    .unwrap_or(std::cmp::Ordering::Equal)
4524            });
4525            let aggregate_drift: f64 = drifts.iter().map(|(_, d)| d.abs()).sum();
4526            let correction_needed = tracker.drift_correction_needed(code);
4527            tracing::info!(
4528                target: "datasynth_runtime::tb_anchor",
4529                company = %code,
4530                anchor_accounts = tb_anchor.per_account.len(),
4531                tracked_accounts = drifts.len(),
4532                aggregate_drift = aggregate_drift,
4533                correction_needed = correction_needed,
4534                "W8.1 SP5.1 — per-company drift summary before correction"
4535            );
4536            for (acc, drift) in sorted_drifts.iter().take(5) {
4537                tracing::info!(
4538                    target: "datasynth_runtime::tb_anchor",
4539                    company = %code,
4540                    account = %acc,
4541                    drift = drift,
4542                    "W8.1 SP5.1 — top-5 drifted accounts"
4543                );
4544            }
4545        }
4546
4547        // Derive the posting date: use the last day of the simulation period.
4548        let period_end = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4549            .map(|d| d + chrono::Months::new(self.config.global.period_months))
4550            .unwrap_or_else(|_| chrono::Utc::now().naive_utc().date());
4551
4552        // Distinct seed offset so drift-correction draws are independent of other phases.
4553        use rand_chacha::rand_core::SeedableRng as _;
4554        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(0xD81F_C0F3));
4555
4556        let mut correction_count = 0usize;
4557        for company in &self.config.companies {
4558            let code = &company.code;
4559            if !tracker.drift_correction_needed(code) {
4560                tracing::debug!(
4561                    target: "datasynth_runtime::tb_anchor",
4562                    company = %code,
4563                    "W8.1 — drift_correction_needed returned false; skipping company"
4564                );
4565                continue;
4566            }
4567            if let Some(je) = tracker.build_drift_correction_je(code, period_end, &mut rng) {
4568                tracing::debug!(
4569                    target: "datasynth_runtime::tb_anchor",
4570                    company = %code,
4571                    lines = je.lines.len(),
4572                    debit = %je.total_debit(),
4573                    credit = %je.total_credit(),
4574                    "W8.1 — emitting drift-correction JE"
4575                );
4576                // Apply the correction to the tracker so the running state is current.
4577                let _ = tracker.apply_entry(&je);
4578                entries.push(je);
4579                correction_count += 1;
4580            }
4581        }
4582
4583        if correction_count > 0 {
4584            tracing::info!(
4585                target: "datasynth_runtime::tb_anchor",
4586                correction_count,
4587                "W8.1 — drift-correction pass emitted {} JE(s)",
4588                correction_count
4589            );
4590        } else {
4591            tracing::debug!(
4592                target: "datasynth_runtime::tb_anchor",
4593                "W8.1 — drift-correction pass: no corrections needed"
4594            );
4595        }
4596
4597        Ok(())
4598    }
4599
4600    /// Phase 6: Validate balance sheet equation on journal entries.
4601    fn phase_balance_validation(
4602        &mut self,
4603        entries: &[JournalEntry],
4604    ) -> SynthResult<BalanceValidationResult> {
4605        if self.phase_config.validate_balances && !entries.is_empty() {
4606            debug!("Phase 6: Validating Balances");
4607            let balance_validation = self.validate_journal_entries(entries)?;
4608            if balance_validation.is_balanced {
4609                debug!("Balance validation passed");
4610            } else {
4611                warn!(
4612                    "Balance validation found {} errors",
4613                    balance_validation.validation_errors.len()
4614                );
4615            }
4616            Ok(balance_validation)
4617        } else {
4618            Ok(BalanceValidationResult::default())
4619        }
4620    }
4621
4622    /// Validate that every `gl_account` referenced in `entries` exists in the
4623    /// chart of accounts.
4624    ///
4625    /// Always emits a warn-level log when the COA is missing accounts; in
4626    /// strict mode (`phase_config.validate_coa_coverage_strict`) returns
4627    /// `SynthError::generation` so the caller can fail fast.
4628    fn validate_coa_coverage(
4629        &self,
4630        entries: &[JournalEntry],
4631        coa: &ChartOfAccounts,
4632    ) -> SynthResult<()> {
4633        if entries.is_empty() {
4634            return Ok(());
4635        }
4636        let coa_set: std::collections::HashSet<&str> = coa
4637            .accounts
4638            .iter()
4639            .map(|a| a.account_number.as_str())
4640            .collect();
4641        let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4642        for je in entries {
4643            for line in je.lines.iter() {
4644                if !coa_set.contains(line.gl_account.as_str()) {
4645                    missing.insert(line.gl_account.clone());
4646                }
4647            }
4648        }
4649        if missing.is_empty() {
4650            debug!("COA coverage validation passed");
4651            return Ok(());
4652        }
4653        let msg = format!(
4654            "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4655            missing.len(),
4656            missing.iter().take(10).collect::<Vec<_>>()
4657        );
4658        if self.phase_config.validate_coa_coverage_strict {
4659            Err(SynthError::generation(msg))
4660        } else {
4661            warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4662            Ok(())
4663        }
4664    }
4665
4666    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
4667    fn phase_data_quality_injection(
4668        &mut self,
4669        entries: &mut [JournalEntry],
4670        actions: &DegradationActions,
4671        stats: &mut EnhancedGenerationStatistics,
4672    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4673        if self.phase_config.inject_data_quality
4674            && !entries.is_empty()
4675            && !actions.skip_data_quality
4676        {
4677            info!("Phase 7: Injecting Data Quality Variations");
4678            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4679            stats.data_quality_issues = dq_stats.records_with_issues;
4680            info!("Injected {} data quality issues", stats.data_quality_issues);
4681            self.check_resources_with_log("post-data-quality")?;
4682            Ok((dq_stats, quality_issues))
4683        } else if actions.skip_data_quality {
4684            warn!("Phase 7: Skipped due to resource degradation");
4685            // v4.4.1: report the denominator (entries seen) even when
4686            // injection is skipped, so downstream consumers can tell
4687            // "skipped, 0/N" apart from "ran but found nothing".
4688            Ok((stats_with_denominator(entries.len()), Vec::new()))
4689        } else {
4690            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4691            Ok((stats_with_denominator(entries.len()), Vec::new()))
4692        }
4693    }
4694
4695    /// Phase 10b: Generate period-close journal entries.
4696    ///
4697    /// Generates:
4698    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
4699    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
4700    ///    for the configured period.
4701    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
4702    /// 3. Income statement closing JE per company: transfer net income after tax to retained
4703    ///    earnings via the Income Summary (3600) clearing account.
4704    fn phase_period_close(
4705        &mut self,
4706        entries: &mut Vec<JournalEntry>,
4707        subledger: &SubledgerSnapshot,
4708        stats: &mut EnhancedGenerationStatistics,
4709    ) -> SynthResult<()> {
4710        if !self.phase_config.generate_period_close || entries.is_empty() {
4711            debug!("Phase 10b: Skipped (period close disabled or no entries)");
4712            return Ok(());
4713        }
4714
4715        info!("Phase 10b: Generating period-close journal entries");
4716
4717        use datasynth_core::accounts::{
4718            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4719        };
4720        use rust_decimal::Decimal;
4721
4722        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4723            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4724        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4725        // Posting date for close entries is the last day of the period
4726        let close_date = end_date - chrono::Days::new(1);
4727
4728        // Statutory tax rate (21% — configurable rates come in later tiers)
4729        let tax_rate = Decimal::new(21, 2); // 0.21
4730
4731        // Collect company codes from config
4732        let company_codes: Vec<String> = self
4733            .config
4734            .companies
4735            .iter()
4736            .map(|c| c.code.clone())
4737            .collect();
4738
4739        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
4740        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4741        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4742
4743        // --- Depreciation JEs (per asset) ---
4744        // Compute period depreciation for each active fixed asset using straight-line method.
4745        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
4746        let period_months = self.config.global.period_months;
4747        for asset in &subledger.fa_records {
4748            // Skip assets that are inactive / fully depreciated / non-depreciable
4749            use datasynth_core::models::subledger::fa::AssetStatus;
4750            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4751                continue;
4752            }
4753            let useful_life_months = asset.useful_life_months();
4754            if useful_life_months == 0 {
4755                // Land or CIP — not depreciated
4756                continue;
4757            }
4758            let salvage_value = asset.salvage_value();
4759            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4760            if depreciable_base == Decimal::ZERO {
4761                continue;
4762            }
4763            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4764                * Decimal::from(period_months))
4765            .round_dp(2);
4766            if period_depr <= Decimal::ZERO {
4767                continue;
4768            }
4769
4770            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4771            depr_header.document_type = "CL".to_string();
4772            depr_header.header_text = Some(format!(
4773                "Depreciation - {} {}",
4774                asset.asset_number, asset.description
4775            ));
4776            depr_header.created_by = "CLOSE_ENGINE".to_string();
4777            depr_header.source = TransactionSource::Automated;
4778            depr_header.business_process = Some(BusinessProcess::R2R);
4779
4780            let doc_id = depr_header.document_id;
4781            let mut depr_je = JournalEntry::new(depr_header);
4782
4783            // DR Depreciation Expense (6000)
4784            depr_je.add_line(JournalEntryLine::debit(
4785                doc_id,
4786                1,
4787                expense_accounts::DEPRECIATION.to_string(),
4788                period_depr,
4789            ));
4790            // CR Accumulated Depreciation (1510)
4791            depr_je.add_line(JournalEntryLine::credit(
4792                doc_id,
4793                2,
4794                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4795                period_depr,
4796            ));
4797
4798            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4799            close_jes.push(depr_je);
4800        }
4801
4802        if !subledger.fa_records.is_empty() {
4803            debug!(
4804                "Generated {} depreciation JEs from {} FA records",
4805                close_jes.len(),
4806                subledger.fa_records.len()
4807            );
4808        }
4809
4810        // --- Accrual entries (standard period-end accruals per company) ---
4811        // Generate standard accrued expense entries (utilities, rent, interest) using
4812        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
4813        {
4814            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4815            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4816            // v3.4.3: snap reversal dates to business days. No-op when
4817            // temporal_patterns.business_days is disabled.
4818            if let Some(ctx) = &self.temporal_context {
4819                accrual_gen.set_temporal_context(Arc::clone(ctx));
4820            }
4821
4822            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
4823            let accrual_items: &[(&str, &str, &str)] = &[
4824                ("Accrued Utilities", "6200", "2100"),
4825                ("Accrued Rent", "6300", "2100"),
4826                ("Accrued Interest", "6100", "2150"),
4827            ];
4828
4829            for company_code in &company_codes {
4830                // Estimate company revenue from existing JEs
4831                let company_revenue: Decimal = entries
4832                    .iter()
4833                    .filter(|e| e.header.company_code == *company_code)
4834                    .flat_map(|e| e.lines.iter())
4835                    .filter(|l| l.gl_account.starts_with('4'))
4836                    .map(|l| l.credit_amount - l.debit_amount)
4837                    .fold(Decimal::ZERO, |acc, v| acc + v);
4838
4839                if company_revenue <= Decimal::ZERO {
4840                    continue;
4841                }
4842
4843                // Use 0.5% of period revenue per accrual item as a proxy
4844                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4845                if accrual_base <= Decimal::ZERO {
4846                    continue;
4847                }
4848
4849                for (description, expense_acct, liability_acct) in accrual_items {
4850                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4851                        company_code,
4852                        description,
4853                        accrual_base,
4854                        expense_acct,
4855                        liability_acct,
4856                        close_date,
4857                        None,
4858                    );
4859                    close_jes.push(accrual_je);
4860                    if let Some(rev_je) = reversal_je {
4861                        close_jes.push(rev_je);
4862                    }
4863                }
4864            }
4865
4866            debug!(
4867                "Generated accrual entries for {} companies",
4868                company_codes.len()
4869            );
4870        }
4871
4872        for company_code in &company_codes {
4873            // Calculate net income for this company from existing JEs:
4874            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
4875            // Revenue (4xxx): credit-normal, so net = credits - debits
4876            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
4877            let mut total_revenue = Decimal::ZERO;
4878            let mut total_expenses = Decimal::ZERO;
4879
4880            for entry in entries.iter() {
4881                if entry.header.company_code != *company_code {
4882                    continue;
4883                }
4884                for line in &entry.lines {
4885                    let category = AccountCategory::from_account(&line.gl_account);
4886                    match category {
4887                        AccountCategory::Revenue => {
4888                            // Revenue is credit-normal: net revenue = credits - debits
4889                            total_revenue += line.credit_amount - line.debit_amount;
4890                        }
4891                        AccountCategory::Cogs
4892                        | AccountCategory::OperatingExpense
4893                        | AccountCategory::OtherIncomeExpense
4894                        | AccountCategory::Tax => {
4895                            // Expenses are debit-normal: net expense = debits - credits
4896                            total_expenses += line.debit_amount - line.credit_amount;
4897                        }
4898                        _ => {}
4899                    }
4900                }
4901            }
4902
4903            let pre_tax_income = total_revenue - total_expenses;
4904
4905            // Skip if no income statement activity
4906            if pre_tax_income == Decimal::ZERO {
4907                debug!(
4908                    "Company {}: no pre-tax income, skipping period close",
4909                    company_code
4910                );
4911                continue;
4912            }
4913
4914            // --- Tax provision / DTA JE ---
4915            if pre_tax_income > Decimal::ZERO {
4916                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4917                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4918
4919                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4920                tax_header.document_type = "CL".to_string();
4921                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4922                tax_header.created_by = "CLOSE_ENGINE".to_string();
4923                tax_header.source = TransactionSource::Automated;
4924                tax_header.business_process = Some(BusinessProcess::R2R);
4925
4926                let doc_id = tax_header.document_id;
4927                let mut tax_je = JournalEntry::new(tax_header);
4928
4929                // DR Tax Expense (8000)
4930                tax_je.add_line(JournalEntryLine::debit(
4931                    doc_id,
4932                    1,
4933                    tax_accounts::TAX_EXPENSE.to_string(),
4934                    tax_amount,
4935                ));
4936                // CR Income Tax Payable (2130)
4937                tax_je.add_line(JournalEntryLine::credit(
4938                    doc_id,
4939                    2,
4940                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4941                    tax_amount,
4942                ));
4943
4944                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4945                close_jes.push(tax_je);
4946            } else {
4947                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
4948                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
4949                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4950                if dta_amount > Decimal::ZERO {
4951                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4952                    dta_header.document_type = "CL".to_string();
4953                    dta_header.header_text =
4954                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
4955                    dta_header.created_by = "CLOSE_ENGINE".to_string();
4956                    dta_header.source = TransactionSource::Automated;
4957                    dta_header.business_process = Some(BusinessProcess::R2R);
4958
4959                    let doc_id = dta_header.document_id;
4960                    let mut dta_je = JournalEntry::new(dta_header);
4961
4962                    // DR Deferred Tax Asset (1600)
4963                    dta_je.add_line(JournalEntryLine::debit(
4964                        doc_id,
4965                        1,
4966                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4967                        dta_amount,
4968                    ));
4969                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
4970                    // reflecting the benefit of the future deductible temporary difference.
4971                    dta_je.add_line(JournalEntryLine::credit(
4972                        doc_id,
4973                        2,
4974                        tax_accounts::TAX_EXPENSE.to_string(),
4975                        dta_amount,
4976                    ));
4977
4978                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4979                    close_jes.push(dta_je);
4980                    debug!(
4981                        "Company {}: loss year — recognised DTA of {}",
4982                        company_code, dta_amount
4983                    );
4984                }
4985            }
4986
4987            // --- Dividend JEs (v2.4) ---
4988            // If the entity is profitable after tax, declare a 10% dividend payout.
4989            // This runs AFTER tax provision so the dividend is based on post-tax income
4990            // but BEFORE the retained earnings close so the RE transfer reflects the
4991            // reduced balance.
4992            let tax_provision = if pre_tax_income > Decimal::ZERO {
4993                (pre_tax_income * tax_rate).round_dp(2)
4994            } else {
4995                Decimal::ZERO
4996            };
4997            let net_income = pre_tax_income - tax_provision;
4998
4999            if net_income > Decimal::ZERO {
5000                use datasynth_generators::DividendGenerator;
5001                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
5002                let mut div_gen = DividendGenerator::new(self.seed + 460);
5003                let currency_str = self
5004                    .config
5005                    .companies
5006                    .iter()
5007                    .find(|c| c.code == *company_code)
5008                    .map(|c| c.currency.as_str())
5009                    .unwrap_or("USD");
5010                let div_result = div_gen.generate(
5011                    company_code,
5012                    close_date,
5013                    Decimal::new(1, 0), // $1 per share placeholder
5014                    dividend_amount,
5015                    currency_str,
5016                );
5017                let div_je_count = div_result.journal_entries.len();
5018                close_jes.extend(div_result.journal_entries);
5019                debug!(
5020                    "Company {}: declared dividend of {} ({} JEs)",
5021                    company_code, dividend_amount, div_je_count
5022                );
5023            }
5024
5025            // --- Income statement closing JE ---
5026            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
5027            // For a loss year the DTA JE above already recognises the deferred benefit; here we
5028            // close the pre-tax loss into Retained Earnings as-is.
5029            if net_income != Decimal::ZERO {
5030                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
5031                close_header.document_type = "CL".to_string();
5032                close_header.header_text =
5033                    Some(format!("Income statement close - {}", company_code));
5034                close_header.created_by = "CLOSE_ENGINE".to_string();
5035                close_header.source = TransactionSource::Automated;
5036                close_header.business_process = Some(BusinessProcess::R2R);
5037
5038                let doc_id = close_header.document_id;
5039                let mut close_je = JournalEntry::new(close_header);
5040
5041                let abs_net_income = net_income.abs();
5042
5043                if net_income > Decimal::ZERO {
5044                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
5045                    close_je.add_line(JournalEntryLine::debit(
5046                        doc_id,
5047                        1,
5048                        equity_accounts::INCOME_SUMMARY.to_string(),
5049                        abs_net_income,
5050                    ));
5051                    close_je.add_line(JournalEntryLine::credit(
5052                        doc_id,
5053                        2,
5054                        equity_accounts::RETAINED_EARNINGS.to_string(),
5055                        abs_net_income,
5056                    ));
5057                } else {
5058                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
5059                    close_je.add_line(JournalEntryLine::debit(
5060                        doc_id,
5061                        1,
5062                        equity_accounts::RETAINED_EARNINGS.to_string(),
5063                        abs_net_income,
5064                    ));
5065                    close_je.add_line(JournalEntryLine::credit(
5066                        doc_id,
5067                        2,
5068                        equity_accounts::INCOME_SUMMARY.to_string(),
5069                        abs_net_income,
5070                    ));
5071                }
5072
5073                debug_assert!(
5074                    close_je.is_balanced(),
5075                    "Income statement closing JE must be balanced"
5076                );
5077                close_jes.push(close_je);
5078            }
5079        }
5080
5081        let close_count = close_jes.len();
5082        if close_count > 0 {
5083            info!("Generated {} period-close journal entries", close_count);
5084            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
5085            entries.extend(close_jes);
5086            stats.period_close_je_count = close_count;
5087
5088            // Update total entry/line-item stats
5089            stats.total_entries = entries.len() as u64;
5090            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
5091        } else {
5092            debug!("No period-close entries generated (no income statement activity)");
5093        }
5094
5095        Ok(())
5096    }
5097
5098    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
5099    fn phase_audit_data(
5100        &mut self,
5101        entries: &[JournalEntry],
5102        stats: &mut EnhancedGenerationStatistics,
5103    ) -> SynthResult<AuditSnapshot> {
5104        if self.phase_config.generate_audit {
5105            info!("Phase 8: Generating Audit Data");
5106            let audit_snapshot = self.generate_audit_data(entries)?;
5107            stats.audit_engagement_count = audit_snapshot.engagements.len();
5108            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
5109            stats.audit_evidence_count = audit_snapshot.evidence.len();
5110            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
5111            stats.audit_finding_count = audit_snapshot.findings.len();
5112            stats.audit_judgment_count = audit_snapshot.judgments.len();
5113            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
5114            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
5115            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
5116            stats.audit_sample_count = audit_snapshot.samples.len();
5117            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
5118            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
5119            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
5120            stats.audit_related_party_count = audit_snapshot.related_parties.len();
5121            stats.audit_related_party_transaction_count =
5122                audit_snapshot.related_party_transactions.len();
5123            info!(
5124                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
5125                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
5126                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
5127                 {} RP transactions",
5128                stats.audit_engagement_count,
5129                stats.audit_workpaper_count,
5130                stats.audit_evidence_count,
5131                stats.audit_risk_count,
5132                stats.audit_finding_count,
5133                stats.audit_judgment_count,
5134                stats.audit_confirmation_count,
5135                stats.audit_procedure_step_count,
5136                stats.audit_sample_count,
5137                stats.audit_analytical_result_count,
5138                stats.audit_ia_function_count,
5139                stats.audit_ia_report_count,
5140                stats.audit_related_party_count,
5141                stats.audit_related_party_transaction_count,
5142            );
5143            self.check_resources_with_log("post-audit")?;
5144            Ok(audit_snapshot)
5145        } else {
5146            debug!("Phase 8: Skipped (audit generation disabled)");
5147            Ok(AuditSnapshot::default())
5148        }
5149    }
5150
5151    /// Phase 9: Generate banking KYC/AML data.
5152    fn phase_banking_data(
5153        &mut self,
5154        stats: &mut EnhancedGenerationStatistics,
5155    ) -> SynthResult<BankingSnapshot> {
5156        if self.phase_config.generate_banking {
5157            info!("Phase 9: Generating Banking KYC/AML Data");
5158            let banking_snapshot = self.generate_banking_data()?;
5159            stats.banking_customer_count = banking_snapshot.customers.len();
5160            stats.banking_account_count = banking_snapshot.accounts.len();
5161            stats.banking_transaction_count = banking_snapshot.transactions.len();
5162            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
5163            info!(
5164                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
5165                stats.banking_customer_count, stats.banking_account_count,
5166                stats.banking_transaction_count, stats.banking_suspicious_count
5167            );
5168            self.check_resources_with_log("post-banking")?;
5169            Ok(banking_snapshot)
5170        } else {
5171            debug!("Phase 9: Skipped (banking generation disabled)");
5172            Ok(BankingSnapshot::default())
5173        }
5174    }
5175
5176    /// Phase 10: Export accounting network graphs for ML training.
5177    fn phase_graph_export(
5178        &mut self,
5179        entries: &[JournalEntry],
5180        coa: &Arc<ChartOfAccounts>,
5181        stats: &mut EnhancedGenerationStatistics,
5182    ) -> SynthResult<GraphExportSnapshot> {
5183        if self.phase_config.generate_graph_export && !entries.is_empty() {
5184            info!("Phase 10: Exporting Accounting Network Graphs");
5185            match self.export_graphs(entries, coa, stats) {
5186                Ok(snapshot) => {
5187                    info!(
5188                        "Graph export complete: {} graphs ({} nodes, {} edges)",
5189                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
5190                    );
5191                    Ok(snapshot)
5192                }
5193                Err(e) => {
5194                    warn!("Phase 10: Graph export failed: {}", e);
5195                    Ok(GraphExportSnapshot::default())
5196                }
5197            }
5198        } else {
5199            debug!("Phase 10: Skipped (graph export disabled or no entries)");
5200            Ok(GraphExportSnapshot::default())
5201        }
5202    }
5203
5204    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
5205    #[allow(clippy::too_many_arguments)]
5206    fn phase_hypergraph_export(
5207        &self,
5208        coa: &Arc<ChartOfAccounts>,
5209        entries: &[JournalEntry],
5210        document_flows: &DocumentFlowSnapshot,
5211        sourcing: &SourcingSnapshot,
5212        hr: &HrSnapshot,
5213        manufacturing: &ManufacturingSnapshot,
5214        banking: &BankingSnapshot,
5215        audit: &AuditSnapshot,
5216        financial_reporting: &FinancialReportingSnapshot,
5217        ocpm: &OcpmSnapshot,
5218        compliance: &ComplianceRegulationsSnapshot,
5219        stats: &mut EnhancedGenerationStatistics,
5220    ) -> SynthResult<()> {
5221        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
5222            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
5223            match self.export_hypergraph(
5224                coa,
5225                entries,
5226                document_flows,
5227                sourcing,
5228                hr,
5229                manufacturing,
5230                banking,
5231                audit,
5232                financial_reporting,
5233                ocpm,
5234                compliance,
5235                stats,
5236            ) {
5237                Ok(info) => {
5238                    info!(
5239                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
5240                        info.node_count, info.edge_count, info.hyperedge_count
5241                    );
5242                }
5243                Err(e) => {
5244                    warn!("Phase 10b: Hypergraph export failed: {}", e);
5245                }
5246            }
5247        } else {
5248            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5249        }
5250        Ok(())
5251    }
5252
5253    /// Phase 11: LLM Enrichment.
5254    ///
5255    /// Uses an LLM provider (mock by default) to enrich vendor names with
5256    /// realistic, context-aware names. This phase is non-blocking: failures
5257    /// log a warning but do not stop the generation pipeline.
5258    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5259        if !self.config.llm.enabled {
5260            debug!("Phase 11: Skipped (LLM enrichment disabled)");
5261            return;
5262        }
5263
5264        info!("Phase 11: Starting LLM Enrichment");
5265        let start = std::time::Instant::now();
5266
5267        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5268            // Select provider: use HttpLlmProvider when a non-mock provider is configured
5269            // and the corresponding API key environment variable is present.
5270            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5271                let schema_provider = &self.config.llm.provider;
5272                let api_key_env = match schema_provider.as_str() {
5273                    "openai" => Some("OPENAI_API_KEY"),
5274                    "anthropic" => Some("ANTHROPIC_API_KEY"),
5275                    "custom" => Some("LLM_API_KEY"),
5276                    _ => None,
5277                };
5278                if let Some(key_env) = api_key_env {
5279                    if std::env::var(key_env).is_ok() {
5280                        let llm_config = datasynth_core::llm::LlmConfig {
5281                            model: self.config.llm.model.clone(),
5282                            api_key_env: key_env.to_string(),
5283                            ..datasynth_core::llm::LlmConfig::default()
5284                        };
5285                        match HttpLlmProvider::new(llm_config) {
5286                            Ok(p) => Arc::new(p),
5287                            Err(e) => {
5288                                warn!(
5289                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
5290                                    e
5291                                );
5292                                Arc::new(MockLlmProvider::new(self.seed))
5293                            }
5294                        }
5295                    } else {
5296                        Arc::new(MockLlmProvider::new(self.seed))
5297                    }
5298                } else {
5299                    Arc::new(MockLlmProvider::new(self.seed))
5300                }
5301            };
5302            // v4.1.1+: multi-category enrichment. Vendors remain the
5303            // default path; customers and materials opt in via
5304            // `llm.enrich_customers` / `llm.enrich_materials` flags.
5305            let industry = format!("{:?}", self.config.global.industry);
5306
5307            let vendor_enricher =
5308                datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5309            let max_vendors = self
5310                .config
5311                .llm
5312                .max_vendor_enrichments
5313                .min(self.master_data.vendors.len());
5314            let mut vendors_enriched = 0usize;
5315            for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5316                match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5317                    Ok(name) => {
5318                        vendor.name = name;
5319                        vendors_enriched += 1;
5320                    }
5321                    Err(e) => warn!(
5322                        "LLM vendor enrichment failed for {}: {}",
5323                        vendor.vendor_id, e
5324                    ),
5325                }
5326            }
5327
5328            let mut customers_enriched = 0usize;
5329            if self.config.llm.enrich_customers {
5330                let customer_enricher =
5331                    datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5332                        &provider,
5333                    ));
5334                let max_customers = self
5335                    .config
5336                    .llm
5337                    .max_customer_enrichments
5338                    .min(self.master_data.customers.len());
5339                for customer in self.master_data.customers.iter_mut().take(max_customers) {
5340                    match customer_enricher.enrich_customer_name(
5341                        &industry,
5342                        "general",
5343                        &customer.country,
5344                    ) {
5345                        Ok(name) => {
5346                            customer.name = name;
5347                            customers_enriched += 1;
5348                        }
5349                        Err(e) => warn!(
5350                            "LLM customer enrichment failed for {}: {}",
5351                            customer.customer_id, e
5352                        ),
5353                    }
5354                }
5355            }
5356
5357            let mut materials_enriched = 0usize;
5358            if self.config.llm.enrich_materials {
5359                let material_enricher =
5360                    datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5361                        &provider,
5362                    ));
5363                let max_materials = self
5364                    .config
5365                    .llm
5366                    .max_material_enrichments
5367                    .min(self.master_data.materials.len());
5368                for material in self.master_data.materials.iter_mut().take(max_materials) {
5369                    let material_type = format!("{:?}", material.material_type);
5370                    match material_enricher.enrich_material_description(&material_type, &industry) {
5371                        Ok(desc) => {
5372                            material.description = desc;
5373                            materials_enriched += 1;
5374                        }
5375                        Err(e) => warn!(
5376                            "LLM material enrichment failed for {}: {}",
5377                            material.material_id, e
5378                        ),
5379                    }
5380                }
5381            }
5382
5383            (vendors_enriched, customers_enriched, materials_enriched)
5384        }));
5385
5386        match result {
5387            Ok((v, c, m)) => {
5388                stats.llm_vendors_enriched = v;
5389                stats.llm_customers_enriched = c;
5390                stats.llm_materials_enriched = m;
5391                let elapsed = start.elapsed();
5392                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5393                info!(
5394                    "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5395                    v, c, m, stats.llm_enrichment_ms
5396                );
5397            }
5398            Err(_) => {
5399                let elapsed = start.elapsed();
5400                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5401                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5402            }
5403        }
5404    }
5405
5406    /// Phase 12: Diffusion Enhancement.
5407    ///
5408    /// Generates a sample set matching distribution properties from the
5409    /// generated data. v4.4.0+ honours `config.diffusion.backend`:
5410    /// - `"statistical"` (default) — moment-matching backend, always fast.
5411    /// - `"neural"` / `"hybrid"` — candle-based score network. Requires
5412    ///   the `neural` Cargo feature; falls back to statistical when the
5413    ///   feature isn't compiled in, with a loud warning.
5414    ///
5415    /// This phase is non-blocking: failures log a warning but do not
5416    /// stop the pipeline.
5417    fn phase_diffusion_enhancement(
5418        &self,
5419        #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5420        stats: &mut EnhancedGenerationStatistics,
5421    ) {
5422        if !self.config.diffusion.enabled {
5423            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5424            return;
5425        }
5426
5427        info!("Phase 12: Starting Diffusion Enhancement");
5428        let start = std::time::Instant::now();
5429
5430        let backend_choice = self.config.diffusion.backend.as_str();
5431        let use_neural = matches!(backend_choice, "neural" | "hybrid");
5432
5433        if use_neural {
5434            #[cfg(feature = "neural")]
5435            {
5436                match self.run_neural_diffusion_phase(entries) {
5437                    Ok(sample_count) => {
5438                        stats.diffusion_samples_generated = sample_count;
5439                        let elapsed = start.elapsed();
5440                        stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5441                        info!(
5442                            "Phase 12 complete ({}): {} samples in {}ms",
5443                            backend_choice, sample_count, stats.diffusion_enhancement_ms
5444                        );
5445                        return;
5446                    }
5447                    Err(e) => {
5448                        warn!(
5449                            "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5450                        );
5451                        // Fall through to statistical path below.
5452                    }
5453                }
5454            }
5455            #[cfg(not(feature = "neural"))]
5456            {
5457                warn!(
5458                    "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5459                     not compiled in — falling back to statistical. Rebuild with \
5460                     `--features neural` (or `neural-cuda` for GPU) to enable.",
5461                    backend_choice
5462                );
5463            }
5464        } else if !matches!(backend_choice, "statistical" | "") {
5465            warn!(
5466                "Phase 12: unknown backend '{}', falling back to statistical",
5467                backend_choice
5468            );
5469        }
5470
5471        // Statistical path (default + fallback).
5472        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5473            let means = vec![5000.0, 3.0, 2.0];
5474            let stds = vec![2000.0, 1.5, 1.0];
5475
5476            let diffusion_config = DiffusionConfig {
5477                n_steps: self.config.diffusion.n_steps,
5478                seed: self.seed,
5479                ..Default::default()
5480            };
5481
5482            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5483            let n_samples = self.config.diffusion.sample_size;
5484            let n_features = 3;
5485            backend.generate(n_samples, n_features, self.seed).len()
5486        }));
5487
5488        match result {
5489            Ok(sample_count) => {
5490                stats.diffusion_samples_generated = sample_count;
5491                let elapsed = start.elapsed();
5492                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5493                info!(
5494                    "Phase 12 complete (statistical): {} samples in {}ms",
5495                    sample_count, stats.diffusion_enhancement_ms
5496                );
5497            }
5498            Err(_) => {
5499                let elapsed = start.elapsed();
5500                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5501                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5502            }
5503        }
5504    }
5505
5506    /// Neural-backend execution — either load a pre-trained checkpoint
5507    /// (when `config.diffusion.neural.checkpoint_path` is set) or train
5508    /// from the first batch of JE amounts. Returns the sample count
5509    /// produced; any error bubbles up to the statistical fallback.
5510    #[cfg(feature = "neural")]
5511    fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
5512        use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
5513
5514        if entries.is_empty() {
5515            return Err(SynthError::generation(
5516                "neural diffusion: no journal entries available as training data",
5517            ));
5518        }
5519
5520        let training_data: Vec<Vec<f64>> = entries
5521            .iter()
5522            .take(5000)
5523            .map(|je| {
5524                let total_amount: f64 = je
5525                    .lines
5526                    .iter()
5527                    .filter(|l| l.debit_amount > rust_decimal::Decimal::ZERO)
5528                    .map(|l| {
5529                        use rust_decimal::prelude::ToPrimitive;
5530                        l.debit_amount.to_f64().unwrap_or(0.0)
5531                    })
5532                    .sum();
5533                let line_count = je.lines.len() as f64;
5534                // Use the approval-workflow depth as the third feature
5535                // (proxy for complexity / risk). `None` → 1.
5536                let approval_level = je
5537                    .header
5538                    .approval_workflow
5539                    .as_ref()
5540                    .map(|w| w.required_levels as f64)
5541                    .unwrap_or(1.0);
5542                vec![total_amount, line_count, approval_level]
5543            })
5544            .collect();
5545
5546        let n_features = training_data.first().map(|r| r.len()).unwrap_or(3);
5547
5548        let cfg = &self.config.diffusion;
5549        let neural_cfg = &cfg.neural;
5550
5551        let backend: NeuralDiffusionBackend = if let Some(ckpt_path) =
5552            neural_cfg.checkpoint_path.as_ref()
5553        {
5554            let path = std::path::Path::new(ckpt_path);
5555            info!(
5556                "  Neural diffusion: loading checkpoint from {}",
5557                path.display()
5558            );
5559            NeuralDiffusionBackend::load(path)
5560                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
5561        } else {
5562            use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
5563            info!(
5564                "  Neural diffusion: training score network on {} rows × {} features, \
5565                     {} epochs, hidden_dims={:?}",
5566                training_data.len(),
5567                n_features,
5568                neural_cfg.training_epochs,
5569                neural_cfg.hidden_dims
5570            );
5571            let training_config = NeuralTrainingConfig {
5572                n_steps: cfg.n_steps,
5573                schedule: cfg.schedule.clone(),
5574                hidden_dims: neural_cfg.hidden_dims.clone(),
5575                timestep_embed_dim: neural_cfg.timestep_embed_dim,
5576                learning_rate: neural_cfg.learning_rate,
5577                epochs: neural_cfg.training_epochs,
5578                batch_size: neural_cfg.batch_size,
5579            };
5580            let (backend, report) =
5581                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
5582                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
5583            info!(
5584                "  Neural diffusion: training done — {} epochs, final_loss={:.4}",
5585                report.epochs_completed, report.final_loss
5586            );
5587            backend
5588        };
5589
5590        let samples = backend.generate(cfg.sample_size, n_features, self.seed);
5591        Ok(samples.len())
5592    }
5593
5594    /// Phase 13: Causal Overlay.
5595    ///
5596    /// Builds a structural causal model from a built-in template (e.g.,
5597    /// fraud_detection) and generates causal samples. Optionally validates
5598    /// that the output respects the causal structure. This phase is
5599    /// non-blocking: failures log a warning but do not stop the pipeline.
5600    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5601        if !self.config.causal.enabled {
5602            debug!("Phase 13: Skipped (causal generation disabled)");
5603            return;
5604        }
5605
5606        info!("Phase 13: Starting Causal Overlay");
5607        let start = std::time::Instant::now();
5608
5609        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5610            // Select template based on config
5611            let graph = match self.config.causal.template.as_str() {
5612                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5613                _ => CausalGraph::fraud_detection_template(),
5614            };
5615
5616            let scm = StructuralCausalModel::new(graph.clone())
5617                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5618
5619            let n_samples = self.config.causal.sample_size;
5620            let samples = scm
5621                .generate(n_samples, self.seed)
5622                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5623
5624            // Optionally validate causal structure
5625            let validation_passed = if self.config.causal.validate {
5626                let report = CausalValidator::validate_causal_structure(&samples, &graph);
5627                if report.valid {
5628                    info!(
5629                        "Causal validation passed: all {} checks OK",
5630                        report.checks.len()
5631                    );
5632                } else {
5633                    warn!(
5634                        "Causal validation: {} violations detected: {:?}",
5635                        report.violations.len(),
5636                        report.violations
5637                    );
5638                }
5639                Some(report.valid)
5640            } else {
5641                None
5642            };
5643
5644            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5645        }));
5646
5647        match result {
5648            Ok(Ok((sample_count, validation_passed))) => {
5649                stats.causal_samples_generated = sample_count;
5650                stats.causal_validation_passed = validation_passed;
5651                let elapsed = start.elapsed();
5652                stats.causal_generation_ms = elapsed.as_millis() as u64;
5653                info!(
5654                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5655                    sample_count, stats.causal_generation_ms, validation_passed,
5656                );
5657            }
5658            Ok(Err(e)) => {
5659                let elapsed = start.elapsed();
5660                stats.causal_generation_ms = elapsed.as_millis() as u64;
5661                warn!("Phase 13: Causal generation failed: {}", e);
5662            }
5663            Err(_) => {
5664                let elapsed = start.elapsed();
5665                stats.causal_generation_ms = elapsed.as_millis() as u64;
5666                warn!("Phase 13: Causal generation failed (panic caught), continuing");
5667            }
5668        }
5669    }
5670
5671    /// Phase 14: Generate S2C sourcing data.
5672    fn phase_sourcing_data(
5673        &mut self,
5674        stats: &mut EnhancedGenerationStatistics,
5675    ) -> SynthResult<SourcingSnapshot> {
5676        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5677            debug!("Phase 14: Skipped (sourcing generation disabled)");
5678            return Ok(SourcingSnapshot::default());
5679        }
5680        let degradation = self.check_resources()?;
5681        if degradation >= DegradationLevel::Reduced {
5682            debug!(
5683                "Phase skipped due to resource pressure (degradation: {:?})",
5684                degradation
5685            );
5686            return Ok(SourcingSnapshot::default());
5687        }
5688
5689        info!("Phase 14: Generating S2C Sourcing Data");
5690        let seed = self.seed;
5691
5692        // Gather vendor data from master data
5693        let vendor_ids: Vec<String> = self
5694            .master_data
5695            .vendors
5696            .iter()
5697            .map(|v| v.vendor_id.clone())
5698            .collect();
5699        if vendor_ids.is_empty() {
5700            debug!("Phase 14: Skipped (no vendors available)");
5701            return Ok(SourcingSnapshot::default());
5702        }
5703
5704        let categories: Vec<(String, String)> = vec![
5705            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5706            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5707            ("CAT-IT".to_string(), "IT Equipment".to_string()),
5708            ("CAT-SVC".to_string(), "Professional Services".to_string()),
5709            ("CAT-LOG".to_string(), "Logistics".to_string()),
5710        ];
5711        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5712            .iter()
5713            .map(|(id, name)| {
5714                (
5715                    id.clone(),
5716                    name.clone(),
5717                    rust_decimal::Decimal::from(100_000),
5718                )
5719            })
5720            .collect();
5721
5722        let company_code = self
5723            .config
5724            .companies
5725            .first()
5726            .map(|c| c.code.as_str())
5727            .unwrap_or("1000");
5728        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5729            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5730        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5731        let fiscal_year = start_date.year() as u16;
5732        let owner_ids: Vec<String> = self
5733            .master_data
5734            .employees
5735            .iter()
5736            .take(5)
5737            .map(|e| e.employee_id.clone())
5738            .collect();
5739        let owner_id = owner_ids
5740            .first()
5741            .map(std::string::String::as_str)
5742            .unwrap_or("BUYER-001");
5743
5744        // Step 1: Spend Analysis
5745        let mut spend_gen = SpendAnalysisGenerator::new(seed);
5746        let spend_analyses =
5747            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5748
5749        // Step 2: Sourcing Projects
5750        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5751        let sourcing_projects = if owner_ids.is_empty() {
5752            Vec::new()
5753        } else {
5754            project_gen.generate(
5755                company_code,
5756                &categories_with_spend,
5757                &owner_ids,
5758                start_date,
5759                self.config.global.period_months,
5760            )
5761        };
5762        stats.sourcing_project_count = sourcing_projects.len();
5763
5764        // Step 3: Qualifications
5765        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5766        let mut qual_gen = QualificationGenerator::new(seed + 2);
5767        let qualifications = qual_gen.generate(
5768            company_code,
5769            &qual_vendor_ids,
5770            sourcing_projects.first().map(|p| p.project_id.as_str()),
5771            owner_id,
5772            start_date,
5773        );
5774
5775        // Step 4: RFx Events
5776        let mut rfx_gen = RfxGenerator::new(seed + 3);
5777        let rfx_events: Vec<RfxEvent> = sourcing_projects
5778            .iter()
5779            .map(|proj| {
5780                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5781                rfx_gen.generate(
5782                    company_code,
5783                    &proj.project_id,
5784                    &proj.category_id,
5785                    &qualified_vids,
5786                    owner_id,
5787                    start_date,
5788                    50000.0,
5789                )
5790            })
5791            .collect();
5792        stats.rfx_event_count = rfx_events.len();
5793
5794        // Step 5: Bids
5795        let mut bid_gen = BidGenerator::new(seed + 4);
5796        let mut all_bids = Vec::new();
5797        for rfx in &rfx_events {
5798            let bidder_count = vendor_ids.len().clamp(2, 5);
5799            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5800            let bids = bid_gen.generate(rfx, &responding, start_date);
5801            all_bids.extend(bids);
5802        }
5803        stats.bid_count = all_bids.len();
5804
5805        // Step 6: Bid Evaluations
5806        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5807        let bid_evaluations: Vec<BidEvaluation> = rfx_events
5808            .iter()
5809            .map(|rfx| {
5810                let rfx_bids: Vec<SupplierBid> = all_bids
5811                    .iter()
5812                    .filter(|b| b.rfx_id == rfx.rfx_id)
5813                    .cloned()
5814                    .collect();
5815                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5816            })
5817            .collect();
5818
5819        // Step 7: Contracts from winning bids
5820        let mut contract_gen = ContractGenerator::new(seed + 6);
5821        let contracts: Vec<ProcurementContract> = bid_evaluations
5822            .iter()
5823            .zip(rfx_events.iter())
5824            .filter_map(|(eval, rfx)| {
5825                eval.ranked_bids.first().and_then(|winner| {
5826                    all_bids
5827                        .iter()
5828                        .find(|b| b.bid_id == winner.bid_id)
5829                        .map(|winning_bid| {
5830                            contract_gen.generate_from_bid(
5831                                winning_bid,
5832                                Some(&rfx.sourcing_project_id),
5833                                &rfx.category_id,
5834                                owner_id,
5835                                start_date,
5836                            )
5837                        })
5838                })
5839            })
5840            .collect();
5841        stats.contract_count = contracts.len();
5842
5843        // Step 8: Catalog Items
5844        let mut catalog_gen = CatalogGenerator::new(seed + 7);
5845        let catalog_items = catalog_gen.generate(&contracts);
5846        stats.catalog_item_count = catalog_items.len();
5847
5848        // Step 9: Scorecards
5849        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5850        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5851            .iter()
5852            .fold(
5853                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5854                |mut acc, c| {
5855                    acc.entry(c.vendor_id.clone()).or_default().push(c);
5856                    acc
5857                },
5858            )
5859            .into_iter()
5860            .collect();
5861        let scorecards = scorecard_gen.generate(
5862            company_code,
5863            &vendor_contracts,
5864            start_date,
5865            end_date,
5866            owner_id,
5867        );
5868        stats.scorecard_count = scorecards.len();
5869
5870        // Back-populate cross-references on sourcing projects (Task 35)
5871        // Link each project to its RFx events, contracts, and spend analyses
5872        let mut sourcing_projects = sourcing_projects;
5873        for project in &mut sourcing_projects {
5874            // Link RFx events generated for this project
5875            project.rfx_ids = rfx_events
5876                .iter()
5877                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5878                .map(|rfx| rfx.rfx_id.clone())
5879                .collect();
5880
5881            // Link contract awarded from this project's RFx
5882            project.contract_id = contracts
5883                .iter()
5884                .find(|c| {
5885                    c.sourcing_project_id
5886                        .as_deref()
5887                        .is_some_and(|sp| sp == project.project_id)
5888                })
5889                .map(|c| c.contract_id.clone());
5890
5891            // Link spend analysis for matching category (use category_id as the reference)
5892            project.spend_analysis_id = spend_analyses
5893                .iter()
5894                .find(|sa| sa.category_id == project.category_id)
5895                .map(|sa| sa.category_id.clone());
5896        }
5897
5898        info!(
5899            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5900            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5901            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5902        );
5903        self.check_resources_with_log("post-sourcing")?;
5904
5905        Ok(SourcingSnapshot {
5906            spend_analyses,
5907            sourcing_projects,
5908            qualifications,
5909            rfx_events,
5910            bids: all_bids,
5911            bid_evaluations,
5912            contracts,
5913            catalog_items,
5914            scorecards,
5915        })
5916    }
5917
5918    /// Build a [`GroupStructure`] from the current company configuration.
5919    ///
5920    /// The first company in the configuration is treated as the ultimate parent.
5921    /// All remaining companies become wholly-owned (100 %) subsidiaries with
5922    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
5923    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5924        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5925
5926        let parent_code = self
5927            .config
5928            .companies
5929            .first()
5930            .map(|c| c.code.clone())
5931            .unwrap_or_else(|| "PARENT".to_string());
5932
5933        let mut group = GroupStructure::new(parent_code);
5934
5935        for company in self.config.companies.iter().skip(1) {
5936            let sub =
5937                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5938            group.add_subsidiary(sub);
5939        }
5940
5941        group
5942    }
5943
5944    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
5945    fn phase_intercompany(
5946        &mut self,
5947        journal_entries: &[JournalEntry],
5948        stats: &mut EnhancedGenerationStatistics,
5949    ) -> SynthResult<IntercompanySnapshot> {
5950        // Skip if intercompany is disabled in config
5951        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5952            debug!("Phase 14b: Skipped (intercompany generation disabled)");
5953            return Ok(IntercompanySnapshot::default());
5954        }
5955
5956        // Intercompany requires at least 2 companies
5957        if self.config.companies.len() < 2 {
5958            debug!(
5959                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5960                self.config.companies.len()
5961            );
5962            return Ok(IntercompanySnapshot::default());
5963        }
5964
5965        info!("Phase 14b: Generating Intercompany Transactions");
5966
5967        // Build the group structure early — used by ISA 600 component auditor scope
5968        // and consolidated financial statement generators downstream.
5969        let group_structure = self.build_group_structure();
5970        debug!(
5971            "Group structure built: parent={}, subsidiaries={}",
5972            group_structure.parent_entity,
5973            group_structure.subsidiaries.len()
5974        );
5975
5976        let seed = self.seed;
5977        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5978            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5979        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5980
5981        // Build ownership structure from company configs
5982        // First company is treated as the parent, remaining are subsidiaries
5983        let parent_code = self.config.companies[0].code.clone();
5984        let mut ownership_structure =
5985            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5986
5987        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5988            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5989                format!("REL{:03}", i + 1),
5990                parent_code.clone(),
5991                company.code.clone(),
5992                rust_decimal::Decimal::from(100), // Default 100% ownership
5993                start_date,
5994            );
5995            ownership_structure.add_relationship(relationship);
5996        }
5997
5998        // Convert config transfer pricing method to core model enum
5999        let tp_method = match self.config.intercompany.transfer_pricing_method {
6000            datasynth_config::schema::TransferPricingMethod::CostPlus => {
6001                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
6002            }
6003            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
6004                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
6005            }
6006            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
6007                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
6008            }
6009            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
6010                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
6011            }
6012            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
6013                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
6014            }
6015        };
6016
6017        // Build IC generator config from schema config
6018        let ic_currency = self
6019            .config
6020            .companies
6021            .first()
6022            .map(|c| c.currency.clone())
6023            .unwrap_or_else(|| "USD".to_string());
6024        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
6025            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
6026            transfer_pricing_method: tp_method,
6027            markup_percent: rust_decimal::Decimal::from_f64_retain(
6028                self.config.intercompany.markup_percent,
6029            )
6030            .unwrap_or(rust_decimal::Decimal::from(5)),
6031            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
6032            default_currency: ic_currency,
6033            ..Default::default()
6034        };
6035
6036        // Create IC generator
6037        let mut ic_generator = datasynth_generators::ICGenerator::new(
6038            ic_gen_config,
6039            ownership_structure.clone(),
6040            seed + 50,
6041        );
6042
6043        // Generate IC transactions for the period
6044        // Use ~3 transactions per day as a reasonable default
6045        let transactions_per_day = 3;
6046        let matched_pairs = ic_generator.generate_transactions_for_period(
6047            start_date,
6048            end_date,
6049            transactions_per_day,
6050        );
6051
6052        // Generate IC source P2P/O2C documents
6053        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
6054        debug!(
6055            "Generated {} IC seller invoices, {} IC buyer POs",
6056            ic_doc_chains.seller_invoices.len(),
6057            ic_doc_chains.buyer_orders.len()
6058        );
6059
6060        // Generate journal entries from matched pairs
6061        let mut seller_entries = Vec::new();
6062        let mut buyer_entries = Vec::new();
6063        let fiscal_year = start_date.year();
6064
6065        for pair in &matched_pairs {
6066            let fiscal_period = pair.posting_date.month();
6067            let (seller_je, buyer_je) =
6068                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
6069            seller_entries.push(seller_je);
6070            buyer_entries.push(buyer_je);
6071        }
6072
6073        // Run matching engine
6074        let matching_config = datasynth_generators::ICMatchingConfig {
6075            base_currency: self
6076                .config
6077                .companies
6078                .first()
6079                .map(|c| c.currency.clone())
6080                .unwrap_or_else(|| "USD".to_string()),
6081            ..Default::default()
6082        };
6083        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
6084        matching_engine.load_matched_pairs(&matched_pairs);
6085        let matching_result = matching_engine.run_matching(end_date);
6086
6087        // Generate elimination entries if configured
6088        let mut elimination_entries = Vec::new();
6089        if self.config.intercompany.generate_eliminations {
6090            let elim_config = datasynth_generators::EliminationConfig {
6091                consolidation_entity: "GROUP".to_string(),
6092                base_currency: self
6093                    .config
6094                    .companies
6095                    .first()
6096                    .map(|c| c.currency.clone())
6097                    .unwrap_or_else(|| "USD".to_string()),
6098                ..Default::default()
6099            };
6100
6101            let mut elim_generator =
6102                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
6103
6104            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
6105            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
6106                matching_result
6107                    .matched_balances
6108                    .iter()
6109                    .chain(matching_result.unmatched_balances.iter())
6110                    .cloned()
6111                    .collect();
6112
6113            // Build investment and equity maps from the group structure so that the
6114            // elimination generator can produce equity-investment elimination entries
6115            // (parent's investment in subsidiary vs. subsidiary's equity capital).
6116            //
6117            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
6118            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
6119            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
6120            //
6121            // Net assets are derived from the journal entries using account-range heuristics:
6122            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used whenever
6123            // the derived figure is not positive, e.g. no JE data yet (IC phase runs early).
6124            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
6125                std::collections::HashMap::new();
6126            let mut equity_amounts: std::collections::HashMap<
6127                String,
6128                std::collections::HashMap<String, rust_decimal::Decimal>,
6129            > = std::collections::HashMap::new();
6130            {
6131                use rust_decimal::Decimal;
6132                let hundred = Decimal::from(100u32);
6133                let ten_pct = Decimal::new(10, 2); // 0.10
6134                let thirty_pct = Decimal::new(30, 2); // 0.30
6135                let sixty_pct = Decimal::new(60, 2); // 0.60
6136                let parent_code = &group_structure.parent_entity;
6137                for sub in &group_structure.subsidiaries {
6138                    let net_assets = {
6139                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6140                        if na > Decimal::ZERO {
6141                            na
6142                        } else {
6143                            Decimal::from(1_000_000u64)
6144                        }
6145                    };
6146                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
6147                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
6148                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
6149
6150                    // Split subsidiary equity into conventional components:
6151                    // 10 % share capital / 30 % APIC / 60 % retained earnings
6152                    let mut eq_map = std::collections::HashMap::new();
6153                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
6154                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
6155                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
6156                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
6157                }
6158            }
6159
6160            let journal = elim_generator.generate_eliminations(
6161                &fiscal_period,
6162                end_date,
6163                &all_balances,
6164                &matched_pairs,
6165                &investment_amounts,
6166                &equity_amounts,
6167            );
6168
6169            elimination_entries = journal.entries.clone();
6170        }
6171
6172        let matched_pair_count = matched_pairs.len();
6173        let elimination_entry_count = elimination_entries.len();
6174        let match_rate = matching_result.match_rate;
6175
6176        stats.ic_matched_pair_count = matched_pair_count;
6177        stats.ic_elimination_count = elimination_entry_count;
6178        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
6179
6180        info!(
6181            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
6182            matched_pair_count,
6183            stats.ic_transaction_count,
6184            seller_entries.len(),
6185            buyer_entries.len(),
6186            elimination_entry_count,
6187            match_rate * 100.0
6188        );
6189        self.check_resources_with_log("post-intercompany")?;
6190
6191        // ----------------------------------------------------------------
6192        // NCI measurements: derive from group structure ownership percentages
6193        // ----------------------------------------------------------------
6194        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
6195            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
6196            use rust_decimal::Decimal;
6197
6198            let eight_pct = Decimal::new(8, 2); // 0.08
6199
6200            group_structure
6201                .subsidiaries
6202                .iter()
6203                .filter(|sub| {
6204                    sub.nci_percentage > Decimal::ZERO
6205                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
6206                })
6207                .map(|sub| {
6208                    // Compute net assets from actual journal entries for this subsidiary.
6209                    // Fall back to 1_000_000 whenever the result is not positive (e.g. the
6210                    // IC phase runs before the main JE batch has been populated).
6211                    let net_assets_from_jes =
6212                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6213
6214                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
6215                        net_assets_from_jes.round_dp(2)
6216                    } else {
6217                        // Fallback: use a plausible base amount
6218                        Decimal::from(1_000_000u64)
6219                    };
6220
6221                    // Net income approximated as 8% of net assets
6222                    let net_income = (net_assets * eight_pct).round_dp(2);
6223
6224                    NciMeasurement::compute(
6225                        sub.entity_code.clone(),
6226                        sub.nci_percentage,
6227                        net_assets,
6228                        net_income,
6229                    )
6230                })
6231                .collect()
6232        };
6233
6234        if !nci_measurements.is_empty() {
6235            info!(
6236                "NCI measurements: {} subsidiaries with non-controlling interests",
6237                nci_measurements.len()
6238            );
6239        }
6240
6241        Ok(IntercompanySnapshot {
6242            group_structure: Some(group_structure),
6243            matched_pairs,
6244            seller_journal_entries: seller_entries,
6245            buyer_journal_entries: buyer_entries,
6246            elimination_entries,
6247            nci_measurements,
6248            ic_document_chains: Some(ic_doc_chains),
6249            matched_pair_count,
6250            elimination_entry_count,
6251            match_rate,
6252        })
6253    }
6254
6255    /// Phase 15: Generate bank reconciliations and financial statements.
6256    fn phase_financial_reporting(
6257        &mut self,
6258        document_flows: &DocumentFlowSnapshot,
6259        journal_entries: &[JournalEntry],
6260        coa: &Arc<ChartOfAccounts>,
6261        _hr: &HrSnapshot,
6262        _audit: &AuditSnapshot,
6263        stats: &mut EnhancedGenerationStatistics,
6264    ) -> SynthResult<FinancialReportingSnapshot> {
6265        let fs_enabled = self.phase_config.generate_financial_statements
6266            || self.config.financial_reporting.enabled;
6267        let br_enabled = self.phase_config.generate_bank_reconciliation;
6268
6269        if !fs_enabled && !br_enabled {
6270            debug!("Phase 15: Skipped (financial reporting disabled)");
6271            return Ok(FinancialReportingSnapshot::default());
6272        }
6273
6274        info!("Phase 15: Generating Financial Reporting Data");
6275
6276        let seed = self.seed;
6277        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6278            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6279
6280        let mut financial_statements = Vec::new();
6281        let mut bank_reconciliations = Vec::new();
6282        let mut trial_balances = Vec::new();
6283        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6284        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6285            Vec::new();
6286        // Standalone statements keyed by entity code
6287        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6288            std::collections::HashMap::new();
6289        // Consolidated statements (one per period)
6290        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6291        // Consolidation schedules (one per period)
6292        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6293
6294        // Generate financial statements from JE-derived trial balances.
6295        //
6296        // When journal entries are available, we use cumulative trial balances for
6297        // balance sheet accounts and current-period trial balances for income
6298        // statement accounts. We also track prior-period trial balances so the
6299        // generator can produce comparative amounts, and we build a proper
6300        // cash flow statement from working capital changes rather than random data.
6301        if fs_enabled {
6302            let has_journal_entries = !journal_entries.is_empty();
6303
6304            // Use FinancialStatementGenerator for balance sheet and income statement,
6305            // but build cash flow ourselves from TB data when JEs are available.
6306            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6307            // Separate generator for consolidated statements (different seed offset)
6308            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6309
6310            // Collect elimination JEs once (reused across periods)
6311            let elimination_entries: Vec<&JournalEntry> = journal_entries
6312                .iter()
6313                .filter(|je| je.header.is_elimination)
6314                .collect();
6315
6316            // Generate one set of statements per period, per entity
6317            for period in 0..self.config.global.period_months {
6318                let period_start = start_date + chrono::Months::new(period);
6319                let period_end =
6320                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6321                let fiscal_year = period_end.year() as u16;
6322                let fiscal_period = period_end.month() as u8;
6323                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6324
6325                // Build per-entity trial balances for this period (non-elimination JEs)
6326                // We accumulate them for the consolidation step.
6327                let mut entity_tb_map: std::collections::HashMap<
6328                    String,
6329                    std::collections::HashMap<String, rust_decimal::Decimal>,
6330                > = std::collections::HashMap::new();
6331
6332                // --- Standalone: one set of statements per company ---
6333                // v5.33: resolve once per phase. In single-shard / standalone
6334                // mode this is the primary country's framework; in group
6335                // mode each shard runs against its own entity (one company)
6336                // so the primary-country lookup is the entity's. Either way
6337                // the string drives framework-aware TB classification (Defect
6338                // A fix — German SKR / French PCG accounts no longer routed
6339                // through a US-only prefix table).
6340                let framework_str = self.resolve_framework_str();
6341                for (company_idx, company) in self.config.companies.iter().enumerate() {
6342                    let company_code = company.code.as_str();
6343                    let currency = company.currency.as_str();
6344                    // Use a unique seed offset per company to keep statements deterministic
6345                    // and distinct across companies
6346                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6347                    let mut company_fs_gen =
6348                        FinancialStatementGenerator::new(seed + company_seed_offset);
6349
6350                    if has_journal_entries {
6351                        let tb_entries = Self::build_cumulative_trial_balance(
6352                            journal_entries,
6353                            coa,
6354                            company_code,
6355                            start_date,
6356                            period_end,
6357                            fiscal_year,
6358                            fiscal_period,
6359                            framework_str,
6360                        );
6361
6362                        // Accumulate per-entity category balances for consolidation
6363                        let entity_cat_map =
6364                            entity_tb_map.entry(company_code.to_string()).or_default();
6365                        for tb_entry in &tb_entries {
6366                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
6367                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6368                        }
6369
6370                        let stmts = company_fs_gen.generate(
6371                            company_code,
6372                            currency,
6373                            &tb_entries,
6374                            period_start,
6375                            period_end,
6376                            fiscal_year,
6377                            fiscal_period,
6378                            None,
6379                            "SYS-AUTOCLOSE",
6380                        );
6381
6382                        let mut entity_stmts = Vec::new();
6383                        for stmt in stmts {
6384                            if stmt.statement_type == StatementType::CashFlowStatement {
6385                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6386                                let cf_items = Self::build_cash_flow_from_trial_balances(
6387                                    &tb_entries,
6388                                    None,
6389                                    net_income,
6390                                );
6391                                entity_stmts.push(FinancialStatement {
6392                                    cash_flow_items: cf_items,
6393                                    ..stmt
6394                                });
6395                            } else {
6396                                entity_stmts.push(stmt);
6397                            }
6398                        }
6399
6400                        // Add to the flat financial_statements list (used by KPI/budget)
6401                        financial_statements.extend(entity_stmts.clone());
6402
6403                        // Store standalone per-entity
6404                        standalone_statements
6405                            .entry(company_code.to_string())
6406                            .or_default()
6407                            .extend(entity_stmts);
6408
6409                        // Only store trial balance for the first company in the period
6410                        // to avoid duplicates in the trial_balances list
6411                        if company_idx == 0 {
6412                            trial_balances.push(PeriodTrialBalance {
6413                                fiscal_year,
6414                                fiscal_period,
6415                                period_start,
6416                                period_end,
6417                                entries: tb_entries,
6418                                framework: framework_str.to_string(),
6419                            });
6420                        }
6421                    } else {
6422                        // Fallback: no JEs available
6423                        let tb_entries = Self::build_trial_balance_from_entries(
6424                            journal_entries,
6425                            coa,
6426                            company_code,
6427                            fiscal_year,
6428                            fiscal_period,
6429                            framework_str,
6430                        );
6431
6432                        let stmts = company_fs_gen.generate(
6433                            company_code,
6434                            currency,
6435                            &tb_entries,
6436                            period_start,
6437                            period_end,
6438                            fiscal_year,
6439                            fiscal_period,
6440                            None,
6441                            "SYS-AUTOCLOSE",
6442                        );
6443                        financial_statements.extend(stmts.clone());
6444                        standalone_statements
6445                            .entry(company_code.to_string())
6446                            .or_default()
6447                            .extend(stmts);
6448
6449                        if company_idx == 0 && !tb_entries.is_empty() {
6450                            trial_balances.push(PeriodTrialBalance {
6451                                fiscal_year,
6452                                fiscal_period,
6453                                period_start,
6454                                period_end,
6455                                entries: tb_entries,
6456                                framework: framework_str.to_string(),
6457                            });
6458                        }
6459                    }
6460                }
6461
6462                // --- Consolidated: aggregate all entities + apply eliminations ---
6463                // Use the primary (first) company's currency for the consolidated statement
6464                let group_currency = self
6465                    .config
6466                    .companies
6467                    .first()
6468                    .map(|c| c.currency.as_str())
6469                    .unwrap_or("USD");
6470
6471                // Build owned elimination entries for this period
6472                let period_eliminations: Vec<JournalEntry> = elimination_entries
6473                    .iter()
6474                    .filter(|je| {
6475                        je.header.fiscal_year == fiscal_year
6476                            && je.header.fiscal_period == fiscal_period
6477                    })
6478                    .map(|je| (*je).clone())
6479                    .collect();
6480
6481                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6482                    &entity_tb_map,
6483                    &period_eliminations,
6484                    &period_label,
6485                );
6486
6487                // Build a pseudo trial balance from consolidated line items for the
6488                // FinancialStatementGenerator to use (only for cash flow direction).
6489                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6490                    .line_items
6491                    .iter()
6492                    .map(|li| {
6493                        let net = li.post_elimination_total;
6494                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6495                            (net, rust_decimal::Decimal::ZERO)
6496                        } else {
6497                            (rust_decimal::Decimal::ZERO, -net)
6498                        };
6499                        datasynth_generators::TrialBalanceEntry {
6500                            account_code: li.account_category.clone(),
6501                            account_name: li.account_category.clone(),
6502                            category: li.account_category.clone(),
6503                            debit_balance: debit,
6504                            credit_balance: credit,
6505                        }
6506                    })
6507                    .collect();
6508
6509                let mut cons_stmts = cons_gen.generate(
6510                    "GROUP",
6511                    group_currency,
6512                    &cons_tb,
6513                    period_start,
6514                    period_end,
6515                    fiscal_year,
6516                    fiscal_period,
6517                    None,
6518                    "SYS-AUTOCLOSE",
6519                );
6520
6521                // Split consolidated line items by statement type.
6522                // The consolidation generator returns BS items first, then IS items; an item
6523                // is treated as BS when its upper-cased label equals a category listed below.
6524                let bs_categories: &[&str] = &[
6525                    "CASH",
6526                    "RECEIVABLES",
6527                    "INVENTORY",
6528                    "FIXEDASSETS",
6529                    "PAYABLES",
6530                    "ACCRUEDLIABILITIES",
6531                    "LONGTERMDEBT",
6532                    "EQUITY",
6533                ];
6534                let (bs_items, is_items): (Vec<_>, Vec<_>) =
6535                    cons_line_items.into_iter().partition(|li| {
6536                        let upper = li.label.to_uppercase();
6537                        bs_categories.iter().any(|c| upper == *c)
6538                    });
6539
6540                for stmt in &mut cons_stmts {
6541                    stmt.is_consolidated = true;
6542                    match stmt.statement_type {
6543                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6544                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6545                        _ => {} // CF and equity change statements keep generator output
6546                    }
6547                }
6548
6549                consolidated_statements.extend(cons_stmts);
6550                consolidation_schedules.push(schedule);
6551            }
6552
6553            // Backward compat: if only 1 company, use existing code path logic
6554            // (prior_cumulative_tb for comparative amounts). Already handled above;
6555            // the prior_ref is omitted to keep this change minimal.
6556            let _ = &mut fs_gen; // suppress unused warning
6557
6558            stats.financial_statement_count = financial_statements.len();
6559            info!(
6560                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6561                stats.financial_statement_count,
6562                consolidated_statements.len(),
6563                has_journal_entries
6564            );
6565
6566            // ----------------------------------------------------------------
6567            // IFRS 8 / ASC 280: Operating Segment Reporting
6568            // ----------------------------------------------------------------
6569            // Build entity seeds from the company configuration.
6570            let entity_seeds: Vec<SegmentSeed> = self
6571                .config
6572                .companies
6573                .iter()
6574                .map(|c| SegmentSeed {
6575                    code: c.code.clone(),
6576                    name: c.name.clone(),
6577                    currency: c.currency.clone(),
6578                })
6579                .collect();
6580
6581            let mut seg_gen = SegmentGenerator::new(seed + 30);
6582
6583            // Generate one set of segment reports per period.
6584            // We extract consolidated revenue / profit / assets from the consolidated
6585            // financial statements produced above, falling back to simple sums when
6586            // no consolidated statements were generated (single-entity path).
6587            for period in 0..self.config.global.period_months {
6588                let period_end =
6589                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6590                let fiscal_year = period_end.year() as u16;
6591                let fiscal_period = period_end.month() as u8;
6592                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6593
6594                use datasynth_core::models::StatementType;
6595
6596                // Try to find consolidated income statement for this period
6597                let cons_is = consolidated_statements.iter().find(|s| {
6598                    s.fiscal_year == fiscal_year
6599                        && s.fiscal_period == fiscal_period
6600                        && s.statement_type == StatementType::IncomeStatement
6601                });
6602                let cons_bs = consolidated_statements.iter().find(|s| {
6603                    s.fiscal_year == fiscal_year
6604                        && s.fiscal_period == fiscal_period
6605                        && s.statement_type == StatementType::BalanceSheet
6606                });
6607
6608                // If consolidated statements not available fall back to the flat list
6609                let is_stmt = cons_is.or_else(|| {
6610                    financial_statements.iter().find(|s| {
6611                        s.fiscal_year == fiscal_year
6612                            && s.fiscal_period == fiscal_period
6613                            && s.statement_type == StatementType::IncomeStatement
6614                    })
6615                });
6616                let bs_stmt = cons_bs.or_else(|| {
6617                    financial_statements.iter().find(|s| {
6618                        s.fiscal_year == fiscal_year
6619                            && s.fiscal_period == fiscal_period
6620                            && s.statement_type == StatementType::BalanceSheet
6621                    })
6622                });
6623
6624                let consolidated_revenue = is_stmt
6625                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6626                    .map(|li| -li.amount) // revenue is stored as negative in IS
6627                    .unwrap_or(rust_decimal::Decimal::ZERO);
6628
6629                let consolidated_profit = is_stmt
6630                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6631                    .map(|li| li.amount)
6632                    .unwrap_or(rust_decimal::Decimal::ZERO);
6633
6634                let consolidated_assets = bs_stmt
6635                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6636                    .map(|li| li.amount)
6637                    .unwrap_or(rust_decimal::Decimal::ZERO);
6638
6639                // Skip periods where we have no financial data
6640                if consolidated_revenue == rust_decimal::Decimal::ZERO
6641                    && consolidated_assets == rust_decimal::Decimal::ZERO
6642                {
6643                    continue;
6644                }
6645
6646                let group_code = self
6647                    .config
6648                    .companies
6649                    .first()
6650                    .map(|c| c.code.as_str())
6651                    .unwrap_or("GROUP");
6652
6653                // Compute period depreciation from JEs with document type "CL" hitting account
6654                // 6000 (depreciation expense).  These are generated by phase_period_close.
6655                let total_depr: rust_decimal::Decimal = journal_entries
6656                    .iter()
6657                    .filter(|je| je.header.document_type == "CL")
6658                    .flat_map(|je| je.lines.iter())
6659                    .filter(|l| l.gl_account.starts_with("6000"))
6660                    .map(|l| l.debit_amount)
6661                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6662                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6663                    Some(total_depr)
6664                } else {
6665                    None
6666                };
6667
6668                let (segs, recon) = seg_gen.generate(
6669                    group_code,
6670                    &period_label,
6671                    consolidated_revenue,
6672                    consolidated_profit,
6673                    consolidated_assets,
6674                    &entity_seeds,
6675                    depr_param,
6676                );
6677                segment_reports.extend(segs);
6678                segment_reconciliations.push(recon);
6679            }
6680
6681            info!(
6682                "Segment reports generated: {} segments, {} reconciliations",
6683                segment_reports.len(),
6684                segment_reconciliations.len()
6685            );
6686        }
6687
6688        // Generate bank reconciliations from payment data
6689        if br_enabled && !document_flows.payments.is_empty() {
6690            let employee_ids: Vec<String> = self
6691                .master_data
6692                .employees
6693                .iter()
6694                .map(|e| e.employee_id.clone())
6695                .collect();
6696            let mut br_gen =
6697                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6698
6699            // Group payments by company code and period
6700            for company in &self.config.companies {
6701                let company_payments: Vec<PaymentReference> = document_flows
6702                    .payments
6703                    .iter()
6704                    .filter(|p| p.header.company_code == company.code)
6705                    .map(|p| PaymentReference {
6706                        id: p.header.document_id.clone(),
6707                        amount: if p.is_vendor { p.amount } else { -p.amount },
6708                        date: p.header.document_date,
6709                        reference: p
6710                            .check_number
6711                            .clone()
6712                            .or_else(|| p.wire_reference.clone())
6713                            .unwrap_or_else(|| p.header.document_id.clone()),
6714                    })
6715                    .collect();
6716
6717                if company_payments.is_empty() {
6718                    continue;
6719                }
6720
6721                let bank_account_id = format!("{}-MAIN", company.code);
6722
6723                // Generate one reconciliation per period
6724                for period in 0..self.config.global.period_months {
6725                    let period_start = start_date + chrono::Months::new(period);
6726                    let period_end =
6727                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6728
6729                    let period_payments: Vec<PaymentReference> = company_payments
6730                        .iter()
6731                        .filter(|p| p.date >= period_start && p.date <= period_end)
6732                        .cloned()
6733                        .collect();
6734
6735                    let recon = br_gen.generate(
6736                        &company.code,
6737                        &bank_account_id,
6738                        period_start,
6739                        period_end,
6740                        &company.currency,
6741                        &period_payments,
6742                    );
6743                    bank_reconciliations.push(recon);
6744                }
6745            }
6746            info!(
6747                "Bank reconciliations generated: {} reconciliations",
6748                bank_reconciliations.len()
6749            );
6750        }
6751
6752        stats.bank_reconciliation_count = bank_reconciliations.len();
6753        self.check_resources_with_log("post-financial-reporting")?;
6754
6755        if !trial_balances.is_empty() {
6756            info!(
6757                "Period-close trial balances captured: {} periods",
6758                trial_balances.len()
6759            );
6760        }
6761
6762        // Notes to financial statements are generated in a separate post-processing step
6763        // (generate_notes_to_financial_statements) called after accounting_standards and tax
6764        // phases have completed, so that deferred tax and provision data can be wired in.
6765        let notes_to_financial_statements = Vec::new();
6766
6767        Ok(FinancialReportingSnapshot {
6768            financial_statements,
6769            standalone_statements,
6770            consolidated_statements,
6771            consolidation_schedules,
6772            bank_reconciliations,
6773            trial_balances,
6774            segment_reports,
6775            segment_reconciliations,
6776            notes_to_financial_statements,
6777        })
6778    }
6779
6780    /// Populate notes to financial statements using fully-resolved snapshots.
6781    ///
6782    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
6783    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
6784    /// can be wired into the notes context.  The method mutates
6785    /// `financial_reporting.notes_to_financial_statements` in-place.
6786    fn generate_notes_to_financial_statements(
6787        &self,
6788        financial_reporting: &mut FinancialReportingSnapshot,
6789        accounting_standards: &AccountingStandardsSnapshot,
6790        tax: &TaxSnapshot,
6791        hr: &HrSnapshot,
6792        audit: &AuditSnapshot,
6793        treasury: &TreasurySnapshot,
6794    ) {
6795        use datasynth_config::schema::AccountingFrameworkConfig;
6796        use datasynth_core::models::StatementType;
6797        use datasynth_generators::period_close::notes_generator::{
6798            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6799        };
6800
6801        let seed = self.seed;
6802        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6803        {
6804            Ok(d) => d,
6805            Err(_) => return,
6806        };
6807
6808        let mut notes_gen = NotesGenerator::new(seed + 4235);
6809
6810        for company in &self.config.companies {
6811            let last_period_end = start_date
6812                + chrono::Months::new(self.config.global.period_months)
6813                - chrono::Days::new(1);
6814            let fiscal_year = last_period_end.year() as u16;
6815
6816            // Extract relevant amounts from the already-generated financial statements
6817            let entity_is = financial_reporting
6818                .standalone_statements
6819                .get(&company.code)
6820                .and_then(|stmts| {
6821                    stmts.iter().find(|s| {
6822                        s.fiscal_year == fiscal_year
6823                            && s.statement_type == StatementType::IncomeStatement
6824                    })
6825                });
6826            let entity_bs = financial_reporting
6827                .standalone_statements
6828                .get(&company.code)
6829                .and_then(|stmts| {
6830                    stmts.iter().find(|s| {
6831                        s.fiscal_year == fiscal_year
6832                            && s.statement_type == StatementType::BalanceSheet
6833                    })
6834                });
6835
6836            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
6837            let revenue_amount = entity_is
6838                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6839                .map(|li| li.amount);
6840            let ppe_gross = entity_bs
6841                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6842                .map(|li| li.amount);
6843
6844            let framework = match self
6845                .config
6846                .accounting_standards
6847                .framework
6848                .unwrap_or_default()
6849            {
6850                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6851                    "IFRS".to_string()
6852                }
6853                _ => "US GAAP".to_string(),
6854            };
6855
6856            // ---- Deferred tax (IAS 12 / ASC 740) ----
6857            // Sum closing DTA and DTL from rollforward entries for this entity.
6858            let (entity_dta, entity_dtl) = {
6859                let mut dta = rust_decimal::Decimal::ZERO;
6860                let mut dtl = rust_decimal::Decimal::ZERO;
6861                for rf in &tax.deferred_tax.rollforwards {
6862                    if rf.entity_code == company.code {
6863                        dta += rf.closing_dta;
6864                        dtl += rf.closing_dtl;
6865                    }
6866                }
6867                (
6868                    if dta > rust_decimal::Decimal::ZERO {
6869                        Some(dta)
6870                    } else {
6871                        None
6872                    },
6873                    if dtl > rust_decimal::Decimal::ZERO {
6874                        Some(dtl)
6875                    } else {
6876                        None
6877                    },
6878                )
6879            };
6880
6881            // ---- Provisions (IAS 37 / ASC 450) ----
6882            // Filter provisions to this entity; sum best_estimate amounts.
6883            let entity_provisions: Vec<_> = accounting_standards
6884                .provisions
6885                .iter()
6886                .filter(|p| p.entity_code == company.code)
6887                .collect();
6888            let provision_count = entity_provisions.len();
6889            let total_provisions = if provision_count > 0 {
6890                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6891            } else {
6892                None
6893            };
6894
6895            // ---- Pension data from HR snapshot ----
6896            let entity_pension_plan_count = hr
6897                .pension_plans
6898                .iter()
6899                .filter(|p| p.entity_code == company.code)
6900                .count();
6901            let entity_total_dbo: Option<rust_decimal::Decimal> = {
6902                let sum: rust_decimal::Decimal = hr
6903                    .pension_disclosures
6904                    .iter()
6905                    .filter(|d| {
6906                        hr.pension_plans
6907                            .iter()
6908                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6909                    })
6910                    .map(|d| d.net_pension_liability)
6911                    .sum();
6912                let plan_assets_sum: rust_decimal::Decimal = hr
6913                    .pension_plan_assets
6914                    .iter()
6915                    .filter(|a| {
6916                        hr.pension_plans
6917                            .iter()
6918                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6919                    })
6920                    .map(|a| a.fair_value_closing)
6921                    .sum();
6922                if entity_pension_plan_count > 0 {
6923                    Some(sum + plan_assets_sum)
6924                } else {
6925                    None
6926                }
6927            };
6928            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6929                let sum: rust_decimal::Decimal = hr
6930                    .pension_plan_assets
6931                    .iter()
6932                    .filter(|a| {
6933                        hr.pension_plans
6934                            .iter()
6935                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6936                    })
6937                    .map(|a| a.fair_value_closing)
6938                    .sum();
6939                if entity_pension_plan_count > 0 {
6940                    Some(sum)
6941                } else {
6942                    None
6943                }
6944            };
6945
6946            // ---- Audit data: related parties + subsequent events ----
6947            // Audit snapshot covers all entities; use total counts (common case = single entity).
6948            let rp_count = audit.related_party_transactions.len();
6949            let se_count = audit.subsequent_events.len();
6950            let adjusting_count = audit
6951                .subsequent_events
6952                .iter()
6953                .filter(|e| {
6954                    matches!(
6955                        e.classification,
6956                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6957                    )
6958                })
6959                .count();
6960
6961            let ctx = NotesGeneratorContext {
6962                entity_code: company.code.clone(),
6963                framework,
6964                period: format!("FY{}", fiscal_year),
6965                period_end: last_period_end,
6966                currency: company.currency.clone(),
6967                revenue_amount,
6968                total_ppe_gross: ppe_gross,
6969                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6970                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
6971                deferred_tax_asset: entity_dta,
6972                deferred_tax_liability: entity_dtl,
6973                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
6974                provision_count,
6975                total_provisions,
6976                // Pension data from HR snapshot
6977                pension_plan_count: entity_pension_plan_count,
6978                total_dbo: entity_total_dbo,
6979                total_plan_assets: entity_total_plan_assets,
6980                // Audit data
6981                related_party_transaction_count: rp_count,
6982                subsequent_event_count: se_count,
6983                adjusting_event_count: adjusting_count,
6984                ..NotesGeneratorContext::default()
6985            };
6986
6987            let entity_notes = notes_gen.generate(&ctx);
6988            let standard_note_count = entity_notes.len() as u32;
6989            info!(
6990                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6991                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6992            );
6993            financial_reporting
6994                .notes_to_financial_statements
6995                .extend(entity_notes);
6996
6997            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
6998            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6999                .debt_instruments
7000                .iter()
7001                .filter(|d| d.entity_id == company.code)
7002                .map(|d| {
7003                    (
7004                        format!("{:?}", d.instrument_type),
7005                        d.principal,
7006                        d.maturity_date.to_string(),
7007                    )
7008                })
7009                .collect();
7010
7011            let hedge_count = treasury.hedge_relationships.len();
7012            let effective_hedges = treasury
7013                .hedge_relationships
7014                .iter()
7015                .filter(|h| h.is_effective)
7016                .count();
7017            let total_notional: rust_decimal::Decimal = treasury
7018                .hedging_instruments
7019                .iter()
7020                .map(|h| h.notional_amount)
7021                .sum();
7022            let total_fair_value: rust_decimal::Decimal = treasury
7023                .hedging_instruments
7024                .iter()
7025                .map(|h| h.fair_value)
7026                .sum();
7027
7028            // Join provision_movements with provisions to get entity/type info
7029            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
7030                .provisions
7031                .iter()
7032                .filter(|p| p.entity_code == company.code)
7033                .map(|p| p.id.as_str())
7034                .collect();
7035            let provision_movements: Vec<(
7036                String,
7037                rust_decimal::Decimal,
7038                rust_decimal::Decimal,
7039                rust_decimal::Decimal,
7040            )> = accounting_standards
7041                .provision_movements
7042                .iter()
7043                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
7044                .map(|m| {
7045                    let prov_type = accounting_standards
7046                        .provisions
7047                        .iter()
7048                        .find(|p| p.id == m.provision_id)
7049                        .map(|p| format!("{:?}", p.provision_type))
7050                        .unwrap_or_else(|| "Unknown".to_string());
7051                    (prov_type, m.opening, m.additions, m.closing)
7052                })
7053                .collect();
7054
7055            let enhanced_ctx = EnhancedNotesContext {
7056                entity_code: company.code.clone(),
7057                period: format!("FY{}", fiscal_year),
7058                currency: company.currency.clone(),
7059                // Inventory breakdown: best-effort using zero (would need balance tracker)
7060                finished_goods_value: rust_decimal::Decimal::ZERO,
7061                wip_value: rust_decimal::Decimal::ZERO,
7062                raw_materials_value: rust_decimal::Decimal::ZERO,
7063                debt_instruments,
7064                hedge_count,
7065                effective_hedges,
7066                total_notional,
7067                total_fair_value,
7068                provision_movements,
7069            };
7070
7071            let enhanced_notes =
7072                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
7073            if !enhanced_notes.is_empty() {
7074                info!(
7075                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
7076                    company.code,
7077                    enhanced_notes.len(),
7078                    enhanced_ctx.debt_instruments.len(),
7079                    hedge_count,
7080                    enhanced_ctx.provision_movements.len(),
7081                );
7082                financial_reporting
7083                    .notes_to_financial_statements
7084                    .extend(enhanced_notes);
7085            }
7086        }
7087    }
7088
7089    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
7090    ///
7091    /// This ensures the trial balance is coherent with the JEs: every debit and credit
7092    /// posted in the journal entries flows through to the trial balance, using the real
7093    /// GL account numbers from the CoA.
7094    fn build_trial_balance_from_entries(
7095        journal_entries: &[JournalEntry],
7096        coa: &ChartOfAccounts,
7097        company_code: &str,
7098        fiscal_year: u16,
7099        fiscal_period: u8,
7100        framework: &str,
7101    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7102        use rust_decimal::Decimal;
7103
7104        // Accumulate total debits and credits per GL account
7105        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
7106        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
7107
7108        for je in journal_entries {
7109            // Filter to matching company, fiscal year, and period
7110            if je.header.company_code != company_code
7111                || je.header.fiscal_year != fiscal_year
7112                || je.header.fiscal_period != fiscal_period
7113            {
7114                continue;
7115            }
7116
7117            for line in &je.lines {
7118                let acct = &line.gl_account;
7119                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
7120                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
7121            }
7122        }
7123
7124        // Build a TrialBalanceEntry for each account that had activity
7125        let mut all_accounts: Vec<&String> = account_debits
7126            .keys()
7127            .chain(account_credits.keys())
7128            .collect::<std::collections::HashSet<_>>()
7129            .into_iter()
7130            .collect();
7131        all_accounts.sort();
7132
7133        let mut entries = Vec::new();
7134
7135        for acct_number in all_accounts {
7136            let debit = account_debits
7137                .get(acct_number)
7138                .copied()
7139                .unwrap_or(Decimal::ZERO);
7140            let credit = account_credits
7141                .get(acct_number)
7142                .copied()
7143                .unwrap_or(Decimal::ZERO);
7144
7145            if debit.is_zero() && credit.is_zero() {
7146                continue;
7147            }
7148
7149            // Look up account name from CoA, fall back to "Account {code}"
7150            let account_name = coa
7151                .get_account(acct_number)
7152                .map(|gl| gl.short_description.clone())
7153                .unwrap_or_else(|| format!("Account {acct_number}"));
7154
7155            // Map account code prefix to the category strings expected by
7156            // FinancialStatementGenerator (Cash, Receivables, Inventory,
7157            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
7158            // OperatingExpenses).
7159            let category = Self::category_from_account_code(acct_number, framework);
7160
7161            entries.push(datasynth_generators::TrialBalanceEntry {
7162                account_code: acct_number.clone(),
7163                account_name,
7164                category,
7165                debit_balance: debit,
7166                credit_balance: credit,
7167            });
7168        }
7169
7170        entries
7171    }
7172
7173    /// Build a cumulative trial balance by aggregating all JEs from the start up to
7174    /// (and including) the given period end date.
7175    ///
7176    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
7177    /// while income statement accounts (revenue, expenses) show only the current period.
7178    /// The two are merged into a single Vec for the FinancialStatementGenerator.
7179    #[allow(clippy::too_many_arguments)]
7180    fn build_cumulative_trial_balance(
7181        journal_entries: &[JournalEntry],
7182        coa: &ChartOfAccounts,
7183        company_code: &str,
7184        start_date: NaiveDate,
7185        period_end: NaiveDate,
7186        fiscal_year: u16,
7187        fiscal_period: u8,
7188        framework: &str,
7189    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7190        use rust_decimal::Decimal;
7191
7192        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
7193        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
7194        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
7195
7196        // Accumulate debits/credits for income statement accounts (current period only)
7197        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
7198        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
7199
7200        for je in journal_entries {
7201            if je.header.company_code != company_code {
7202                continue;
7203            }
7204
7205            for line in &je.lines {
7206                let acct = &line.gl_account;
7207                // Framework-aware BS bucketing — fixes the Defect A
7208                // mis-classification where US-style prefix tables routed
7209                // SKR/PCG balance-sheet accounts through the P&L bucket
7210                // (or vice versa), giving the resulting TB an asymmetric
7211                // time window with no integrity invariant left to test.
7212                let is_bs_account = Self::is_balance_sheet_account(acct, framework);
7213
7214                if is_bs_account {
7215                    // Balance sheet: accumulate from start through period_end
7216                    if je.header.document_date <= period_end
7217                        && je.header.document_date >= start_date
7218                    {
7219                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7220                            line.debit_amount;
7221                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7222                            line.credit_amount;
7223                    }
7224                } else {
7225                    // Income statement: current period only
7226                    if je.header.fiscal_year == fiscal_year
7227                        && je.header.fiscal_period == fiscal_period
7228                    {
7229                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7230                            line.debit_amount;
7231                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7232                            line.credit_amount;
7233                    }
7234                }
7235            }
7236        }
7237
7238        // Merge all accounts
7239        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
7240        all_accounts.extend(bs_debits.keys().cloned());
7241        all_accounts.extend(bs_credits.keys().cloned());
7242        all_accounts.extend(is_debits.keys().cloned());
7243        all_accounts.extend(is_credits.keys().cloned());
7244
7245        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
7246        sorted_accounts.sort();
7247
7248        let mut entries = Vec::new();
7249
7250        for acct_number in &sorted_accounts {
7251            let category = Self::category_from_account_code(acct_number, framework);
7252            let is_bs_account = Self::is_balance_sheet_account(acct_number, framework);
7253
7254            let (debit, credit) = if is_bs_account {
7255                (
7256                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7257                    bs_credits
7258                        .get(acct_number)
7259                        .copied()
7260                        .unwrap_or(Decimal::ZERO),
7261                )
7262            } else {
7263                (
7264                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7265                    is_credits
7266                        .get(acct_number)
7267                        .copied()
7268                        .unwrap_or(Decimal::ZERO),
7269                )
7270            };
7271
7272            if debit.is_zero() && credit.is_zero() {
7273                continue;
7274            }
7275
7276            let account_name = coa
7277                .get_account(acct_number)
7278                .map(|gl| gl.short_description.clone())
7279                .unwrap_or_else(|| format!("Account {acct_number}"));
7280
7281            entries.push(datasynth_generators::TrialBalanceEntry {
7282                account_code: acct_number.clone(),
7283                account_name,
7284                category,
7285                debit_balance: debit,
7286                credit_balance: credit,
7287            });
7288        }
7289
7290        entries
7291    }
7292
7293    /// Build a JE-derived cash flow statement using the indirect method.
7294    ///
7295    /// Compares current and prior cumulative trial balances to derive working capital
7296    /// changes, producing a coherent cash flow statement tied to actual journal entries.
7297    fn build_cash_flow_from_trial_balances(
7298        current_tb: &[datasynth_generators::TrialBalanceEntry],
7299        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7300        net_income: rust_decimal::Decimal,
7301    ) -> Vec<CashFlowItem> {
7302        use rust_decimal::Decimal;
7303
7304        // Helper: aggregate a TB by category and return net (debit - credit)
7305        let aggregate =
7306            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7307                let mut map: HashMap<String, Decimal> = HashMap::new();
7308                for entry in tb {
7309                    let net = entry.debit_balance - entry.credit_balance;
7310                    *map.entry(entry.category.clone()).or_default() += net;
7311                }
7312                map
7313            };
7314
7315        let current = aggregate(current_tb);
7316        let prior = prior_tb.map(aggregate);
7317
7318        // Get balance for a category, defaulting to zero
7319        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7320            *map.get(key).unwrap_or(&Decimal::ZERO)
7321        };
7322
7323        // Compute change: current - prior (or current if no prior)
7324        let change = |key: &str| -> Decimal {
7325            let curr = get(&current, key);
7326            match &prior {
7327                Some(p) => curr - get(p, key),
7328                None => curr,
7329            }
7330        };
7331
7332        // Operating activities (indirect method)
7333        // Depreciation add-back: approximate from FixedAssets decrease
7334        let fixed_asset_change = change("FixedAssets");
7335        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7336            -fixed_asset_change
7337        } else {
7338            Decimal::ZERO
7339        };
7340
7341        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
7342        let ar_change = change("Receivables");
7343        let inventory_change = change("Inventory");
7344        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
7345        let ap_change = change("Payables");
7346        let accrued_change = change("AccruedLiabilities");
7347
7348        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7349            + (-ap_change)
7350            + (-accrued_change);
7351
7352        // Investing activities
7353        let capex = if fixed_asset_change > Decimal::ZERO {
7354            -fixed_asset_change
7355        } else {
7356            Decimal::ZERO
7357        };
7358        let investing_cf = capex;
7359
7360        // Financing activities
7361        let debt_change = -change("LongTermDebt");
7362        let equity_change = -change("Equity");
7363        let financing_cf = debt_change + equity_change;
7364
7365        let net_change = operating_cf + investing_cf + financing_cf;
7366
7367        vec![
7368            CashFlowItem {
7369                item_code: "CF-NI".to_string(),
7370                label: "Net Income".to_string(),
7371                category: CashFlowCategory::Operating,
7372                amount: net_income,
7373                amount_prior: None,
7374                sort_order: 1,
7375                is_total: false,
7376            },
7377            CashFlowItem {
7378                item_code: "CF-DEP".to_string(),
7379                label: "Depreciation & Amortization".to_string(),
7380                category: CashFlowCategory::Operating,
7381                amount: depreciation_addback,
7382                amount_prior: None,
7383                sort_order: 2,
7384                is_total: false,
7385            },
7386            CashFlowItem {
7387                item_code: "CF-AR".to_string(),
7388                label: "Change in Accounts Receivable".to_string(),
7389                category: CashFlowCategory::Operating,
7390                amount: -ar_change,
7391                amount_prior: None,
7392                sort_order: 3,
7393                is_total: false,
7394            },
7395            CashFlowItem {
7396                item_code: "CF-AP".to_string(),
7397                label: "Change in Accounts Payable".to_string(),
7398                category: CashFlowCategory::Operating,
7399                amount: -ap_change,
7400                amount_prior: None,
7401                sort_order: 4,
7402                is_total: false,
7403            },
7404            CashFlowItem {
7405                item_code: "CF-INV".to_string(),
7406                label: "Change in Inventory".to_string(),
7407                category: CashFlowCategory::Operating,
7408                amount: -inventory_change,
7409                amount_prior: None,
7410                sort_order: 5,
7411                is_total: false,
7412            },
7413            CashFlowItem {
7414                item_code: "CF-OP".to_string(),
7415                label: "Net Cash from Operating Activities".to_string(),
7416                category: CashFlowCategory::Operating,
7417                amount: operating_cf,
7418                amount_prior: None,
7419                sort_order: 6,
7420                is_total: true,
7421            },
7422            CashFlowItem {
7423                item_code: "CF-CAPEX".to_string(),
7424                label: "Capital Expenditures".to_string(),
7425                category: CashFlowCategory::Investing,
7426                amount: capex,
7427                amount_prior: None,
7428                sort_order: 7,
7429                is_total: false,
7430            },
7431            CashFlowItem {
7432                item_code: "CF-INV-T".to_string(),
7433                label: "Net Cash from Investing Activities".to_string(),
7434                category: CashFlowCategory::Investing,
7435                amount: investing_cf,
7436                amount_prior: None,
7437                sort_order: 8,
7438                is_total: true,
7439            },
7440            CashFlowItem {
7441                item_code: "CF-DEBT".to_string(),
7442                label: "Net Borrowings / (Repayments)".to_string(),
7443                category: CashFlowCategory::Financing,
7444                amount: debt_change,
7445                amount_prior: None,
7446                sort_order: 9,
7447                is_total: false,
7448            },
7449            CashFlowItem {
7450                item_code: "CF-EQ".to_string(),
7451                label: "Equity Changes".to_string(),
7452                category: CashFlowCategory::Financing,
7453                amount: equity_change,
7454                amount_prior: None,
7455                sort_order: 10,
7456                is_total: false,
7457            },
7458            CashFlowItem {
7459                item_code: "CF-FIN-T".to_string(),
7460                label: "Net Cash from Financing Activities".to_string(),
7461                category: CashFlowCategory::Financing,
7462                amount: financing_cf,
7463                amount_prior: None,
7464                sort_order: 11,
7465                is_total: true,
7466            },
7467            CashFlowItem {
7468                item_code: "CF-NET".to_string(),
7469                label: "Net Change in Cash".to_string(),
7470                category: CashFlowCategory::Operating,
7471                amount: net_change,
7472                amount_prior: None,
7473                sort_order: 12,
7474                is_total: true,
7475            },
7476        ]
7477    }
7478
7479    /// Calculate net income from a set of trial balance entries.
7480    ///
7481    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
7482    fn calculate_net_income_from_tb(
7483        tb: &[datasynth_generators::TrialBalanceEntry],
7484    ) -> rust_decimal::Decimal {
7485        use rust_decimal::Decimal;
7486
7487        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7488        for entry in tb {
7489            let net = entry.debit_balance - entry.credit_balance;
7490            *aggregated.entry(entry.category.clone()).or_default() += net;
7491        }
7492
7493        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7494        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7495        let opex = *aggregated
7496            .get("OperatingExpenses")
7497            .unwrap_or(&Decimal::ZERO);
7498        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7499        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7500
7501        // revenue is negative (credit-normal), expenses are positive (debit-normal)
7502        // other_income is typically negative (credit), other_expenses is typically positive
7503        let operating_income = revenue - cogs - opex - other_expenses - other_income;
7504        let tax_rate = Decimal::new(25, 2); // 0.25
7505        let tax = operating_income * tax_rate;
7506        operating_income - tax
7507    }
7508
7509    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
7510    ///
7511    /// Uses the first two digits of the account code to classify into the categories
7512    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
7513    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
7514    /// OperatingExpenses, OtherIncome, OtherExpenses.
7515    /// Map an account code to the orchestrator's 13-bucket category string
7516    /// (`"Cash"` / `"Receivables"` / `"Inventory"` / `"FixedAssets"` /
7517    /// `"Payables"` / `"AccruedLiabilities"` / `"LongTermDebt"` /
7518    /// `"Equity"` / `"Revenue"` / `"CostOfSales"` / `"OperatingExpenses"`
7519    /// / `"OtherIncome"` / `"OtherExpenses"`).
7520    ///
7521    /// `framework` controls which numbering convention is applied:
7522    ///
7523    /// - `"us_gaap"` / `"ifrs"` / `"dual_reporting"` — US-style 4-digit
7524    ///   chart (1xxx assets, 2xxx liabilities, 3xxx equity, 4xxx revenue,
7525    ///   5xxx COGS, 6xxx OpEx, 7xxx other income, 8xxx other expense).
7526    /// - `"french_gaap"` — French PCG (1 = capital/liabilities, 2 = fixed
7527    ///   assets, 3 = inventory, 4 = third parties, 5 = cash, 6 = expenses,
7528    ///   7 = revenue).
7529    /// - `"german_gaap"` / `"hgb"` — German SKR04 (0 = fixed assets,
7530    ///   1 = current assets, 2 = equity, 3 = liabilities, 4 = revenue,
7531    ///   5 = COGS, 6 = OpEx, 7 = financial, 8 = tax/extraordinary).
7532    ///
7533    /// Unknown frameworks fall back to US-style.
7534    fn category_from_account_code(code: &str, framework: &str) -> String {
7535        match framework {
7536            "german_gaap" | "GermanGaap" | "hgb" => Self::skr_category(code),
7537            "french_gaap" | "FrenchGaap" => Self::pcg_category(code),
7538            _ => Self::us_gaap_category(code),
7539        }
7540        .to_string()
7541    }
7542
7543    fn us_gaap_category(code: &str) -> &'static str {
7544        let prefix: String = code.chars().take(2).collect();
7545        match prefix.as_str() {
7546            "10" => "Cash",
7547            "11" => "Receivables",
7548            "12" | "13" | "14" => "Inventory",
7549            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
7550            "20" => "Payables",
7551            "21" | "22" | "23" | "24" => "AccruedLiabilities",
7552            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
7553            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
7554            "40" | "41" | "42" | "43" | "44" => "Revenue",
7555            "50" | "51" | "52" => "CostOfSales",
7556            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
7557                "OperatingExpenses"
7558            }
7559            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
7560            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
7561            _ => "OperatingExpenses",
7562        }
7563    }
7564
7565    /// SKR04 (German GAAP) prefix → orchestrator category.
7566    ///
7567    /// 0 = fixed assets, 1 = current assets (10-12 cash, 13-14 receivables,
7568    /// 15-19 inventory), 2 = equity, 3 = liabilities (3-31 payables,
7569    /// 32-37 accrued, 38-39 long-term debt), 4 = revenue, 5 = COGS,
7570    /// 6 = OpEx, 7 = financial income, 8 = tax/extraordinary expense.
7571    fn skr_category(code: &str) -> &'static str {
7572        let first = code.chars().next().and_then(|c| c.to_digit(10));
7573        let prefix: String = code.chars().take(2).collect();
7574        match first {
7575            Some(0) => "FixedAssets",
7576            Some(1) => match prefix.as_str() {
7577                "10" | "11" | "12" => "Cash",
7578                "13" | "14" => "Receivables",
7579                _ => "Inventory",
7580            },
7581            Some(2) => "Equity",
7582            Some(3) => match prefix.as_str() {
7583                "30" | "31" => "Payables",
7584                "32" | "33" | "34" | "35" | "36" | "37" => "AccruedLiabilities",
7585                _ => "LongTermDebt",
7586            },
7587            Some(4) => "Revenue",
7588            Some(5) => "CostOfSales",
7589            Some(6) => "OperatingExpenses",
7590            Some(7) => "OtherIncome",
7591            Some(8) => "OtherExpenses",
7592            _ => "OperatingExpenses",
7593        }
7594    }
7595
7596    /// French PCG prefix → orchestrator category.
7597    ///
7598    /// 10-14 = equity, 15-19 = liabilities (provisions, debts),
7599    /// 2 = fixed assets, 3 = inventory, 40 = payables, 41 = receivables,
7600    /// 42-49 = liabilities (personnel, tax, group), 5 = cash, 6 = expenses,
7601    /// 7 = revenue.
7602    fn pcg_category(code: &str) -> &'static str {
7603        let first = code.chars().next().and_then(|c| c.to_digit(10));
7604        let second = code.chars().nth(1).and_then(|c| c.to_digit(10));
7605        match first {
7606            Some(1) => match second {
7607                Some(0..=4) => "Equity",
7608                Some(5) => "AccruedLiabilities",
7609                _ => "LongTermDebt",
7610            },
7611            Some(2) => "FixedAssets",
7612            Some(3) => "Inventory",
7613            Some(4) => match second {
7614                Some(0) => "Payables",
7615                Some(1) => "Receivables",
7616                _ => "AccruedLiabilities",
7617            },
7618            Some(5) => "Cash",
7619            Some(6) => "OperatingExpenses",
7620            Some(7) => "Revenue",
7621            Some(8) | Some(9) => "OperatingExpenses",
7622            _ => "OperatingExpenses",
7623        }
7624    }
7625
7626    /// Test whether an account code maps to a balance-sheet line under
7627    /// the given framework. Drives the cumulative-vs-period bucketing in
7628    /// [`Self::build_cumulative_trial_balance`].
7629    ///
7630    /// Delegates to the framework-aware classifier in
7631    /// `datasynth-core::framework_accounts` so SKR (German) and PCG
7632    /// (French) codes are recognised, not silently routed through a
7633    /// US-style prefix table.
7634    fn is_balance_sheet_account(code: &str, framework: &str) -> bool {
7635        // `AccountType` here is the `balance::AccountType` imported at
7636        // the top of the file; `FrameworkAccounts::classify_account_type`
7637        // returns the same enum, so no cross-namespace mapping is needed.
7638        let fa = datasynth_core::framework_accounts::FrameworkAccounts::for_framework(framework);
7639        matches!(
7640            fa.classify_account_type(code),
7641            AccountType::Asset
7642                | AccountType::ContraAsset
7643                | AccountType::Liability
7644                | AccountType::ContraLiability
7645                | AccountType::Equity
7646                | AccountType::ContraEquity
7647        )
7648    }
7649
7650    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
7651    fn phase_hr_data(
7652        &mut self,
7653        stats: &mut EnhancedGenerationStatistics,
7654    ) -> SynthResult<HrSnapshot> {
7655        if !self.phase_config.generate_hr {
7656            debug!("Phase 16: Skipped (HR generation disabled)");
7657            return Ok(HrSnapshot::default());
7658        }
7659
7660        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7661
7662        let seed = self.seed;
7663        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7664            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7665        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7666        let company_code = self
7667            .config
7668            .companies
7669            .first()
7670            .map(|c| c.code.as_str())
7671            .unwrap_or("1000");
7672        let currency = self
7673            .config
7674            .companies
7675            .first()
7676            .map(|c| c.currency.as_str())
7677            .unwrap_or("USD");
7678
7679        let employee_ids: Vec<String> = self
7680            .master_data
7681            .employees
7682            .iter()
7683            .map(|e| e.employee_id.clone())
7684            .collect();
7685
7686        if employee_ids.is_empty() {
7687            debug!("Phase 16: Skipped (no employees available)");
7688            return Ok(HrSnapshot::default());
7689        }
7690
7691        // Extract cost-center pool from master data employees for cross-reference
7692        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
7693        let cost_center_ids: Vec<String> = self
7694            .master_data
7695            .employees
7696            .iter()
7697            .filter_map(|e| e.cost_center.clone())
7698            .collect::<std::collections::HashSet<_>>()
7699            .into_iter()
7700            .collect();
7701
7702        let mut snapshot = HrSnapshot::default();
7703
7704        // Generate payroll runs (one per month)
7705        if self.config.hr.payroll.enabled {
7706            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7707                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7708
7709            // Look up country pack for payroll deductions and labels
7710            let payroll_pack = self.primary_pack();
7711
7712            // Store the pack on the generator so generate() resolves
7713            // localized deduction rates and labels from it.
7714            payroll_gen.set_country_pack(payroll_pack.clone());
7715
7716            let employees_with_salary: Vec<(
7717                String,
7718                rust_decimal::Decimal,
7719                Option<String>,
7720                Option<String>,
7721            )> = self
7722                .master_data
7723                .employees
7724                .iter()
7725                .map(|e| {
7726                    // Use the employee's actual annual base salary.
7727                    // Fall back to $60,000 / yr if somehow zero.
7728                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7729                        e.base_salary
7730                    } else {
7731                        rust_decimal::Decimal::from(60_000)
7732                    };
7733                    (
7734                        e.employee_id.clone(),
7735                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
7736                        e.cost_center.clone(),
7737                        e.department_id.clone(),
7738                    )
7739                })
7740                .collect();
7741
7742            // Use generate_with_changes when employee change history is available
7743            // so that salary adjustments, transfers, etc. are reflected in payroll.
7744            let change_history = &self.master_data.employee_change_history;
7745            let has_changes = !change_history.is_empty();
7746            if has_changes {
7747                debug!(
7748                    "Payroll will incorporate {} employee change events",
7749                    change_history.len()
7750                );
7751            }
7752
7753            for month in 0..self.config.global.period_months {
7754                let period_start = start_date + chrono::Months::new(month);
7755                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7756                let (run, items) = if has_changes {
7757                    payroll_gen.generate_with_changes(
7758                        company_code,
7759                        &employees_with_salary,
7760                        period_start,
7761                        period_end,
7762                        currency,
7763                        change_history,
7764                    )
7765                } else {
7766                    payroll_gen.generate(
7767                        company_code,
7768                        &employees_with_salary,
7769                        period_start,
7770                        period_end,
7771                        currency,
7772                    )
7773                };
7774                snapshot.payroll_runs.push(run);
7775                snapshot.payroll_run_count += 1;
7776                snapshot.payroll_line_item_count += items.len();
7777                snapshot.payroll_line_items.extend(items);
7778            }
7779        }
7780
7781        // Generate time entries
7782        if self.config.hr.time_attendance.enabled {
7783            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7784                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7785            // v3.4.2: when a temporal context is configured, time entries
7786            // respect holidays (not just weekends) and submitted_at lag
7787            // snaps to business days.
7788            if let Some(ctx) = &self.temporal_context {
7789                time_gen.set_temporal_context(Arc::clone(ctx));
7790            }
7791            let entries = time_gen.generate(
7792                &employee_ids,
7793                start_date,
7794                end_date,
7795                &self.config.hr.time_attendance,
7796            );
7797            snapshot.time_entry_count = entries.len();
7798            snapshot.time_entries = entries;
7799        }
7800
7801        // Generate expense reports
7802        if self.config.hr.expenses.enabled {
7803            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7804                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7805            expense_gen.set_country_pack(self.primary_pack().clone());
7806            // v3.4.2: snap submission / approval / paid / line-item dates
7807            // to business days when temporal_context is present.
7808            if let Some(ctx) = &self.temporal_context {
7809                expense_gen.set_temporal_context(Arc::clone(ctx));
7810            }
7811            let company_currency = self
7812                .config
7813                .companies
7814                .first()
7815                .map(|c| c.currency.as_str())
7816                .unwrap_or("USD");
7817            let reports = expense_gen.generate_with_currency(
7818                &employee_ids,
7819                start_date,
7820                end_date,
7821                &self.config.hr.expenses,
7822                company_currency,
7823            );
7824            snapshot.expense_report_count = reports.len();
7825            snapshot.expense_reports = reports;
7826        }
7827
7828        // Generate benefit enrollments (gated on payroll, since benefits require employees)
7829        if self.config.hr.payroll.enabled {
7830            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7831            let employee_pairs: Vec<(String, String)> = self
7832                .master_data
7833                .employees
7834                .iter()
7835                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7836                .collect();
7837            let enrollments =
7838                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7839            snapshot.benefit_enrollment_count = enrollments.len();
7840            snapshot.benefit_enrollments = enrollments;
7841        }
7842
7843        // Generate defined benefit pension plans (IAS 19 / ASC 715)
7844        if self.phase_config.generate_hr {
7845            let entity_name = self
7846                .config
7847                .companies
7848                .first()
7849                .map(|c| c.name.as_str())
7850                .unwrap_or("Entity");
7851            let period_months = self.config.global.period_months;
7852            let period_label = {
7853                let y = start_date.year();
7854                let m = start_date.month();
7855                if period_months >= 12 {
7856                    format!("FY{y}")
7857                } else {
7858                    format!("{y}-{m:02}")
7859                }
7860            };
7861            let reporting_date =
7862                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7863
7864            // Compute average annual salary from actual payroll data when available.
7865            // PayrollRun.total_gross covers all employees for one pay period; we sum
7866            // across all runs and divide by employee_count to get per-employee total,
7867            // then annualise for sub-annual periods.
7868            let avg_salary: Option<rust_decimal::Decimal> = {
7869                let employee_count = employee_ids.len();
7870                if self.config.hr.payroll.enabled
7871                    && employee_count > 0
7872                    && !snapshot.payroll_runs.is_empty()
7873                {
7874                    // Sum total gross pay across all payroll runs for this company
7875                    let total_gross: rust_decimal::Decimal = snapshot
7876                        .payroll_runs
7877                        .iter()
7878                        .filter(|r| r.company_code == company_code)
7879                        .map(|r| r.total_gross)
7880                        .sum();
7881                    if total_gross > rust_decimal::Decimal::ZERO {
7882                        // Annualise: total_gross covers `period_months` months of pay
7883                        let annual_total = if period_months > 0 && period_months < 12 {
7884                            total_gross * rust_decimal::Decimal::from(12u32)
7885                                / rust_decimal::Decimal::from(period_months)
7886                        } else {
7887                            total_gross
7888                        };
7889                        Some(
7890                            (annual_total / rust_decimal::Decimal::from(employee_count))
7891                                .round_dp(2),
7892                        )
7893                    } else {
7894                        None
7895                    }
7896                } else {
7897                    None
7898                }
7899            };
7900
7901            let mut pension_gen =
7902                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7903            let pension_snap = pension_gen.generate(
7904                company_code,
7905                entity_name,
7906                &period_label,
7907                reporting_date,
7908                employee_ids.len(),
7909                currency,
7910                avg_salary,
7911                period_months,
7912            );
7913            snapshot.pension_plan_count = pension_snap.plans.len();
7914            snapshot.pension_plans = pension_snap.plans;
7915            snapshot.pension_obligations = pension_snap.obligations;
7916            snapshot.pension_plan_assets = pension_snap.plan_assets;
7917            snapshot.pension_disclosures = pension_snap.disclosures;
7918            // Pension JEs are returned here so they can be added to entries
7919            // in the caller (stored temporarily on snapshot for transfer).
7920            // We embed them in the hr snapshot for simplicity; the orchestrator
7921            // will extract and extend `entries`.
7922            snapshot.pension_journal_entries = pension_snap.journal_entries;
7923        }
7924
7925        // Generate stock-based compensation (ASC 718 / IFRS 2)
7926        if self.phase_config.generate_hr && !employee_ids.is_empty() {
7927            let period_months = self.config.global.period_months;
7928            let period_label = {
7929                let y = start_date.year();
7930                let m = start_date.month();
7931                if period_months >= 12 {
7932                    format!("FY{y}")
7933                } else {
7934                    format!("{y}-{m:02}")
7935                }
7936            };
7937            let reporting_date =
7938                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7939
7940            let mut stock_comp_gen =
7941                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7942            let stock_snap = stock_comp_gen.generate(
7943                company_code,
7944                &employee_ids,
7945                start_date,
7946                &period_label,
7947                reporting_date,
7948                currency,
7949            );
7950            snapshot.stock_grant_count = stock_snap.grants.len();
7951            snapshot.stock_grants = stock_snap.grants;
7952            snapshot.stock_comp_expenses = stock_snap.expenses;
7953            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7954        }
7955
7956        stats.payroll_run_count = snapshot.payroll_run_count;
7957        stats.time_entry_count = snapshot.time_entry_count;
7958        stats.expense_report_count = snapshot.expense_report_count;
7959        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7960        stats.pension_plan_count = snapshot.pension_plan_count;
7961        stats.stock_grant_count = snapshot.stock_grant_count;
7962
7963        info!(
7964            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7965            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7966            snapshot.time_entry_count, snapshot.expense_report_count,
7967            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7968            snapshot.stock_grant_count
7969        );
7970        self.check_resources_with_log("post-hr")?;
7971
7972        Ok(snapshot)
7973    }
7974
7975    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
7976    fn phase_accounting_standards(
7977        &mut self,
7978        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7979        journal_entries: &[JournalEntry],
7980        stats: &mut EnhancedGenerationStatistics,
7981    ) -> SynthResult<AccountingStandardsSnapshot> {
7982        if !self.phase_config.generate_accounting_standards {
7983            debug!("Phase 17: Skipped (accounting standards generation disabled)");
7984            return Ok(AccountingStandardsSnapshot::default());
7985        }
7986        info!("Phase 17: Generating Accounting Standards Data");
7987
7988        let seed = self.seed;
7989        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7990            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7991        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7992        let company_code = self
7993            .config
7994            .companies
7995            .first()
7996            .map(|c| c.code.as_str())
7997            .unwrap_or("1000");
7998        let currency = self
7999            .config
8000            .companies
8001            .first()
8002            .map(|c| c.currency.as_str())
8003            .unwrap_or("USD");
8004
8005        // Convert config framework to standards framework.
8006        // If the user explicitly set a framework in the YAML config, use that.
8007        // Otherwise, fall back to the country pack's accounting.framework field,
8008        // and if that is also absent or unrecognised, default to US GAAP.
8009        let framework = match self.config.accounting_standards.framework {
8010            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
8011                datasynth_standards::framework::AccountingFramework::UsGaap
8012            }
8013            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
8014                datasynth_standards::framework::AccountingFramework::Ifrs
8015            }
8016            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
8017                datasynth_standards::framework::AccountingFramework::DualReporting
8018            }
8019            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
8020                datasynth_standards::framework::AccountingFramework::FrenchGaap
8021            }
8022            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
8023                datasynth_standards::framework::AccountingFramework::GermanGaap
8024            }
8025            None => {
8026                // Derive framework from the primary company's country pack
8027                let pack = self.primary_pack();
8028                let pack_fw = pack.accounting.framework.as_str();
8029                match pack_fw {
8030                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
8031                    "dual_reporting" => {
8032                        datasynth_standards::framework::AccountingFramework::DualReporting
8033                    }
8034                    "french_gaap" => {
8035                        datasynth_standards::framework::AccountingFramework::FrenchGaap
8036                    }
8037                    "german_gaap" | "hgb" => {
8038                        datasynth_standards::framework::AccountingFramework::GermanGaap
8039                    }
8040                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
8041                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
8042                }
8043            }
8044        };
8045
8046        let mut snapshot = AccountingStandardsSnapshot::default();
8047
8048        // Revenue recognition
8049        if self.config.accounting_standards.revenue_recognition.enabled {
8050            let customer_ids: Vec<String> = self
8051                .master_data
8052                .customers
8053                .iter()
8054                .map(|c| c.customer_id.clone())
8055                .collect();
8056
8057            if !customer_ids.is_empty() {
8058                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
8059                let contracts = rev_gen.generate(
8060                    company_code,
8061                    &customer_ids,
8062                    start_date,
8063                    end_date,
8064                    currency,
8065                    &self.config.accounting_standards.revenue_recognition,
8066                    framework,
8067                );
8068                snapshot.revenue_contract_count = contracts.len();
8069                snapshot.contracts = contracts;
8070            }
8071        }
8072
8073        // Impairment testing
8074        if self.config.accounting_standards.impairment.enabled {
8075            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
8076                .master_data
8077                .assets
8078                .iter()
8079                .map(|a| {
8080                    (
8081                        a.asset_id.clone(),
8082                        a.description.clone(),
8083                        a.acquisition_cost,
8084                    )
8085                })
8086                .collect();
8087
8088            if !asset_data.is_empty() {
8089                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
8090                let tests = imp_gen.generate(
8091                    company_code,
8092                    &asset_data,
8093                    end_date,
8094                    &self.config.accounting_standards.impairment,
8095                    framework,
8096                );
8097                snapshot.impairment_test_count = tests.len();
8098                snapshot.impairment_tests = tests;
8099            }
8100        }
8101
8102        // Business combinations (IFRS 3 / ASC 805)
8103        if self
8104            .config
8105            .accounting_standards
8106            .business_combinations
8107            .enabled
8108        {
8109            let bc_config = &self.config.accounting_standards.business_combinations;
8110            let framework_str = match framework {
8111                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8112                _ => "US_GAAP",
8113            };
8114            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
8115            let bc_snap = bc_gen.generate(
8116                company_code,
8117                currency,
8118                start_date,
8119                end_date,
8120                bc_config.acquisition_count,
8121                framework_str,
8122            );
8123            snapshot.business_combination_count = bc_snap.combinations.len();
8124            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
8125            snapshot.business_combinations = bc_snap.combinations;
8126        }
8127
8128        // Expected Credit Loss (IFRS 9 / ASC 326)
8129        if self
8130            .config
8131            .accounting_standards
8132            .expected_credit_loss
8133            .enabled
8134        {
8135            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
8136            let framework_str = match framework {
8137                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
8138                _ => "ASC_326",
8139            };
8140
8141            // Use AR aging data from the subledger snapshot if available;
8142            // otherwise generate synthetic bucket exposures.
8143            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8144
8145            let mut ecl_gen = EclGenerator::new(seed + 43);
8146
8147            // Collect combined bucket totals across all company AR aging reports.
8148            let bucket_exposures: Vec<(
8149                datasynth_core::models::subledger::ar::AgingBucket,
8150                rust_decimal::Decimal,
8151            )> = if ar_aging_reports.is_empty() {
8152                // No AR aging data — synthesise plausible bucket exposures.
8153                use datasynth_core::models::subledger::ar::AgingBucket;
8154                vec![
8155                    (
8156                        AgingBucket::Current,
8157                        rust_decimal::Decimal::from(500_000_u32),
8158                    ),
8159                    (
8160                        AgingBucket::Days1To30,
8161                        rust_decimal::Decimal::from(120_000_u32),
8162                    ),
8163                    (
8164                        AgingBucket::Days31To60,
8165                        rust_decimal::Decimal::from(45_000_u32),
8166                    ),
8167                    (
8168                        AgingBucket::Days61To90,
8169                        rust_decimal::Decimal::from(15_000_u32),
8170                    ),
8171                    (
8172                        AgingBucket::Over90Days,
8173                        rust_decimal::Decimal::from(8_000_u32),
8174                    ),
8175                ]
8176            } else {
8177                use datasynth_core::models::subledger::ar::AgingBucket;
8178                // Sum bucket totals from all reports.
8179                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
8180                    std::collections::HashMap::new();
8181                for report in ar_aging_reports {
8182                    for (bucket, amount) in &report.bucket_totals {
8183                        *totals.entry(*bucket).or_default() += amount;
8184                    }
8185                }
8186                AgingBucket::all()
8187                    .into_iter()
8188                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
8189                    .collect()
8190            };
8191
8192            let ecl_snap = ecl_gen.generate(
8193                company_code,
8194                end_date,
8195                &bucket_exposures,
8196                ecl_config,
8197                &period_label,
8198                framework_str,
8199            );
8200
8201            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
8202            snapshot.ecl_models = ecl_snap.ecl_models;
8203            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
8204            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
8205        }
8206
8207        // Provisions and contingencies (IAS 37 / ASC 450)
8208        {
8209            let framework_str = match framework {
8210                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8211                _ => "US_GAAP",
8212            };
8213
8214            // Compute actual revenue from the journal entries generated so far.
8215            // The `journal_entries` slice passed to this phase contains all GL entries
8216            // up to and including Period Close. Fall back to a minimum of 100_000 to
8217            // avoid degenerate zero-based provision amounts on first-period datasets.
8218            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
8219                .max(rust_decimal::Decimal::from(100_000_u32));
8220
8221            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8222
8223            let mut prov_gen = ProvisionGenerator::new(seed + 44);
8224            let prov_snap = prov_gen.generate(
8225                company_code,
8226                currency,
8227                revenue_proxy,
8228                end_date,
8229                &period_label,
8230                framework_str,
8231                None, // prior_opening: no carry-forward data in single-period runs
8232            );
8233
8234            snapshot.provision_count = prov_snap.provisions.len();
8235            snapshot.provisions = prov_snap.provisions;
8236            snapshot.provision_movements = prov_snap.movements;
8237            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
8238            snapshot.provision_journal_entries = prov_snap.journal_entries;
8239        }
8240
8241        // IAS 21 Functional Currency Translation
8242        // For each company whose functional currency differs from the presentation
8243        // currency, generate a CurrencyTranslationResult with CTA (OCI).
8244        {
8245            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8246
8247            let presentation_currency = self
8248                .config
8249                .global
8250                .presentation_currency
8251                .clone()
8252                .unwrap_or_else(|| self.config.global.group_currency.clone());
8253
8254            // Build a minimal rate table populated with approximate rates from
8255            // the FX model base rates (USD-based) so we can do the translation.
8256            let mut rate_table = FxRateTable::new(&presentation_currency);
8257
8258            // Populate with base rates against USD; if presentation_currency is
8259            // not USD we do a best-effort two-step conversion using the table's
8260            // triangulation support.
8261            let base_rates = base_rates_usd();
8262            for (ccy, rate) in &base_rates {
8263                rate_table.add_rate(FxRate::new(
8264                    ccy,
8265                    "USD",
8266                    RateType::Closing,
8267                    end_date,
8268                    *rate,
8269                    "SYNTHETIC",
8270                ));
8271                // Average rate = 98% of closing (approximation).
8272                // 0.98 = 98/100 = Decimal::new(98, 2)
8273                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
8274                rate_table.add_rate(FxRate::new(
8275                    ccy,
8276                    "USD",
8277                    RateType::Average,
8278                    end_date,
8279                    avg,
8280                    "SYNTHETIC",
8281                ));
8282            }
8283
8284            let mut translation_results = Vec::new();
8285            for company in &self.config.companies {
8286                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
8287                // to ensure the translation produces non-trivial CTA amounts.
8288                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
8289                    .max(rust_decimal::Decimal::from(100_000_u32));
8290
8291                let func_ccy = company
8292                    .functional_currency
8293                    .clone()
8294                    .unwrap_or_else(|| company.currency.clone());
8295
8296                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
8297                    &company.code,
8298                    &func_ccy,
8299                    &presentation_currency,
8300                    &ias21_period_label,
8301                    end_date,
8302                    company_revenue,
8303                    &rate_table,
8304                );
8305                translation_results.push(result);
8306            }
8307
8308            snapshot.currency_translation_count = translation_results.len();
8309            snapshot.currency_translation_results = translation_results;
8310        }
8311
8312        stats.revenue_contract_count = snapshot.revenue_contract_count;
8313        stats.impairment_test_count = snapshot.impairment_test_count;
8314        stats.business_combination_count = snapshot.business_combination_count;
8315        stats.ecl_model_count = snapshot.ecl_model_count;
8316        stats.provision_count = snapshot.provision_count;
8317
8318        // ------------------------------------------------------------
8319        // v3.3.1: Lease accounting (IFRS 16 / ASC 842)
8320        // ------------------------------------------------------------
8321        if self.config.accounting_standards.leases.enabled {
8322            use datasynth_generators::standards::LeaseGenerator;
8323            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8324                .unwrap_or_else(|_| {
8325                    NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
8326                });
8327            let framework =
8328                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8329            let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
8330            for company in &self.config.companies {
8331                let leases = lease_gen.generate(
8332                    &company.code,
8333                    start_date,
8334                    &self.config.accounting_standards.leases,
8335                    framework,
8336                );
8337                snapshot.lease_count += leases.len();
8338                snapshot.leases.extend(leases);
8339            }
8340            info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
8341        }
8342
8343        // ------------------------------------------------------------
8344        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
8345        // ------------------------------------------------------------
8346        if self.config.accounting_standards.fair_value.enabled {
8347            use datasynth_generators::standards::FairValueGenerator;
8348            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8349                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8350                + chrono::Months::new(self.config.global.period_months);
8351            let framework =
8352                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8353            let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8354            for company in &self.config.companies {
8355                let measurements = fv_gen.generate(
8356                    &company.code,
8357                    end_date,
8358                    &company.currency,
8359                    &self.config.accounting_standards.fair_value,
8360                    framework,
8361                );
8362                snapshot.fair_value_measurement_count += measurements.len();
8363                snapshot.fair_value_measurements.extend(measurements);
8364            }
8365            info!(
8366                "v3.3.1 fair value measurements: {}",
8367                snapshot.fair_value_measurement_count
8368            );
8369        }
8370
8371        // ------------------------------------------------------------
8372        // v3.3.1: Framework reconciliation (dual reporting only)
8373        // ------------------------------------------------------------
8374        if self.config.accounting_standards.generate_differences
8375            && matches!(
8376                self.config.accounting_standards.framework,
8377                Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8378            )
8379        {
8380            use datasynth_generators::standards::FrameworkReconciliationGenerator;
8381            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8382                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8383                + chrono::Months::new(self.config.global.period_months);
8384            let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8385            for company in &self.config.companies {
8386                let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8387                snapshot.framework_difference_count += records.len();
8388                snapshot.framework_differences.extend(records);
8389                snapshot.framework_reconciliations.push(reconciliation);
8390            }
8391            info!(
8392                "v3.3.1 framework reconciliation: {} differences across {} entities",
8393                snapshot.framework_difference_count,
8394                snapshot.framework_reconciliations.len()
8395            );
8396        }
8397
8398        info!(
8399            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8400            snapshot.revenue_contract_count,
8401            snapshot.impairment_test_count,
8402            snapshot.business_combination_count,
8403            snapshot.ecl_model_count,
8404            snapshot.provision_count,
8405            snapshot.currency_translation_count,
8406            snapshot.lease_count,
8407            snapshot.fair_value_measurement_count,
8408            snapshot.framework_difference_count,
8409        );
8410        self.check_resources_with_log("post-accounting-standards")?;
8411
8412        Ok(snapshot)
8413    }
8414
8415    /// v3.3.1: helper to resolve the accounting-standards framework enum
8416    /// from config into the `datasynth_standards::framework::AccountingFramework`
8417    /// type expected by standards generators. Falls back to US GAAP.
8418    fn resolve_accounting_framework(
8419        cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8420    ) -> datasynth_standards::framework::AccountingFramework {
8421        use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8422        use datasynth_standards::framework::AccountingFramework as Fw;
8423        match cfg {
8424            Some(Cfg::Ifrs) => Fw::Ifrs,
8425            Some(Cfg::DualReporting) => Fw::DualReporting,
8426            Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8427            Some(Cfg::GermanGaap) => Fw::GermanGaap,
8428            _ => Fw::UsGaap,
8429        }
8430    }
8431
8432    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
8433    fn phase_manufacturing(
8434        &mut self,
8435        stats: &mut EnhancedGenerationStatistics,
8436    ) -> SynthResult<ManufacturingSnapshot> {
8437        if !self.phase_config.generate_manufacturing {
8438            debug!("Phase 18: Skipped (manufacturing generation disabled)");
8439            return Ok(ManufacturingSnapshot::default());
8440        }
8441        info!("Phase 18: Generating Manufacturing Data");
8442
8443        let seed = self.seed;
8444        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8445            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8446        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8447        let company_code = self
8448            .config
8449            .companies
8450            .first()
8451            .map(|c| c.code.as_str())
8452            .unwrap_or("1000");
8453
8454        let material_data: Vec<(String, String)> = self
8455            .master_data
8456            .materials
8457            .iter()
8458            .map(|m| (m.material_id.clone(), m.description.clone()))
8459            .collect();
8460
8461        if material_data.is_empty() {
8462            debug!("Phase 18: Skipped (no materials available)");
8463            return Ok(ManufacturingSnapshot::default());
8464        }
8465
8466        let mut snapshot = ManufacturingSnapshot::default();
8467
8468        // Generate production orders
8469        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8470        // v3.4.3: snap planned / actual / operation dates to business days.
8471        if let Some(ctx) = &self.temporal_context {
8472            prod_gen.set_temporal_context(Arc::clone(ctx));
8473        }
8474        let production_orders = prod_gen.generate(
8475            company_code,
8476            &material_data,
8477            start_date,
8478            end_date,
8479            &self.config.manufacturing.production_orders,
8480            &self.config.manufacturing.costing,
8481            &self.config.manufacturing.routing,
8482        );
8483        snapshot.production_order_count = production_orders.len();
8484
8485        // Generate quality inspections from production orders
8486        let inspection_data: Vec<(String, String, String)> = production_orders
8487            .iter()
8488            .map(|po| {
8489                (
8490                    po.order_id.clone(),
8491                    po.material_id.clone(),
8492                    po.material_description.clone(),
8493                )
8494            })
8495            .collect();
8496
8497        snapshot.production_orders = production_orders;
8498
8499        if !inspection_data.is_empty() {
8500            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8501            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8502            snapshot.quality_inspection_count = inspections.len();
8503            snapshot.quality_inspections = inspections;
8504        }
8505
8506        // Generate cycle counts (one per month)
8507        let storage_locations: Vec<(String, String)> = material_data
8508            .iter()
8509            .enumerate()
8510            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8511            .collect();
8512
8513        let employee_ids: Vec<String> = self
8514            .master_data
8515            .employees
8516            .iter()
8517            .map(|e| e.employee_id.clone())
8518            .collect();
8519        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8520            .with_employee_pool(employee_ids);
8521        let mut cycle_count_total = 0usize;
8522        for month in 0..self.config.global.period_months {
8523            let count_date = start_date + chrono::Months::new(month);
8524            let items_per_count = storage_locations.len().clamp(10, 50);
8525            let cc = cc_gen.generate(
8526                company_code,
8527                &storage_locations,
8528                count_date,
8529                items_per_count,
8530            );
8531            snapshot.cycle_counts.push(cc);
8532            cycle_count_total += 1;
8533        }
8534        snapshot.cycle_count_count = cycle_count_total;
8535
8536        // Generate BOM components
8537        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8538        let bom_components = bom_gen.generate(company_code, &material_data);
8539        snapshot.bom_component_count = bom_components.len();
8540        snapshot.bom_components = bom_components;
8541
8542        // Generate inventory movements — link GoodsIssue movements to real production order IDs
8543        let currency = self
8544            .config
8545            .companies
8546            .first()
8547            .map(|c| c.currency.as_str())
8548            .unwrap_or("USD");
8549        let production_order_ids: Vec<String> = snapshot
8550            .production_orders
8551            .iter()
8552            .map(|po| po.order_id.clone())
8553            .collect();
8554        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8555        let inventory_movements = inv_mov_gen.generate_with_production_orders(
8556            company_code,
8557            &material_data,
8558            start_date,
8559            end_date,
8560            2,
8561            currency,
8562            &production_order_ids,
8563        );
8564        snapshot.inventory_movement_count = inventory_movements.len();
8565        snapshot.inventory_movements = inventory_movements;
8566
8567        stats.production_order_count = snapshot.production_order_count;
8568        stats.quality_inspection_count = snapshot.quality_inspection_count;
8569        stats.cycle_count_count = snapshot.cycle_count_count;
8570        stats.bom_component_count = snapshot.bom_component_count;
8571        stats.inventory_movement_count = snapshot.inventory_movement_count;
8572
8573        info!(
8574            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8575            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8576            snapshot.bom_component_count, snapshot.inventory_movement_count
8577        );
8578        self.check_resources_with_log("post-manufacturing")?;
8579
8580        Ok(snapshot)
8581    }
8582
8583    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
8584    fn phase_sales_kpi_budgets(
8585        &mut self,
8586        coa: &Arc<ChartOfAccounts>,
8587        financial_reporting: &FinancialReportingSnapshot,
8588        stats: &mut EnhancedGenerationStatistics,
8589    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8590        if !self.phase_config.generate_sales_kpi_budgets {
8591            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8592            return Ok(SalesKpiBudgetsSnapshot::default());
8593        }
8594        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8595
8596        let seed = self.seed;
8597        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8598            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8599        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8600        let company_code = self
8601            .config
8602            .companies
8603            .first()
8604            .map(|c| c.code.as_str())
8605            .unwrap_or("1000");
8606
8607        let mut snapshot = SalesKpiBudgetsSnapshot::default();
8608
8609        // Sales Quotes
8610        if self.config.sales_quotes.enabled {
8611            let customer_data: Vec<(String, String)> = self
8612                .master_data
8613                .customers
8614                .iter()
8615                .map(|c| (c.customer_id.clone(), c.name.clone()))
8616                .collect();
8617            let material_data: Vec<(String, String)> = self
8618                .master_data
8619                .materials
8620                .iter()
8621                .map(|m| (m.material_id.clone(), m.description.clone()))
8622                .collect();
8623
8624            if !customer_data.is_empty() && !material_data.is_empty() {
8625                let employee_ids: Vec<String> = self
8626                    .master_data
8627                    .employees
8628                    .iter()
8629                    .map(|e| e.employee_id.clone())
8630                    .collect();
8631                let customer_ids: Vec<String> = self
8632                    .master_data
8633                    .customers
8634                    .iter()
8635                    .map(|c| c.customer_id.clone())
8636                    .collect();
8637                let company_currency = self
8638                    .config
8639                    .companies
8640                    .first()
8641                    .map(|c| c.currency.as_str())
8642                    .unwrap_or("USD");
8643
8644                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8645                    .with_pools(employee_ids, customer_ids);
8646                let quotes = quote_gen.generate_with_currency(
8647                    company_code,
8648                    &customer_data,
8649                    &material_data,
8650                    start_date,
8651                    end_date,
8652                    &self.config.sales_quotes,
8653                    company_currency,
8654                );
8655                snapshot.sales_quote_count = quotes.len();
8656                snapshot.sales_quotes = quotes;
8657            }
8658        }
8659
8660        // Management KPIs
8661        if self.config.financial_reporting.management_kpis.enabled {
8662            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8663            let mut kpis = kpi_gen.generate(
8664                company_code,
8665                start_date,
8666                end_date,
8667                &self.config.financial_reporting.management_kpis,
8668            );
8669
8670            // Override financial KPIs with actual data from financial statements
8671            {
8672                use rust_decimal::Decimal;
8673
8674                if let Some(income_stmt) =
8675                    financial_reporting.financial_statements.iter().find(|fs| {
8676                        fs.statement_type == StatementType::IncomeStatement
8677                            && fs.company_code == company_code
8678                    })
8679                {
8680                    // Extract revenue and COGS from income statement line items
8681                    let total_revenue: Decimal = income_stmt
8682                        .line_items
8683                        .iter()
8684                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
8685                        .map(|li| li.amount)
8686                        .sum();
8687                    let total_cogs: Decimal = income_stmt
8688                        .line_items
8689                        .iter()
8690                        .filter(|li| {
8691                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8692                                && !li.is_total
8693                        })
8694                        .map(|li| li.amount.abs())
8695                        .sum();
8696                    let total_opex: Decimal = income_stmt
8697                        .line_items
8698                        .iter()
8699                        .filter(|li| {
8700                            li.section.contains("Expense")
8701                                && !li.is_total
8702                                && !li.section.contains("Cost")
8703                        })
8704                        .map(|li| li.amount.abs())
8705                        .sum();
8706
8707                    if total_revenue > Decimal::ZERO {
8708                        let hundred = Decimal::from(100);
8709                        let gross_margin_pct =
8710                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8711                        let operating_income = total_revenue - total_cogs - total_opex;
8712                        let op_margin_pct =
8713                            (operating_income * hundred / total_revenue).round_dp(2);
8714
8715                        // Override gross margin and operating margin KPIs
8716                        for kpi in &mut kpis {
8717                            if kpi.name == "Gross Margin" {
8718                                kpi.value = gross_margin_pct;
8719                            } else if kpi.name == "Operating Margin" {
8720                                kpi.value = op_margin_pct;
8721                            }
8722                        }
8723                    }
8724                }
8725
8726                // Override Current Ratio from balance sheet
8727                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8728                    fs.statement_type == StatementType::BalanceSheet
8729                        && fs.company_code == company_code
8730                }) {
8731                    let current_assets: Decimal = bs
8732                        .line_items
8733                        .iter()
8734                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8735                        .map(|li| li.amount)
8736                        .sum();
8737                    let current_liabilities: Decimal = bs
8738                        .line_items
8739                        .iter()
8740                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8741                        .map(|li| li.amount.abs())
8742                        .sum();
8743
8744                    if current_liabilities > Decimal::ZERO {
8745                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
8746                        for kpi in &mut kpis {
8747                            if kpi.name == "Current Ratio" {
8748                                kpi.value = current_ratio;
8749                            }
8750                        }
8751                    }
8752                }
8753            }
8754
8755            snapshot.kpi_count = kpis.len();
8756            snapshot.kpis = kpis;
8757        }
8758
8759        // Budgets
8760        if self.config.financial_reporting.budgets.enabled {
8761            let account_data: Vec<(String, String)> = coa
8762                .accounts
8763                .iter()
8764                .map(|a| (a.account_number.clone(), a.short_description.clone()))
8765                .collect();
8766
8767            if !account_data.is_empty() {
8768                let fiscal_year = start_date.year() as u32;
8769                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8770                let budget = budget_gen.generate(
8771                    company_code,
8772                    fiscal_year,
8773                    &account_data,
8774                    &self.config.financial_reporting.budgets,
8775                );
8776                snapshot.budget_line_count = budget.line_items.len();
8777                snapshot.budgets.push(budget);
8778            }
8779        }
8780
8781        stats.sales_quote_count = snapshot.sales_quote_count;
8782        stats.kpi_count = snapshot.kpi_count;
8783        stats.budget_line_count = snapshot.budget_line_count;
8784
8785        info!(
8786            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8787            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8788        );
8789        self.check_resources_with_log("post-sales-kpi-budgets")?;
8790
8791        Ok(snapshot)
8792    }
8793
8794    /// Compute pre-tax income for a single company from actual journal entries.
8795    ///
8796    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
8797    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
8798    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
8799    /// and the period-close engine so that all three use a consistent definition.
8800    fn compute_pre_tax_income(
8801        company_code: &str,
8802        journal_entries: &[JournalEntry],
8803    ) -> rust_decimal::Decimal {
8804        use datasynth_core::accounts::AccountCategory;
8805        use rust_decimal::Decimal;
8806
8807        let mut total_revenue = Decimal::ZERO;
8808        let mut total_expenses = Decimal::ZERO;
8809
8810        for je in journal_entries {
8811            if je.header.company_code != company_code {
8812                continue;
8813            }
8814            for line in &je.lines {
8815                let cat = AccountCategory::from_account(&line.gl_account);
8816                match cat {
8817                    AccountCategory::Revenue => {
8818                        total_revenue += line.credit_amount - line.debit_amount;
8819                    }
8820                    AccountCategory::Cogs
8821                    | AccountCategory::OperatingExpense
8822                    | AccountCategory::OtherIncomeExpense => {
8823                        total_expenses += line.debit_amount - line.credit_amount;
8824                    }
8825                    _ => {}
8826                }
8827            }
8828        }
8829
8830        let pti = (total_revenue - total_expenses).round_dp(2);
8831        if pti == rust_decimal::Decimal::ZERO {
8832            // No income statement activity yet — fall back to a synthetic value so the
8833            // tax provision generator can still produce meaningful output.
8834            rust_decimal::Decimal::from(1_000_000u32)
8835        } else {
8836            pti
8837        }
8838    }
8839
8840    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
8841    fn phase_tax_generation(
8842        &mut self,
8843        document_flows: &DocumentFlowSnapshot,
8844        journal_entries: &[JournalEntry],
8845        stats: &mut EnhancedGenerationStatistics,
8846    ) -> SynthResult<TaxSnapshot> {
8847        if !self.phase_config.generate_tax {
8848            debug!("Phase 20: Skipped (tax generation disabled)");
8849            return Ok(TaxSnapshot::default());
8850        }
8851        info!("Phase 20: Generating Tax Data");
8852
8853        let seed = self.seed;
8854        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8855            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8856        let fiscal_year = start_date.year();
8857        let company_code = self
8858            .config
8859            .companies
8860            .first()
8861            .map(|c| c.code.as_str())
8862            .unwrap_or("1000");
8863
8864        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8865            seed + 370,
8866            self.config.tax.clone(),
8867        );
8868
8869        let pack = self.primary_pack().clone();
8870        let (jurisdictions, codes) =
8871            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8872
8873        // Generate tax provisions for each company
8874        let mut provisions = Vec::new();
8875        if self.config.tax.provisions.enabled {
8876            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8877            for company in &self.config.companies {
8878                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8879                let statutory_rate = rust_decimal::Decimal::new(
8880                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8881                    2,
8882                );
8883                let provision = provision_gen.generate(
8884                    &company.code,
8885                    start_date,
8886                    pre_tax_income,
8887                    statutory_rate,
8888                );
8889                provisions.push(provision);
8890            }
8891        }
8892
8893        // Generate tax lines from document invoices
8894        let mut tax_lines = Vec::new();
8895        if !codes.is_empty() {
8896            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8897                datasynth_generators::TaxLineGeneratorConfig::default(),
8898                codes.clone(),
8899                seed + 372,
8900            );
8901
8902            // Tax lines from vendor invoices (input tax)
8903            // Use the first company's country as buyer country
8904            let buyer_country = self
8905                .config
8906                .companies
8907                .first()
8908                .map(|c| c.country.as_str())
8909                .unwrap_or("US");
8910            for vi in &document_flows.vendor_invoices {
8911                let lines = tax_line_gen.generate_for_document(
8912                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
8913                    &vi.header.document_id,
8914                    buyer_country, // seller approx same country
8915                    buyer_country,
8916                    vi.payable_amount,
8917                    vi.header.document_date,
8918                    None,
8919                );
8920                tax_lines.extend(lines);
8921            }
8922
8923            // Tax lines from customer invoices (output tax)
8924            for ci in &document_flows.customer_invoices {
8925                let lines = tax_line_gen.generate_for_document(
8926                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8927                    &ci.header.document_id,
8928                    buyer_country, // seller is the company
8929                    buyer_country,
8930                    ci.total_gross_amount,
8931                    ci.header.document_date,
8932                    None,
8933                );
8934                tax_lines.extend(lines);
8935            }
8936        }
8937
8938        // Generate deferred tax data (IAS 12 / ASC 740) for each company
8939        let deferred_tax = {
8940            let companies: Vec<(&str, &str)> = self
8941                .config
8942                .companies
8943                .iter()
8944                .map(|c| (c.code.as_str(), c.country.as_str()))
8945                .collect();
8946            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8947            deferred_gen.generate(&companies, start_date, journal_entries)
8948        };
8949
8950        // Build a document_id → posting_date map so each tax JE uses its
8951        // source document's date rather than a blanket period-end date.
8952        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8953            std::collections::HashMap::new();
8954        for vi in &document_flows.vendor_invoices {
8955            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8956        }
8957        for ci in &document_flows.customer_invoices {
8958            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8959        }
8960
8961        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
8962        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8963        let tax_posting_journal_entries = if !tax_lines.is_empty() {
8964            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8965                &tax_lines,
8966                company_code,
8967                &doc_dates,
8968                end_date,
8969            );
8970            debug!("Generated {} tax posting JEs", jes.len());
8971            jes
8972        } else {
8973            Vec::new()
8974        };
8975
8976        let snapshot = TaxSnapshot {
8977            jurisdiction_count: jurisdictions.len(),
8978            code_count: codes.len(),
8979            jurisdictions,
8980            codes,
8981            tax_provisions: provisions,
8982            tax_lines,
8983            tax_returns: Vec::new(),
8984            withholding_records: Vec::new(),
8985            tax_anomaly_labels: Vec::new(),
8986            deferred_tax,
8987            tax_posting_journal_entries,
8988        };
8989
8990        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8991        stats.tax_code_count = snapshot.code_count;
8992        stats.tax_provision_count = snapshot.tax_provisions.len();
8993        stats.tax_line_count = snapshot.tax_lines.len();
8994
8995        info!(
8996            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8997            snapshot.jurisdiction_count,
8998            snapshot.code_count,
8999            snapshot.tax_provisions.len(),
9000            snapshot.deferred_tax.temporary_differences.len(),
9001            snapshot.deferred_tax.journal_entries.len(),
9002            snapshot.tax_posting_journal_entries.len(),
9003        );
9004        self.check_resources_with_log("post-tax")?;
9005
9006        Ok(snapshot)
9007    }
9008
9009    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
9010    fn phase_esg_generation(
9011        &mut self,
9012        document_flows: &DocumentFlowSnapshot,
9013        manufacturing: &ManufacturingSnapshot,
9014        stats: &mut EnhancedGenerationStatistics,
9015    ) -> SynthResult<EsgSnapshot> {
9016        if !self.phase_config.generate_esg {
9017            debug!("Phase 21: Skipped (ESG generation disabled)");
9018            return Ok(EsgSnapshot::default());
9019        }
9020        let degradation = self.check_resources()?;
9021        if degradation >= DegradationLevel::Reduced {
9022            debug!(
9023                "Phase skipped due to resource pressure (degradation: {:?})",
9024                degradation
9025            );
9026            return Ok(EsgSnapshot::default());
9027        }
9028        info!("Phase 21: Generating ESG Data");
9029
9030        let seed = self.seed;
9031        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9032            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9033        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9034        let entity_id = self
9035            .config
9036            .companies
9037            .first()
9038            .map(|c| c.code.as_str())
9039            .unwrap_or("1000");
9040
9041        let esg_cfg = &self.config.esg;
9042        let mut snapshot = EsgSnapshot::default();
9043
9044        // Energy consumption (feeds into scope 1 & 2 emissions)
9045        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
9046            esg_cfg.environmental.energy.clone(),
9047            seed + 80,
9048        );
9049        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
9050
9051        // Water usage
9052        let facility_count = esg_cfg.environmental.energy.facility_count;
9053        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
9054        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
9055
9056        // Waste
9057        let mut waste_gen = datasynth_generators::WasteGenerator::new(
9058            seed + 82,
9059            esg_cfg.environmental.waste.diversion_target,
9060            facility_count,
9061        );
9062        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
9063
9064        // Emissions (scope 1, 2, 3)
9065        let mut emission_gen =
9066            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
9067
9068        // Build EnergyInput from energy_records
9069        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
9070            .iter()
9071            .map(|e| datasynth_generators::EnergyInput {
9072                facility_id: e.facility_id.clone(),
9073                energy_type: match e.energy_source {
9074                    EnergySourceType::NaturalGas => {
9075                        datasynth_generators::EnergyInputType::NaturalGas
9076                    }
9077                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
9078                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
9079                    _ => datasynth_generators::EnergyInputType::Electricity,
9080                },
9081                consumption_kwh: e.consumption_kwh,
9082                period: e.period,
9083            })
9084            .collect();
9085
9086        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
9087        if !manufacturing.production_orders.is_empty() {
9088            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
9089                &manufacturing.production_orders,
9090                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
9091                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
9092            );
9093            if !mfg_energy.is_empty() {
9094                info!(
9095                    "ESG: {} energy inputs derived from {} production orders",
9096                    mfg_energy.len(),
9097                    manufacturing.production_orders.len(),
9098                );
9099                energy_inputs.extend(mfg_energy);
9100            }
9101        }
9102
9103        let mut emissions = Vec::new();
9104        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
9105        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
9106
9107        // Scope 3: use vendor spend data from actual payments
9108        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
9109            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9110            for payment in &document_flows.payments {
9111                if payment.is_vendor {
9112                    *totals
9113                        .entry(payment.business_partner_id.clone())
9114                        .or_default() += payment.amount;
9115                }
9116            }
9117            totals
9118        };
9119        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
9120            .master_data
9121            .vendors
9122            .iter()
9123            .map(|v| {
9124                let spend = vendor_payment_totals
9125                    .get(&v.vendor_id)
9126                    .copied()
9127                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
9128                datasynth_generators::VendorSpendInput {
9129                    vendor_id: v.vendor_id.clone(),
9130                    category: format!("{:?}", v.vendor_type).to_lowercase(),
9131                    spend,
9132                    country: v.country.clone(),
9133                }
9134            })
9135            .collect();
9136        if !vendor_spend.is_empty() {
9137            emissions.extend(emission_gen.generate_scope3_purchased_goods(
9138                entity_id,
9139                &vendor_spend,
9140                start_date,
9141                end_date,
9142            ));
9143        }
9144
9145        // Business travel & commuting (scope 3)
9146        let headcount = self.master_data.employees.len() as u32;
9147        if headcount > 0 {
9148            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
9149            emissions.extend(emission_gen.generate_scope3_business_travel(
9150                entity_id,
9151                travel_spend,
9152                start_date,
9153            ));
9154            emissions
9155                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
9156        }
9157
9158        snapshot.emission_count = emissions.len();
9159        snapshot.emissions = emissions;
9160        snapshot.energy = energy_records;
9161
9162        // Social: Workforce diversity, pay equity, safety
9163        let mut workforce_gen =
9164            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
9165        let total_headcount = headcount.max(100);
9166        snapshot.diversity =
9167            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
9168        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
9169
9170        // v2.4: Derive additional workforce diversity metrics from actual employee data
9171        if !self.master_data.employees.is_empty() {
9172            let hr_diversity = workforce_gen.generate_diversity_from_employees(
9173                entity_id,
9174                &self.master_data.employees,
9175                end_date,
9176            );
9177            if !hr_diversity.is_empty() {
9178                info!(
9179                    "ESG: {} diversity metrics derived from {} actual employees",
9180                    hr_diversity.len(),
9181                    self.master_data.employees.len(),
9182                );
9183                snapshot.diversity.extend(hr_diversity);
9184            }
9185        }
9186
9187        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
9188            entity_id,
9189            facility_count,
9190            start_date,
9191            end_date,
9192        );
9193
9194        // Compute safety metrics
9195        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
9196        let safety_metric = workforce_gen.compute_safety_metrics(
9197            entity_id,
9198            &snapshot.safety_incidents,
9199            total_hours,
9200            start_date,
9201        );
9202        snapshot.safety_metrics = vec![safety_metric];
9203
9204        // Governance
9205        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
9206            seed + 85,
9207            esg_cfg.governance.board_size,
9208            esg_cfg.governance.independence_target,
9209        );
9210        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
9211
9212        // Supplier ESG assessments
9213        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
9214            esg_cfg.supply_chain_esg.clone(),
9215            seed + 86,
9216        );
9217        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
9218            .master_data
9219            .vendors
9220            .iter()
9221            .map(|v| datasynth_generators::VendorInput {
9222                vendor_id: v.vendor_id.clone(),
9223                country: v.country.clone(),
9224                industry: format!("{:?}", v.vendor_type).to_lowercase(),
9225                quality_score: None,
9226            })
9227            .collect();
9228        snapshot.supplier_assessments =
9229            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
9230
9231        // Disclosures
9232        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
9233            seed + 87,
9234            esg_cfg.reporting.clone(),
9235            esg_cfg.climate_scenarios.clone(),
9236        );
9237        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
9238        snapshot.disclosures = disclosure_gen.generate_disclosures(
9239            entity_id,
9240            &snapshot.materiality,
9241            start_date,
9242            end_date,
9243        );
9244        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
9245        snapshot.disclosure_count = snapshot.disclosures.len();
9246
9247        // Anomaly injection
9248        if esg_cfg.anomaly_rate > 0.0 {
9249            let mut anomaly_injector =
9250                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
9251            let mut labels = Vec::new();
9252            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
9253            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
9254            labels.extend(
9255                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
9256            );
9257            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
9258            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
9259            snapshot.anomaly_labels = labels;
9260        }
9261
9262        stats.esg_emission_count = snapshot.emission_count;
9263        stats.esg_disclosure_count = snapshot.disclosure_count;
9264
9265        info!(
9266            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
9267            snapshot.emission_count,
9268            snapshot.disclosure_count,
9269            snapshot.supplier_assessments.len()
9270        );
9271        self.check_resources_with_log("post-esg")?;
9272
9273        Ok(snapshot)
9274    }
9275
9276    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
9277    fn phase_treasury_data(
9278        &mut self,
9279        document_flows: &DocumentFlowSnapshot,
9280        subledger: &SubledgerSnapshot,
9281        intercompany: &IntercompanySnapshot,
9282        stats: &mut EnhancedGenerationStatistics,
9283    ) -> SynthResult<TreasurySnapshot> {
9284        if !self.phase_config.generate_treasury {
9285            debug!("Phase 22: Skipped (treasury generation disabled)");
9286            return Ok(TreasurySnapshot::default());
9287        }
9288        let degradation = self.check_resources()?;
9289        if degradation >= DegradationLevel::Reduced {
9290            debug!(
9291                "Phase skipped due to resource pressure (degradation: {:?})",
9292                degradation
9293            );
9294            return Ok(TreasurySnapshot::default());
9295        }
9296        info!("Phase 22: Generating Treasury Data");
9297
9298        let seed = self.seed;
9299        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9300            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9301        let currency = self
9302            .config
9303            .companies
9304            .first()
9305            .map(|c| c.currency.as_str())
9306            .unwrap_or("USD");
9307        let entity_id = self
9308            .config
9309            .companies
9310            .first()
9311            .map(|c| c.code.as_str())
9312            .unwrap_or("1000");
9313
9314        let mut snapshot = TreasurySnapshot::default();
9315
9316        // Generate debt instruments
9317        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
9318            self.config.treasury.debt.clone(),
9319            seed + 90,
9320        );
9321        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
9322
9323        // Generate hedging instruments (IR swaps for floating-rate debt)
9324        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
9325            self.config.treasury.hedging.clone(),
9326            seed + 91,
9327        );
9328        for debt in &snapshot.debt_instruments {
9329            if debt.rate_type == InterestRateType::Variable {
9330                let swap = hedge_gen.generate_ir_swap(
9331                    currency,
9332                    debt.principal,
9333                    debt.origination_date,
9334                    debt.maturity_date,
9335                );
9336                snapshot.hedging_instruments.push(swap);
9337            }
9338        }
9339
9340        // Build FX exposures from foreign-currency payments and generate
9341        // FX forwards + hedge relationship designations via generate() API.
9342        {
9343            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
9344            for payment in &document_flows.payments {
9345                if payment.currency != currency {
9346                    let entry = fx_map
9347                        .entry(payment.currency.clone())
9348                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
9349                    entry.0 += payment.amount;
9350                    // Use the latest settlement date among grouped payments
9351                    if payment.header.document_date > entry.1 {
9352                        entry.1 = payment.header.document_date;
9353                    }
9354                }
9355            }
9356            if !fx_map.is_empty() {
9357                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9358                    .into_iter()
9359                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
9360                        datasynth_generators::treasury::FxExposure {
9361                            currency_pair: format!("{foreign_ccy}/{currency}"),
9362                            foreign_currency: foreign_ccy,
9363                            net_amount,
9364                            settlement_date,
9365                            description: "AP payment FX exposure".to_string(),
9366                        }
9367                    })
9368                    .collect();
9369                let (fx_instruments, fx_relationships) =
9370                    hedge_gen.generate(start_date, &fx_exposures);
9371                snapshot.hedging_instruments.extend(fx_instruments);
9372                snapshot.hedge_relationships.extend(fx_relationships);
9373            }
9374        }
9375
9376        // Inject anomalies if configured
9377        if self.config.treasury.anomaly_rate > 0.0 {
9378            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9379                seed + 92,
9380                self.config.treasury.anomaly_rate,
9381            );
9382            let mut labels = Vec::new();
9383            labels.extend(
9384                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9385            );
9386            snapshot.treasury_anomaly_labels = labels;
9387        }
9388
9389        // Generate cash positions from payment flows
9390        if self.config.treasury.cash_positioning.enabled {
9391            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9392
9393            // AP payments as outflows
9394            for payment in &document_flows.payments {
9395                cash_flows.push(datasynth_generators::treasury::CashFlow {
9396                    date: payment.header.document_date,
9397                    account_id: format!("{entity_id}-MAIN"),
9398                    amount: payment.amount,
9399                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9400                });
9401            }
9402
9403            // Customer receipts (from O2C chains) as inflows
9404            for chain in &document_flows.o2c_chains {
9405                if let Some(ref receipt) = chain.customer_receipt {
9406                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9407                        date: receipt.header.document_date,
9408                        account_id: format!("{entity_id}-MAIN"),
9409                        amount: receipt.amount,
9410                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9411                    });
9412                }
9413                // Remainder receipts (follow-up to partial payments)
9414                for receipt in &chain.remainder_receipts {
9415                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9416                        date: receipt.header.document_date,
9417                        account_id: format!("{entity_id}-MAIN"),
9418                        amount: receipt.amount,
9419                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9420                    });
9421                }
9422            }
9423
9424            if !cash_flows.is_empty() {
9425                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9426                    self.config.treasury.cash_positioning.clone(),
9427                    seed + 93,
9428                );
9429                let account_id = format!("{entity_id}-MAIN");
9430                snapshot.cash_positions = cash_gen.generate(
9431                    entity_id,
9432                    &account_id,
9433                    currency,
9434                    &cash_flows,
9435                    start_date,
9436                    start_date + chrono::Months::new(self.config.global.period_months),
9437                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
9438                );
9439            }
9440        }
9441
9442        // Generate cash forecasts from AR/AP aging
9443        if self.config.treasury.cash_forecasting.enabled {
9444            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9445
9446            // Build AR aging items from subledger AR invoices
9447            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9448                .ar_invoices
9449                .iter()
9450                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9451                .map(|inv| {
9452                    let days_past_due = if inv.due_date < end_date {
9453                        (end_date - inv.due_date).num_days().max(0) as u32
9454                    } else {
9455                        0
9456                    };
9457                    datasynth_generators::treasury::ArAgingItem {
9458                        expected_date: inv.due_date,
9459                        amount: inv.amount_remaining,
9460                        days_past_due,
9461                        document_id: inv.invoice_number.clone(),
9462                    }
9463                })
9464                .collect();
9465
9466            // Build AP aging items from subledger AP invoices
9467            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9468                .ap_invoices
9469                .iter()
9470                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9471                .map(|inv| datasynth_generators::treasury::ApAgingItem {
9472                    payment_date: inv.due_date,
9473                    amount: inv.amount_remaining,
9474                    document_id: inv.invoice_number.clone(),
9475                })
9476                .collect();
9477
9478            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9479                self.config.treasury.cash_forecasting.clone(),
9480                seed + 94,
9481            );
9482            let forecast = forecast_gen.generate(
9483                entity_id,
9484                currency,
9485                end_date,
9486                &ar_items,
9487                &ap_items,
9488                &[], // scheduled disbursements - empty for now
9489            );
9490            snapshot.cash_forecasts.push(forecast);
9491        }
9492
9493        // Generate cash pools and sweeps
9494        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9495            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9496            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9497                self.config.treasury.cash_pooling.clone(),
9498                seed + 95,
9499            );
9500
9501            // Create a pool from available accounts
9502            let account_ids: Vec<String> = snapshot
9503                .cash_positions
9504                .iter()
9505                .map(|cp| cp.bank_account_id.clone())
9506                .collect::<std::collections::HashSet<_>>()
9507                .into_iter()
9508                .collect();
9509
9510            if let Some(pool) =
9511                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9512            {
9513                // Generate sweeps - build participant balances from last cash position per account
9514                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9515                for cp in &snapshot.cash_positions {
9516                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9517                }
9518
9519                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9520                    latest_balances
9521                        .into_iter()
9522                        .filter(|(id, _)| pool.participant_accounts.contains(id))
9523                        .map(
9524                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
9525                                account_id: id,
9526                                balance,
9527                            },
9528                        )
9529                        .collect();
9530
9531                let sweeps =
9532                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9533                snapshot.cash_pool_sweeps = sweeps;
9534                snapshot.cash_pools.push(pool);
9535            }
9536        }
9537
9538        // Generate bank guarantees
9539        if self.config.treasury.bank_guarantees.enabled {
9540            let vendor_names: Vec<String> = self
9541                .master_data
9542                .vendors
9543                .iter()
9544                .map(|v| v.name.clone())
9545                .collect();
9546            if !vendor_names.is_empty() {
9547                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9548                    self.config.treasury.bank_guarantees.clone(),
9549                    seed + 96,
9550                );
9551                snapshot.bank_guarantees =
9552                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9553            }
9554        }
9555
9556        // Generate netting runs from intercompany matched pairs
9557        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9558            let entity_ids: Vec<String> = self
9559                .config
9560                .companies
9561                .iter()
9562                .map(|c| c.code.clone())
9563                .collect();
9564            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9565                .matched_pairs
9566                .iter()
9567                .map(|mp| {
9568                    (
9569                        mp.seller_company.clone(),
9570                        mp.buyer_company.clone(),
9571                        mp.amount,
9572                    )
9573                })
9574                .collect();
9575            if entity_ids.len() >= 2 {
9576                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9577                    self.config.treasury.netting.clone(),
9578                    seed + 97,
9579                );
9580                snapshot.netting_runs = netting_gen.generate(
9581                    &entity_ids,
9582                    currency,
9583                    start_date,
9584                    self.config.global.period_months,
9585                    &ic_amounts,
9586                );
9587            }
9588        }
9589
9590        // Generate treasury journal entries from the instruments we just created.
9591        {
9592            use datasynth_generators::treasury::TreasuryAccounting;
9593
9594            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9595            let mut treasury_jes = Vec::new();
9596
9597            // Debt interest accrual JEs
9598            if !snapshot.debt_instruments.is_empty() {
9599                let debt_jes =
9600                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9601                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9602                treasury_jes.extend(debt_jes);
9603            }
9604
9605            // Hedge mark-to-market JEs
9606            if !snapshot.hedging_instruments.is_empty() {
9607                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9608                    &snapshot.hedging_instruments,
9609                    &snapshot.hedge_relationships,
9610                    end_date,
9611                    entity_id,
9612                );
9613                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9614                treasury_jes.extend(hedge_jes);
9615            }
9616
9617            // Cash pool sweep JEs
9618            if !snapshot.cash_pool_sweeps.is_empty() {
9619                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9620                    &snapshot.cash_pool_sweeps,
9621                    entity_id,
9622                );
9623                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9624                treasury_jes.extend(sweep_jes);
9625            }
9626
9627            if !treasury_jes.is_empty() {
9628                debug!("Total treasury journal entries: {}", treasury_jes.len());
9629            }
9630            snapshot.journal_entries = treasury_jes;
9631        }
9632
9633        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9634        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9635        stats.cash_position_count = snapshot.cash_positions.len();
9636        stats.cash_forecast_count = snapshot.cash_forecasts.len();
9637        stats.cash_pool_count = snapshot.cash_pools.len();
9638
9639        info!(
9640            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9641            snapshot.debt_instruments.len(),
9642            snapshot.hedging_instruments.len(),
9643            snapshot.cash_positions.len(),
9644            snapshot.cash_forecasts.len(),
9645            snapshot.cash_pools.len(),
9646            snapshot.bank_guarantees.len(),
9647            snapshot.netting_runs.len(),
9648            snapshot.journal_entries.len(),
9649        );
9650        self.check_resources_with_log("post-treasury")?;
9651
9652        Ok(snapshot)
9653    }
9654
9655    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
9656    fn phase_project_accounting(
9657        &mut self,
9658        document_flows: &DocumentFlowSnapshot,
9659        hr: &HrSnapshot,
9660        stats: &mut EnhancedGenerationStatistics,
9661    ) -> SynthResult<ProjectAccountingSnapshot> {
9662        if !self.phase_config.generate_project_accounting {
9663            debug!("Phase 23: Skipped (project accounting disabled)");
9664            return Ok(ProjectAccountingSnapshot::default());
9665        }
9666        let degradation = self.check_resources()?;
9667        if degradation >= DegradationLevel::Reduced {
9668            debug!(
9669                "Phase skipped due to resource pressure (degradation: {:?})",
9670                degradation
9671            );
9672            return Ok(ProjectAccountingSnapshot::default());
9673        }
9674        info!("Phase 23: Generating Project Accounting Data");
9675
9676        let seed = self.seed;
9677        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9678            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9679        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9680        let company_code = self
9681            .config
9682            .companies
9683            .first()
9684            .map(|c| c.code.as_str())
9685            .unwrap_or("1000");
9686
9687        let mut snapshot = ProjectAccountingSnapshot::default();
9688
9689        // Generate projects with WBS hierarchies
9690        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9691            self.config.project_accounting.clone(),
9692            seed + 95,
9693        );
9694        let pool = project_gen.generate(company_code, start_date, end_date);
9695        snapshot.projects = pool.projects.clone();
9696
9697        // Link source documents to projects for cost allocation
9698        {
9699            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9700                Vec::new();
9701
9702            // Time entries
9703            for te in &hr.time_entries {
9704                let total_hours = te.hours_regular + te.hours_overtime;
9705                if total_hours > 0.0 {
9706                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9707                        id: te.entry_id.clone(),
9708                        entity_id: company_code.to_string(),
9709                        date: te.date,
9710                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9711                            .unwrap_or(rust_decimal::Decimal::ZERO),
9712                        source_type: CostSourceType::TimeEntry,
9713                        hours: Some(
9714                            rust_decimal::Decimal::from_f64_retain(total_hours)
9715                                .unwrap_or(rust_decimal::Decimal::ZERO),
9716                        ),
9717                    });
9718                }
9719            }
9720
9721            // Expense reports
9722            for er in &hr.expense_reports {
9723                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9724                    id: er.report_id.clone(),
9725                    entity_id: company_code.to_string(),
9726                    date: er.submission_date,
9727                    amount: er.total_amount,
9728                    source_type: CostSourceType::ExpenseReport,
9729                    hours: None,
9730                });
9731            }
9732
9733            // Purchase orders
9734            for po in &document_flows.purchase_orders {
9735                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9736                    id: po.header.document_id.clone(),
9737                    entity_id: company_code.to_string(),
9738                    date: po.header.document_date,
9739                    amount: po.total_net_amount,
9740                    source_type: CostSourceType::PurchaseOrder,
9741                    hours: None,
9742                });
9743            }
9744
9745            // Vendor invoices
9746            for vi in &document_flows.vendor_invoices {
9747                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9748                    id: vi.header.document_id.clone(),
9749                    entity_id: company_code.to_string(),
9750                    date: vi.header.document_date,
9751                    amount: vi.payable_amount,
9752                    source_type: CostSourceType::VendorInvoice,
9753                    hours: None,
9754                });
9755            }
9756
9757            if !source_docs.is_empty() && !pool.projects.is_empty() {
9758                let mut cost_gen =
9759                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
9760                        self.config.project_accounting.cost_allocation.clone(),
9761                        seed + 99,
9762                    );
9763                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9764            }
9765        }
9766
9767        // Generate change orders
9768        if self.config.project_accounting.change_orders.enabled {
9769            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9770                self.config.project_accounting.change_orders.clone(),
9771                seed + 96,
9772            );
9773            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9774        }
9775
9776        // Generate milestones
9777        if self.config.project_accounting.milestones.enabled {
9778            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9779                self.config.project_accounting.milestones.clone(),
9780                seed + 97,
9781            );
9782            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9783        }
9784
9785        // Generate earned value metrics (needs cost lines, so only if we have projects)
9786        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9787            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9788                self.config.project_accounting.earned_value.clone(),
9789                seed + 98,
9790            );
9791            snapshot.earned_value_metrics =
9792                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9793        }
9794
9795        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
9796        if self.config.project_accounting.revenue_recognition.enabled
9797            && !snapshot.projects.is_empty()
9798            && !snapshot.cost_lines.is_empty()
9799        {
9800            use datasynth_generators::project_accounting::RevenueGenerator;
9801            let rev_config = self.config.project_accounting.revenue_recognition.clone();
9802            let avg_contract_value =
9803                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9804                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9805
9806            // Build contract value tuples: only customer-type projects get revenue recognition.
9807            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
9808            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9809                snapshot
9810                    .projects
9811                    .iter()
9812                    .filter(|p| {
9813                        matches!(
9814                            p.project_type,
9815                            datasynth_core::models::ProjectType::Customer
9816                        )
9817                    })
9818                    .map(|p| {
9819                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
9820                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9821                        // budget × 1.25 → contract value
9822                        } else {
9823                            avg_contract_value
9824                        };
9825                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
9826                        (p.project_id.clone(), cv, etc)
9827                    })
9828                    .collect();
9829
9830            if !contract_values.is_empty() {
9831                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9832                snapshot.revenue_records = rev_gen.generate(
9833                    &snapshot.projects,
9834                    &snapshot.cost_lines,
9835                    &contract_values,
9836                    start_date,
9837                    end_date,
9838                );
9839                debug!(
9840                    "Generated {} revenue recognition records for {} customer projects",
9841                    snapshot.revenue_records.len(),
9842                    contract_values.len()
9843                );
9844            }
9845        }
9846
9847        stats.project_count = snapshot.projects.len();
9848        stats.project_change_order_count = snapshot.change_orders.len();
9849        stats.project_cost_line_count = snapshot.cost_lines.len();
9850
9851        info!(
9852            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9853            snapshot.projects.len(),
9854            snapshot.change_orders.len(),
9855            snapshot.milestones.len(),
9856            snapshot.earned_value_metrics.len()
9857        );
9858        self.check_resources_with_log("post-project-accounting")?;
9859
9860        Ok(snapshot)
9861    }
9862
9863    /// Phase 24: Generate process evolution and organizational events.
9864    fn phase_evolution_events(
9865        &mut self,
9866        stats: &mut EnhancedGenerationStatistics,
9867    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9868        if !self.phase_config.generate_evolution_events {
9869            debug!("Phase 24: Skipped (evolution events disabled)");
9870            return Ok((Vec::new(), Vec::new()));
9871        }
9872        info!("Phase 24: Generating Process Evolution + Organizational Events");
9873
9874        let seed = self.seed;
9875        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9876            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9877        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9878
9879        // Process evolution events
9880        let mut proc_gen =
9881            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9882                seed + 100,
9883            );
9884        let process_events = proc_gen.generate_events(start_date, end_date);
9885
9886        // Organizational events
9887        let company_codes: Vec<String> = self
9888            .config
9889            .companies
9890            .iter()
9891            .map(|c| c.code.clone())
9892            .collect();
9893        let mut org_gen =
9894            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9895                seed + 101,
9896            );
9897        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9898
9899        stats.process_evolution_event_count = process_events.len();
9900        stats.organizational_event_count = org_events.len();
9901
9902        info!(
9903            "Evolution events generated: {} process evolution, {} organizational",
9904            process_events.len(),
9905            org_events.len()
9906        );
9907        self.check_resources_with_log("post-evolution-events")?;
9908
9909        Ok((process_events, org_events))
9910    }
9911
9912    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
9913    /// data recovery, and regulatory changes).
9914    fn phase_disruption_events(
9915        &self,
9916        stats: &mut EnhancedGenerationStatistics,
9917    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9918        if !self.config.organizational_events.enabled {
9919            debug!("Phase 24b: Skipped (organizational events disabled)");
9920            return Ok(Vec::new());
9921        }
9922        info!("Phase 24b: Generating Disruption Events");
9923
9924        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9925            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9926        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9927
9928        let company_codes: Vec<String> = self
9929            .config
9930            .companies
9931            .iter()
9932            .map(|c| c.code.clone())
9933            .collect();
9934
9935        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9936        let events = gen.generate(start_date, end_date, &company_codes);
9937
9938        stats.disruption_event_count = events.len();
9939        info!("Disruption events generated: {} events", events.len());
9940        self.check_resources_with_log("post-disruption-events")?;
9941
9942        Ok(events)
9943    }
9944
9945    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
9946    ///
9947    /// Produces paired examples where each pair contains the original clean JE
9948    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
9949    /// split transaction). Useful for training anomaly detection models with
9950    /// known ground truth.
9951    fn phase_counterfactuals(
9952        &self,
9953        journal_entries: &[JournalEntry],
9954        stats: &mut EnhancedGenerationStatistics,
9955    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9956        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9957            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9958            return Ok(Vec::new());
9959        }
9960        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9961
9962        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9963
9964        let mut gen = CounterfactualGenerator::new(self.seed + 110);
9965
9966        // Rotating set of specs to produce diverse mutation types
9967        let specs = [
9968            CounterfactualSpec::ScaleAmount { factor: 2.5 },
9969            CounterfactualSpec::ShiftDate { days: -14 },
9970            CounterfactualSpec::SelfApprove,
9971            CounterfactualSpec::SplitTransaction { split_count: 3 },
9972        ];
9973
9974        let pairs: Vec<_> = journal_entries
9975            .iter()
9976            .enumerate()
9977            .map(|(i, je)| {
9978                let spec = &specs[i % specs.len()];
9979                gen.generate(je, spec)
9980            })
9981            .collect();
9982
9983        stats.counterfactual_pair_count = pairs.len();
9984        info!(
9985            "Counterfactual pairs generated: {} pairs from {} journal entries",
9986            pairs.len(),
9987            journal_entries.len()
9988        );
9989        self.check_resources_with_log("post-counterfactuals")?;
9990
9991        Ok(pairs)
9992    }
9993
9994    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
9995    ///
9996    /// Uses the anomaly labels (from Phase 8) to determine which documents are
9997    /// fraudulent, then generates probabilistic red flags on all chain documents.
9998    /// Non-fraud documents also receive red flags at a lower rate (false positives)
9999    /// to produce realistic ML training data.
10000    fn phase_red_flags(
10001        &self,
10002        anomaly_labels: &AnomalyLabels,
10003        document_flows: &DocumentFlowSnapshot,
10004        stats: &mut EnhancedGenerationStatistics,
10005    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
10006        if !self.config.fraud.enabled {
10007            debug!("Phase 26: Skipped (fraud generation disabled)");
10008            return Ok(Vec::new());
10009        }
10010        info!("Phase 26: Generating Fraud Red-Flag Indicators");
10011
10012        use datasynth_generators::fraud::RedFlagGenerator;
10013
10014        let generator = RedFlagGenerator::new();
10015        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
10016
10017        // Build a set of document IDs that are known-fraudulent from anomaly labels.
10018        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
10019            .labels
10020            .iter()
10021            .filter(|label| label.anomaly_type.is_intentional())
10022            .map(|label| label.document_id.as_str())
10023            .collect();
10024
10025        let mut flags = Vec::new();
10026
10027        // Iterate P2P chains: use the purchase order document ID as the chain key.
10028        for chain in &document_flows.p2p_chains {
10029            let doc_id = &chain.purchase_order.header.document_id;
10030            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
10031            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
10032        }
10033
10034        // Iterate O2C chains: use the sales order document ID as the chain key.
10035        for chain in &document_flows.o2c_chains {
10036            let doc_id = &chain.sales_order.header.document_id;
10037            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
10038            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
10039        }
10040
10041        stats.red_flag_count = flags.len();
10042        info!(
10043            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
10044            flags.len(),
10045            document_flows.p2p_chains.len(),
10046            document_flows.o2c_chains.len(),
10047            fraud_doc_ids.len()
10048        );
10049        self.check_resources_with_log("post-red-flags")?;
10050
10051        Ok(flags)
10052    }
10053
10054    /// Phase 26b: Generate collusion rings from employee/vendor pools.
10055    ///
10056    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
10057    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
10058    /// advance them over the simulation period.
10059    fn phase_collusion_rings(
10060        &mut self,
10061        stats: &mut EnhancedGenerationStatistics,
10062    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
10063        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
10064            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
10065            return Ok(Vec::new());
10066        }
10067        info!("Phase 26b: Generating Collusion Rings");
10068
10069        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10070            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10071        let months = self.config.global.period_months;
10072
10073        let employee_ids: Vec<String> = self
10074            .master_data
10075            .employees
10076            .iter()
10077            .map(|e| e.employee_id.clone())
10078            .collect();
10079        let vendor_ids: Vec<String> = self
10080            .master_data
10081            .vendors
10082            .iter()
10083            .map(|v| v.vendor_id.clone())
10084            .collect();
10085
10086        let mut generator =
10087            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
10088        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
10089
10090        stats.collusion_ring_count = rings.len();
10091        info!(
10092            "Collusion rings generated: {} rings, total members: {}",
10093            rings.len(),
10094            rings
10095                .iter()
10096                .map(datasynth_generators::fraud::CollusionRing::size)
10097                .sum::<usize>()
10098        );
10099        self.check_resources_with_log("post-collusion-rings")?;
10100
10101        Ok(rings)
10102    }
10103
10104    /// Phase 27: Generate bi-temporal version chains for vendor entities.
10105    ///
10106    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
10107    /// master data changes over time, supporting bi-temporal audit queries.
10108    fn phase_temporal_attributes(
10109        &mut self,
10110        stats: &mut EnhancedGenerationStatistics,
10111    ) -> SynthResult<
10112        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
10113    > {
10114        if !self.config.temporal_attributes.enabled {
10115            debug!("Phase 27: Skipped (temporal attributes disabled)");
10116            return Ok(Vec::new());
10117        }
10118        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
10119
10120        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10121            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10122
10123        // Build a TemporalAttributeConfig from the user's config.
10124        // Since Phase 27 is already gated on temporal_attributes.enabled,
10125        // default to enabling version chains so users get actual mutations.
10126        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
10127            || self.config.temporal_attributes.enabled;
10128        let temporal_config = {
10129            let ta = &self.config.temporal_attributes;
10130            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
10131                .enabled(ta.enabled)
10132                .closed_probability(ta.valid_time.closed_probability)
10133                .avg_validity_days(ta.valid_time.avg_validity_days)
10134                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
10135                .with_version_chains(if generate_version_chains {
10136                    ta.avg_versions_per_entity
10137                } else {
10138                    1.0
10139                })
10140                .build()
10141        };
10142        // Apply backdating settings if configured
10143        let temporal_config = if self
10144            .config
10145            .temporal_attributes
10146            .transaction_time
10147            .allow_backdating
10148        {
10149            let mut c = temporal_config;
10150            c.transaction_time.allow_backdating = true;
10151            c.transaction_time.backdating_probability = self
10152                .config
10153                .temporal_attributes
10154                .transaction_time
10155                .backdating_probability;
10156            c.transaction_time.max_backdate_days = self
10157                .config
10158                .temporal_attributes
10159                .transaction_time
10160                .max_backdate_days;
10161            c
10162        } else {
10163            temporal_config
10164        };
10165        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
10166            temporal_config,
10167            self.seed + 130,
10168            start_date,
10169        );
10170
10171        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
10172            self.seed + 130,
10173            datasynth_core::GeneratorType::Vendor,
10174        );
10175
10176        let chains: Vec<_> = self
10177            .master_data
10178            .vendors
10179            .iter()
10180            .map(|vendor| {
10181                let id = uuid_factory.next();
10182                gen.generate_version_chain(vendor.clone(), id)
10183            })
10184            .collect();
10185
10186        stats.temporal_version_chain_count = chains.len();
10187        info!("Temporal version chains generated: {} chains", chains.len());
10188        self.check_resources_with_log("post-temporal-attributes")?;
10189
10190        Ok(chains)
10191    }
10192
10193    /// Phase 28: Build entity relationship graph and cross-process links.
10194    ///
10195    /// Part 1 (gated on `relationship_strength.enabled`): builds an
10196    /// `EntityGraph` from master-data vendor/customer entities and
10197    /// journal-entry-derived transaction summaries.
10198    ///
10199    /// Part 2 (gated on `cross_process_links.enabled`): extracts
10200    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
10201    /// generates inventory-movement cross-process links.
10202    fn phase_entity_relationships(
10203        &self,
10204        journal_entries: &[JournalEntry],
10205        document_flows: &DocumentFlowSnapshot,
10206        stats: &mut EnhancedGenerationStatistics,
10207    ) -> SynthResult<(
10208        Option<datasynth_core::models::EntityGraph>,
10209        Vec<datasynth_core::models::CrossProcessLink>,
10210    )> {
10211        use datasynth_generators::relationships::{
10212            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
10213            TransactionSummary,
10214        };
10215
10216        let rs_enabled = self.config.relationship_strength.enabled;
10217        let cpl_enabled = self.config.cross_process_links.enabled
10218            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
10219
10220        if !rs_enabled && !cpl_enabled {
10221            debug!(
10222                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
10223            );
10224            return Ok((None, Vec::new()));
10225        }
10226
10227        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
10228
10229        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10230            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10231
10232        let company_code = self
10233            .config
10234            .companies
10235            .first()
10236            .map(|c| c.code.as_str())
10237            .unwrap_or("1000");
10238
10239        // Build the generator with matching config flags
10240        let gen_config = EntityGraphConfig {
10241            enabled: rs_enabled,
10242            cross_process: datasynth_generators::relationships::CrossProcessConfig {
10243                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
10244                enable_return_flows: false,
10245                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
10246                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
10247                // Use higher link rate for small datasets to avoid probabilistic empty results
10248                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
10249                    1.0
10250                } else {
10251                    0.30
10252                },
10253                ..Default::default()
10254            },
10255            strength_config: datasynth_generators::relationships::StrengthConfig {
10256                transaction_volume_weight: self
10257                    .config
10258                    .relationship_strength
10259                    .calculation
10260                    .transaction_volume_weight,
10261                transaction_count_weight: self
10262                    .config
10263                    .relationship_strength
10264                    .calculation
10265                    .transaction_count_weight,
10266                duration_weight: self
10267                    .config
10268                    .relationship_strength
10269                    .calculation
10270                    .relationship_duration_weight,
10271                recency_weight: self.config.relationship_strength.calculation.recency_weight,
10272                mutual_connections_weight: self
10273                    .config
10274                    .relationship_strength
10275                    .calculation
10276                    .mutual_connections_weight,
10277                recency_half_life_days: self
10278                    .config
10279                    .relationship_strength
10280                    .calculation
10281                    .recency_half_life_days,
10282            },
10283            ..Default::default()
10284        };
10285
10286        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
10287
10288        // --- Part 1: Entity Relationship Graph ---
10289        let entity_graph = if rs_enabled {
10290            // Build EntitySummary lists from master data
10291            let vendor_summaries: Vec<EntitySummary> = self
10292                .master_data
10293                .vendors
10294                .iter()
10295                .map(|v| {
10296                    EntitySummary::new(
10297                        &v.vendor_id,
10298                        &v.name,
10299                        datasynth_core::models::GraphEntityType::Vendor,
10300                        start_date,
10301                    )
10302                })
10303                .collect();
10304
10305            let customer_summaries: Vec<EntitySummary> = self
10306                .master_data
10307                .customers
10308                .iter()
10309                .map(|c| {
10310                    EntitySummary::new(
10311                        &c.customer_id,
10312                        &c.name,
10313                        datasynth_core::models::GraphEntityType::Customer,
10314                        start_date,
10315                    )
10316                })
10317                .collect();
10318
10319            // Build transaction summaries from journal entries.
10320            // Key = (company_code, trading_partner) for entries that have a
10321            // trading partner.  This captures intercompany flows and any JE
10322            // whose line items carry a trading_partner reference.
10323            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
10324                std::collections::HashMap::new();
10325
10326            for je in journal_entries {
10327                let cc = je.header.company_code.clone();
10328                let posting_date = je.header.posting_date;
10329                for line in &je.lines {
10330                    if let Some(ref tp) = line.trading_partner {
10331                        let amount = if line.debit_amount > line.credit_amount {
10332                            line.debit_amount
10333                        } else {
10334                            line.credit_amount
10335                        };
10336                        let entry = txn_summaries
10337                            .entry((cc.clone(), tp.clone()))
10338                            .or_insert_with(|| TransactionSummary {
10339                                total_volume: rust_decimal::Decimal::ZERO,
10340                                transaction_count: 0,
10341                                first_transaction_date: posting_date,
10342                                last_transaction_date: posting_date,
10343                                related_entities: std::collections::HashSet::new(),
10344                            });
10345                        entry.total_volume += amount;
10346                        entry.transaction_count += 1;
10347                        if posting_date < entry.first_transaction_date {
10348                            entry.first_transaction_date = posting_date;
10349                        }
10350                        if posting_date > entry.last_transaction_date {
10351                            entry.last_transaction_date = posting_date;
10352                        }
10353                        entry.related_entities.insert(cc.clone());
10354                    }
10355                }
10356            }
10357
10358            // Also extract transaction relationships from document flow chains.
10359            // P2P chains: Company → Vendor relationships
10360            for chain in &document_flows.p2p_chains {
10361                let cc = chain.purchase_order.header.company_code.clone();
10362                let vendor_id = chain.purchase_order.vendor_id.clone();
10363                let po_date = chain.purchase_order.header.document_date;
10364                let amount = chain.purchase_order.total_net_amount;
10365
10366                let entry = txn_summaries
10367                    .entry((cc.clone(), vendor_id))
10368                    .or_insert_with(|| TransactionSummary {
10369                        total_volume: rust_decimal::Decimal::ZERO,
10370                        transaction_count: 0,
10371                        first_transaction_date: po_date,
10372                        last_transaction_date: po_date,
10373                        related_entities: std::collections::HashSet::new(),
10374                    });
10375                entry.total_volume += amount;
10376                entry.transaction_count += 1;
10377                if po_date < entry.first_transaction_date {
10378                    entry.first_transaction_date = po_date;
10379                }
10380                if po_date > entry.last_transaction_date {
10381                    entry.last_transaction_date = po_date;
10382                }
10383                entry.related_entities.insert(cc);
10384            }
10385
10386            // O2C chains: Company → Customer relationships
10387            for chain in &document_flows.o2c_chains {
10388                let cc = chain.sales_order.header.company_code.clone();
10389                let customer_id = chain.sales_order.customer_id.clone();
10390                let so_date = chain.sales_order.header.document_date;
10391                let amount = chain.sales_order.total_net_amount;
10392
10393                let entry = txn_summaries
10394                    .entry((cc.clone(), customer_id))
10395                    .or_insert_with(|| TransactionSummary {
10396                        total_volume: rust_decimal::Decimal::ZERO,
10397                        transaction_count: 0,
10398                        first_transaction_date: so_date,
10399                        last_transaction_date: so_date,
10400                        related_entities: std::collections::HashSet::new(),
10401                    });
10402                entry.total_volume += amount;
10403                entry.transaction_count += 1;
10404                if so_date < entry.first_transaction_date {
10405                    entry.first_transaction_date = so_date;
10406                }
10407                if so_date > entry.last_transaction_date {
10408                    entry.last_transaction_date = so_date;
10409                }
10410                entry.related_entities.insert(cc);
10411            }
10412
10413            let as_of_date = journal_entries
10414                .last()
10415                .map(|je| je.header.posting_date)
10416                .unwrap_or(start_date);
10417
10418            let graph = gen.generate_entity_graph(
10419                company_code,
10420                as_of_date,
10421                &vendor_summaries,
10422                &customer_summaries,
10423                &txn_summaries,
10424            );
10425
10426            info!(
10427                "Entity relationship graph: {} nodes, {} edges",
10428                graph.nodes.len(),
10429                graph.edges.len()
10430            );
10431            stats.entity_relationship_node_count = graph.nodes.len();
10432            stats.entity_relationship_edge_count = graph.edges.len();
10433            Some(graph)
10434        } else {
10435            None
10436        };
10437
10438        // --- Part 2: Cross-Process Links ---
10439        let cross_process_links = if cpl_enabled {
10440            // Build GoodsReceiptRef from P2P chains
10441            let gr_refs: Vec<GoodsReceiptRef> = document_flows
10442                .p2p_chains
10443                .iter()
10444                .flat_map(|chain| {
10445                    let vendor_id = chain.purchase_order.vendor_id.clone();
10446                    let cc = chain.purchase_order.header.company_code.clone();
10447                    chain.goods_receipts.iter().flat_map(move |gr| {
10448                        gr.items.iter().filter_map({
10449                            let doc_id = gr.header.document_id.clone();
10450                            let v_id = vendor_id.clone();
10451                            let company = cc.clone();
10452                            let receipt_date = gr.header.document_date;
10453                            move |item| {
10454                                item.base
10455                                    .material_id
10456                                    .as_ref()
10457                                    .map(|mat_id| GoodsReceiptRef {
10458                                        document_id: doc_id.clone(),
10459                                        material_id: mat_id.clone(),
10460                                        quantity: item.base.quantity,
10461                                        receipt_date,
10462                                        vendor_id: v_id.clone(),
10463                                        company_code: company.clone(),
10464                                    })
10465                            }
10466                        })
10467                    })
10468                })
10469                .collect();
10470
10471            // Build DeliveryRef from O2C chains
10472            let del_refs: Vec<DeliveryRef> = document_flows
10473                .o2c_chains
10474                .iter()
10475                .flat_map(|chain| {
10476                    let customer_id = chain.sales_order.customer_id.clone();
10477                    let cc = chain.sales_order.header.company_code.clone();
10478                    chain.deliveries.iter().flat_map(move |del| {
10479                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10480                        del.items.iter().filter_map({
10481                            let doc_id = del.header.document_id.clone();
10482                            let c_id = customer_id.clone();
10483                            let company = cc.clone();
10484                            move |item| {
10485                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10486                                    document_id: doc_id.clone(),
10487                                    material_id: mat_id.clone(),
10488                                    quantity: item.base.quantity,
10489                                    delivery_date,
10490                                    customer_id: c_id.clone(),
10491                                    company_code: company.clone(),
10492                                })
10493                            }
10494                        })
10495                    })
10496                })
10497                .collect();
10498
10499            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10500            info!("Cross-process links generated: {} links", links.len());
10501            stats.cross_process_link_count = links.len();
10502            links
10503        } else {
10504            Vec::new()
10505        };
10506
10507        self.check_resources_with_log("post-entity-relationships")?;
10508        Ok((entity_graph, cross_process_links))
10509    }
10510
10511    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
10512    fn phase_industry_data(
10513        &self,
10514        stats: &mut EnhancedGenerationStatistics,
10515    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10516        if !self.config.industry_specific.enabled {
10517            return None;
10518        }
10519        info!("Phase 29: Generating industry-specific data");
10520        let output = datasynth_generators::industry::factory::generate_industry_output(
10521            self.config.global.industry,
10522        );
10523        stats.industry_gl_account_count = output.gl_accounts.len();
10524        info!(
10525            "Industry data generated: {} GL accounts for {:?}",
10526            output.gl_accounts.len(),
10527            self.config.global.industry
10528        );
10529        Some(output)
10530    }
10531
10532    /// Phase 3b: Generate opening balances for each company.
10533    fn phase_opening_balances(
10534        &mut self,
10535        coa: &Arc<ChartOfAccounts>,
10536        stats: &mut EnhancedGenerationStatistics,
10537    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10538        if !self.config.balance.generate_opening_balances {
10539            debug!("Phase 3b: Skipped (opening balance generation disabled)");
10540            return Ok(Vec::new());
10541        }
10542        info!("Phase 3b: Generating Opening Balances");
10543
10544        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10545            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10546        let fiscal_year = start_date.year();
10547
10548        // **v5.3** — When the shard context supplies prior-period
10549        // opening-balance carryovers, use them directly instead of
10550        // calling `OpeningBalanceGenerator`.  This implements multi-
10551        // period continuity: period N+1 opens with period N's closing
10552        // BS positions exactly, rather than re-rolling the industry-
10553        // mix generator and losing the audit trail.
10554        //
10555        // Empty `opening_balances` (the v5.0–v5.2 default) falls
10556        // through to the generator path — byte-identical behaviour
10557        // for single-period engagements.
10558        if let Some(ctx) = &self.shard_context {
10559            if !ctx.opening_balances.is_empty() {
10560                debug!(
10561                    "Phase 3b: using v5.3 opening-balance carryover ({} accounts)",
10562                    ctx.opening_balances.len()
10563                );
10564                let mut results = Vec::new();
10565                for company in &self.config.companies {
10566                    let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10567                        .opening_balances
10568                        .iter()
10569                        .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10570                        .collect();
10571                    let total_assets = ctx
10572                        .opening_balances
10573                        .iter()
10574                        .filter(|ob| {
10575                            matches!(
10576                                ob.account_type,
10577                                AccountType::Asset | AccountType::ContraAsset
10578                            )
10579                        })
10580                        .map(|ob| ob.net_balance())
10581                        .sum::<rust_decimal::Decimal>();
10582                    let total_liabilities = ctx
10583                        .opening_balances
10584                        .iter()
10585                        .filter(|ob| {
10586                            matches!(
10587                                ob.account_type,
10588                                AccountType::Liability | AccountType::ContraLiability
10589                            )
10590                        })
10591                        .map(|ob| ob.net_balance())
10592                        .sum::<rust_decimal::Decimal>();
10593                    let total_equity = ctx
10594                        .opening_balances
10595                        .iter()
10596                        .filter(|ob| {
10597                            matches!(
10598                                ob.account_type,
10599                                AccountType::Equity | AccountType::ContraEquity
10600                            )
10601                        })
10602                        .map(|ob| ob.net_balance())
10603                        .sum::<rust_decimal::Decimal>();
10604                    let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10605                        < rust_decimal::Decimal::ONE;
10606                    results.push(GeneratedOpeningBalance {
10607                        company_code: company.code.clone(),
10608                        as_of_date: start_date,
10609                        balances,
10610                        total_assets,
10611                        total_liabilities,
10612                        total_equity,
10613                        is_balanced,
10614                        calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10615                            current_ratio: None,
10616                            quick_ratio: None,
10617                            debt_to_equity: None,
10618                            working_capital: rust_decimal::Decimal::ZERO,
10619                        },
10620                    });
10621                }
10622                stats.opening_balance_count = results.len();
10623                info!(
10624                    "Phase 3b: opening-balance carryover applied ({} companies)",
10625                    results.len()
10626                );
10627                self.check_resources_with_log("post-opening-balances")?;
10628                return Ok(results);
10629            }
10630        }
10631
10632        let industry = match self.config.global.industry {
10633            IndustrySector::Manufacturing => IndustryType::Manufacturing,
10634            IndustrySector::Retail => IndustryType::Retail,
10635            IndustrySector::FinancialServices => IndustryType::Financial,
10636            IndustrySector::Healthcare => IndustryType::Healthcare,
10637            IndustrySector::Technology => IndustryType::Technology,
10638            _ => IndustryType::Manufacturing,
10639        };
10640
10641        let config = datasynth_generators::OpeningBalanceConfig {
10642            industry,
10643            ..Default::default()
10644        };
10645        let mut gen =
10646            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10647
10648        let mut results = Vec::new();
10649        for company in &self.config.companies {
10650            let spec = OpeningBalanceSpec::new(
10651                company.code.clone(),
10652                start_date,
10653                fiscal_year,
10654                company.currency.clone(),
10655                rust_decimal::Decimal::new(10_000_000, 0),
10656                industry,
10657            );
10658            let ob = gen.generate(&spec, coa, start_date, &company.code);
10659            results.push(ob);
10660        }
10661
10662        stats.opening_balance_count = results.len();
10663        info!("Opening balances generated: {} companies", results.len());
10664        self.check_resources_with_log("post-opening-balances")?;
10665
10666        Ok(results)
10667    }
10668
10669    /// Phase 9b: Reconcile GL control accounts to subledger balances.
10670    fn phase_subledger_reconciliation(
10671        &mut self,
10672        subledger: &SubledgerSnapshot,
10673        entries: &[JournalEntry],
10674        stats: &mut EnhancedGenerationStatistics,
10675    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10676        if !self.config.balance.reconcile_subledgers {
10677            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10678            return Ok(Vec::new());
10679        }
10680        info!("Phase 9b: Reconciling GL to subledger balances");
10681
10682        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10683            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10684            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10685
10686        // Build GL balance map from journal entries using a balance tracker
10687        let tracker_config = BalanceTrackerConfig {
10688            validate_on_each_entry: false,
10689            track_history: false,
10690            fail_on_validation_error: false,
10691            ..Default::default()
10692        };
10693        let recon_currency = self
10694            .config
10695            .companies
10696            .first()
10697            .map(|c| c.currency.clone())
10698            .unwrap_or_else(|| "USD".to_string());
10699        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10700        let validation_errors = tracker.apply_entries(entries);
10701        if !validation_errors.is_empty() {
10702            warn!(
10703                error_count = validation_errors.len(),
10704                "Balance tracker encountered validation errors during subledger reconciliation"
10705            );
10706            for err in &validation_errors {
10707                debug!("Balance validation error: {:?}", err);
10708            }
10709        }
10710
10711        let mut engine = datasynth_generators::ReconciliationEngine::new(
10712            datasynth_generators::ReconciliationConfig::default(),
10713        );
10714
10715        let mut results = Vec::new();
10716        let company_code = self
10717            .config
10718            .companies
10719            .first()
10720            .map(|c| c.code.as_str())
10721            .unwrap_or("1000");
10722
10723        // Reconcile AR
10724        if !subledger.ar_invoices.is_empty() {
10725            let gl_balance = tracker
10726                .get_account_balance(
10727                    company_code,
10728                    datasynth_core::accounts::control_accounts::AR_CONTROL,
10729                )
10730                .map(|b| b.closing_balance)
10731                .unwrap_or_default();
10732            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10733            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10734        }
10735
10736        // Reconcile AP
10737        if !subledger.ap_invoices.is_empty() {
10738            let gl_balance = tracker
10739                .get_account_balance(
10740                    company_code,
10741                    datasynth_core::accounts::control_accounts::AP_CONTROL,
10742                )
10743                .map(|b| b.closing_balance)
10744                .unwrap_or_default();
10745            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10746            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10747        }
10748
10749        // Reconcile FA
10750        if !subledger.fa_records.is_empty() {
10751            let gl_asset_balance = tracker
10752                .get_account_balance(
10753                    company_code,
10754                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10755                )
10756                .map(|b| b.closing_balance)
10757                .unwrap_or_default();
10758            let gl_accum_depr_balance = tracker
10759                .get_account_balance(
10760                    company_code,
10761                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10762                )
10763                .map(|b| b.closing_balance)
10764                .unwrap_or_default();
10765            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10766                subledger.fa_records.iter().collect();
10767            let (asset_recon, depr_recon) = engine.reconcile_fa(
10768                company_code,
10769                end_date,
10770                gl_asset_balance,
10771                gl_accum_depr_balance,
10772                &fa_refs,
10773            );
10774            results.push(asset_recon);
10775            results.push(depr_recon);
10776        }
10777
10778        // Reconcile Inventory
10779        if !subledger.inventory_positions.is_empty() {
10780            let gl_balance = tracker
10781                .get_account_balance(
10782                    company_code,
10783                    datasynth_core::accounts::control_accounts::INVENTORY,
10784                )
10785                .map(|b| b.closing_balance)
10786                .unwrap_or_default();
10787            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10788                subledger.inventory_positions.iter().collect();
10789            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10790        }
10791
10792        stats.subledger_reconciliation_count = results.len();
10793        let passed = results.iter().filter(|r| r.is_balanced()).count();
10794        let failed = results.len() - passed;
10795        info!(
10796            "Subledger reconciliation: {} checks, {} passed, {} failed",
10797            results.len(),
10798            passed,
10799            failed
10800        );
10801        self.check_resources_with_log("post-subledger-reconciliation")?;
10802
10803        Ok(results)
10804    }
10805
10806    /// Generate the chart of accounts.
10807    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10808        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10809
10810        let coa_framework = self.resolve_coa_framework();
10811
10812        let mut gen = ChartOfAccountsGenerator::new(
10813            self.config.chart_of_accounts.complexity,
10814            self.config.global.industry,
10815            self.seed,
10816        )
10817        .with_coa_framework(coa_framework)
10818        // v5.7.0 — honour the opt-in industry-pack expansion flag.
10819        .with_expand_industry_subaccounts(
10820            self.config.chart_of_accounts.expand_industry_subaccounts,
10821        );
10822
10823        let mut built = gen.generate();
10824        // v4.4.1: propagate the accounting framework label from config
10825        // onto the CoA struct so SDK consumers can read it without
10826        // cross-referencing the config (they previously saw null).
10827        if self.config.accounting_standards.enabled {
10828            use datasynth_config::schema::AccountingFrameworkConfig;
10829            built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10830                match f {
10831                    AccountingFrameworkConfig::UsGaap => "us_gaap",
10832                    AccountingFrameworkConfig::Ifrs => "ifrs",
10833                    AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10834                    AccountingFrameworkConfig::GermanGaap => "german_gaap",
10835                    AccountingFrameworkConfig::DualReporting => "dual_reporting",
10836                }
10837                .to_string()
10838            });
10839        }
10840        // SP4.2 W8.2 + W7.1 — remap synthetic account numbers to corpus
10841        // ones first (W8.2), then enrich descriptions via the overlay (W7.1).
10842        // Applied before Arc::new so we only build one Arc (no clone needed).
10843        if let Some(ref cached) = self.cached_priors {
10844            if let Some(ref coa_prior) = cached.coa_semantic {
10845                use datasynth_generators::coa_generator::{
10846                    remap_account_numbers_to_prior, ChartOfAccountsGenerator,
10847                };
10848                // W8.2 — replace synthetic account numbers with corpus
10849                // ones so the W7.1 overlay fires at ~80% instead of ~16%.
10850                let mut rng =
10851                    rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_200));
10852                let remapped = remap_account_numbers_to_prior(&mut built, coa_prior, &mut rng);
10853                tracing::info!(
10854                    target: "datasynth_runtime::coa",
10855                    remapped,
10856                    total = built.accounts.len(),
10857                    "SP4.2 W8.2 — remapped synthetic account numbers to prior-matched corpus values"
10858                );
10859                // W7.1 — now overlay descriptions / class metadata for the
10860                // (now mostly corpus-numbered) accounts.
10861                let applied =
10862                    ChartOfAccountsGenerator::apply_coa_semantic_prior(&mut built, coa_prior);
10863                tracing::info!(
10864                    target: "datasynth_runtime::coa",
10865                    applied,
10866                    total = built.accounts.len(),
10867                    "SP4.2 W7.1 — overlaid real CoA semantic entries onto synthetic accounts"
10868                );
10869            }
10870            // SP6 — taxonomy overlay: run AFTER the semantic overlay so
10871            // taxonomy-templated accounts take precedence over verbatim
10872            // semantic descriptions.  Uses SyntheticExampleResolver because
10873            // the CoA is built before master-data pools are populated (so
10874            // vendor/customer names are not yet available).
10875            if let Some(tx) = cached.text_taxonomy.as_ref() {
10876                use datasynth_core::distributions::text_taxonomy::SyntheticExampleResolver;
10877                use datasynth_generators::coa_generator::overlay_coa_taxonomy;
10878                let mut resolver = SyntheticExampleResolver;
10879                let mut rng =
10880                    rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_201));
10881                overlay_coa_taxonomy(&mut built, tx, &mut resolver, &mut rng);
10882                tracing::info!(
10883                    target: "datasynth_runtime::coa",
10884                    total = built.accounts.len(),
10885                    "SP6 — overlaid text-taxonomy templates onto CoA descriptions"
10886                );
10887            }
10888        }
10889
10890        let coa = Arc::new(built);
10891        self.coa = Some(Arc::clone(&coa));
10892
10893        if let Some(pb) = pb {
10894            pb.finish_with_message("Chart of Accounts complete");
10895        }
10896
10897        Ok(coa)
10898    }
10899
10900    /// Generate master data entities.
10901    fn generate_master_data(&mut self) -> SynthResult<()> {
10902        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10903            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10904        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10905
10906        let total = self.config.companies.len() as u64 * 5; // 5 entity types
10907        let pb = self.create_progress_bar(total, "Generating Master Data");
10908
10909        // Resolve country pack once for all companies (uses primary company's country)
10910        let pack = self.primary_pack().clone();
10911
10912        // Capture config values needed inside the parallel closure
10913        let vendors_per_company = self.phase_config.vendors_per_company;
10914        let customers_per_company = self.phase_config.customers_per_company;
10915        let materials_per_company = self.phase_config.materials_per_company;
10916        let assets_per_company = self.phase_config.assets_per_company;
10917        let coa_framework = self.resolve_coa_framework();
10918
10919        // Generate all master data in parallel across companies.
10920        // Each company's data is independent, making this embarrassingly parallel.
10921        let per_company_results: Vec<_> = self
10922            .config
10923            .companies
10924            .par_iter()
10925            .enumerate()
10926            .map(|(i, company)| {
10927                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10928                let pack = pack.clone();
10929
10930                // Generate vendors (offset counter so IDs are globally unique across companies)
10931                let mut vendor_gen = VendorGenerator::new(company_seed);
10932                vendor_gen.set_country_pack(pack.clone());
10933                vendor_gen.set_coa_framework(coa_framework);
10934                vendor_gen.set_counter_offset(i * vendors_per_company);
10935                // v3.2.0+: user-supplied bank names (and future template
10936                // strings) flow through the shared provider.
10937                vendor_gen.set_template_provider(self.template_provider.clone());
10938                // Wire vendor network config when enabled
10939                if self.config.vendor_network.enabled {
10940                    let vn = &self.config.vendor_network;
10941                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10942                        enabled: true,
10943                        depth: vn.depth,
10944                        tier1_count: datasynth_generators::TierCountConfig::new(
10945                            vn.tier1.min,
10946                            vn.tier1.max,
10947                        ),
10948                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
10949                            vn.tier2_per_parent.min,
10950                            vn.tier2_per_parent.max,
10951                        ),
10952                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
10953                            vn.tier3_per_parent.min,
10954                            vn.tier3_per_parent.max,
10955                        ),
10956                        cluster_distribution: datasynth_generators::ClusterDistribution {
10957                            reliable_strategic: vn.clusters.reliable_strategic,
10958                            standard_operational: vn.clusters.standard_operational,
10959                            transactional: vn.clusters.transactional,
10960                            problematic: vn.clusters.problematic,
10961                        },
10962                        concentration_limits: datasynth_generators::ConcentrationLimits {
10963                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10964                            max_top5: vn.dependencies.top_5_concentration,
10965                        },
10966                        ..datasynth_generators::VendorNetworkConfig::default()
10967                    });
10968                }
10969                let vendor_pool =
10970                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10971
10972                // Generate customers (offset counter so IDs are globally unique across companies)
10973                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10974                customer_gen.set_country_pack(pack.clone());
10975                customer_gen.set_coa_framework(coa_framework);
10976                customer_gen.set_counter_offset(i * customers_per_company);
10977                // v3.2.0+: user-supplied customer names flow through the shared provider.
10978                customer_gen.set_template_provider(self.template_provider.clone());
10979                // Wire customer segmentation config when enabled
10980                if self.config.customer_segmentation.enabled {
10981                    let cs = &self.config.customer_segmentation;
10982                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10983                        enabled: true,
10984                        segment_distribution: datasynth_generators::SegmentDistribution {
10985                            enterprise: cs.value_segments.enterprise.customer_share,
10986                            mid_market: cs.value_segments.mid_market.customer_share,
10987                            smb: cs.value_segments.smb.customer_share,
10988                            consumer: cs.value_segments.consumer.customer_share,
10989                        },
10990                        referral_config: datasynth_generators::ReferralConfig {
10991                            enabled: cs.networks.referrals.enabled,
10992                            referral_rate: cs.networks.referrals.referral_rate,
10993                            ..Default::default()
10994                        },
10995                        hierarchy_config: datasynth_generators::HierarchyConfig {
10996                            enabled: cs.networks.corporate_hierarchies.enabled,
10997                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10998                            ..Default::default()
10999                        },
11000                        ..Default::default()
11001                    };
11002                    customer_gen.set_segmentation_config(seg_cfg);
11003                }
11004                let customer_pool = customer_gen.generate_customer_pool(
11005                    customers_per_company,
11006                    &company.code,
11007                    start_date,
11008                );
11009
11010                // Generate materials (offset counter so IDs are globally unique across companies)
11011                let mut material_gen = MaterialGenerator::new(company_seed + 200);
11012                material_gen.set_country_pack(pack.clone());
11013                material_gen.set_counter_offset(i * materials_per_company);
11014                // v3.2.1+: user-supplied material descriptions flow through shared provider
11015                material_gen.set_template_provider(self.template_provider.clone());
11016                let material_pool = material_gen.generate_material_pool(
11017                    materials_per_company,
11018                    &company.code,
11019                    start_date,
11020                );
11021
11022                // Generate fixed assets
11023                let mut asset_gen = AssetGenerator::new(company_seed + 300);
11024                // v3.2.1+: user-supplied asset descriptions flow through shared provider
11025                asset_gen.set_template_provider(self.template_provider.clone());
11026                let asset_pool = asset_gen.generate_asset_pool(
11027                    assets_per_company,
11028                    &company.code,
11029                    (start_date, end_date),
11030                );
11031
11032                // Generate employees
11033                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
11034                employee_gen.set_country_pack(pack);
11035                // v3.2.1+: user-supplied department names flow through shared provider
11036                employee_gen.set_template_provider(self.template_provider.clone());
11037                let employee_pool =
11038                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
11039
11040                // Generate employee change history (2-5 events per employee)
11041                let employee_change_history =
11042                    employee_gen.generate_all_change_history(&employee_pool, end_date);
11043
11044                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
11045                let employee_ids: Vec<String> = employee_pool
11046                    .employees
11047                    .iter()
11048                    .map(|e| e.employee_id.clone())
11049                    .collect();
11050                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
11051                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
11052
11053                // v5.1: profit centre hierarchy (two-level: top-level
11054                // segment / region / product-group nodes + sub-units).
11055                let mut pc_gen =
11056                    datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
11057                let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
11058
11059                (
11060                    vendor_pool.vendors,
11061                    customer_pool.customers,
11062                    material_pool.materials,
11063                    asset_pool.assets,
11064                    employee_pool.employees,
11065                    employee_change_history,
11066                    cost_centers,
11067                    profit_centers,
11068                )
11069            })
11070            .collect();
11071
11072        // Aggregate results from all companies
11073        for (
11074            vendors,
11075            customers,
11076            materials,
11077            assets,
11078            employees,
11079            change_history,
11080            cost_centers,
11081            profit_centers,
11082        ) in per_company_results
11083        {
11084            self.master_data.vendors.extend(vendors);
11085            self.master_data.customers.extend(customers);
11086            self.master_data.materials.extend(materials);
11087            self.master_data.assets.extend(assets);
11088            self.master_data.employees.extend(employees);
11089            self.master_data.cost_centers.extend(cost_centers);
11090            self.master_data.profit_centers.extend(profit_centers);
11091            self.master_data
11092                .employee_change_history
11093                .extend(change_history);
11094        }
11095
11096        // v3.3.0: one OrganizationalProfile per company. Cheap to
11097        // generate (derived from industry + company_code) so we
11098        // always emit when master data runs; no separate config flag.
11099        {
11100            use datasynth_core::models::IndustrySector;
11101            use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
11102            let industry = match self.config.global.industry {
11103                IndustrySector::Manufacturing => "manufacturing",
11104                IndustrySector::Retail => "retail",
11105                IndustrySector::FinancialServices => "financial_services",
11106                IndustrySector::Technology => "technology",
11107                IndustrySector::Healthcare => "healthcare",
11108                _ => "other",
11109            };
11110            for (i, company) in self.config.companies.iter().enumerate() {
11111                let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
11112                let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
11113                let profile = profile_gen.generate(&company.code, industry);
11114                self.master_data.organizational_profiles.push(profile);
11115            }
11116        }
11117
11118        if let Some(pb) = &pb {
11119            pb.inc(total);
11120        }
11121        if let Some(pb) = pb {
11122            pb.finish_with_message("Master data generation complete");
11123        }
11124
11125        Ok(())
11126    }
11127
11128    /// Generate document flows (P2P and O2C).
11129    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
11130        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11131            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11132
11133        // Generate P2P chains
11134        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
11135        let months = (self.config.global.period_months as usize).max(1);
11136        let p2p_count = self
11137            .phase_config
11138            .p2p_chains
11139            .min(self.master_data.vendors.len() * 2 * months);
11140        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
11141
11142        // Convert P2P config from schema to generator config
11143        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
11144        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
11145        p2p_gen.set_country_pack(self.primary_pack().clone());
11146        // v3.4.1: wire temporal context so PO/GR/invoice/payment dates snap
11147        // to business days. No-op when `temporal_patterns.business_days.
11148        // enabled = false`.
11149        if let Some(ctx) = &self.temporal_context {
11150            p2p_gen.set_temporal_context(Arc::clone(ctx));
11151        }
11152
11153        for i in 0..p2p_count {
11154            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
11155            let materials: Vec<&Material> = self
11156                .master_data
11157                .materials
11158                .iter()
11159                .skip(i % self.master_data.materials.len().max(1))
11160                .take(2.min(self.master_data.materials.len()))
11161                .collect();
11162
11163            if materials.is_empty() {
11164                continue;
11165            }
11166
11167            let company = &self.config.companies[i % self.config.companies.len()];
11168            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
11169            let fiscal_period = po_date.month() as u8;
11170            let created_by = if self.master_data.employees.is_empty() {
11171                "SYSTEM"
11172            } else {
11173                self.master_data.employees[i % self.master_data.employees.len()]
11174                    .user_id
11175                    .as_str()
11176            };
11177
11178            let chain = p2p_gen.generate_chain(
11179                &company.code,
11180                vendor,
11181                &materials,
11182                po_date,
11183                start_date.year() as u16,
11184                fiscal_period,
11185                created_by,
11186            );
11187
11188            // Flatten documents
11189            flows.purchase_orders.push(chain.purchase_order.clone());
11190            flows.goods_receipts.extend(chain.goods_receipts.clone());
11191            if let Some(vi) = &chain.vendor_invoice {
11192                flows.vendor_invoices.push(vi.clone());
11193            }
11194            if let Some(payment) = &chain.payment {
11195                flows.payments.push(payment.clone());
11196            }
11197            for remainder in &chain.remainder_payments {
11198                flows.payments.push(remainder.clone());
11199            }
11200            flows.p2p_chains.push(chain);
11201
11202            if let Some(pb) = &pb {
11203                pb.inc(1);
11204            }
11205        }
11206
11207        if let Some(pb) = pb {
11208            pb.finish_with_message("P2P document flows complete");
11209        }
11210
11211        // Generate O2C chains
11212        // Cap at ~2 SOs per customer per month to keep order volume realistic
11213        let o2c_count = self
11214            .phase_config
11215            .o2c_chains
11216            .min(self.master_data.customers.len() * 2 * months);
11217        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
11218
11219        // Convert O2C config from schema to generator config
11220        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
11221        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
11222        o2c_gen.set_country_pack(self.primary_pack().clone());
11223        // v3.4.1: wire temporal context (no-op when business_days disabled).
11224        if let Some(ctx) = &self.temporal_context {
11225            o2c_gen.set_temporal_context(Arc::clone(ctx));
11226        }
11227
11228        for i in 0..o2c_count {
11229            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
11230            let materials: Vec<&Material> = self
11231                .master_data
11232                .materials
11233                .iter()
11234                .skip(i % self.master_data.materials.len().max(1))
11235                .take(2.min(self.master_data.materials.len()))
11236                .collect();
11237
11238            if materials.is_empty() {
11239                continue;
11240            }
11241
11242            let company = &self.config.companies[i % self.config.companies.len()];
11243            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
11244            let fiscal_period = so_date.month() as u8;
11245            let created_by = if self.master_data.employees.is_empty() {
11246                "SYSTEM"
11247            } else {
11248                self.master_data.employees[i % self.master_data.employees.len()]
11249                    .user_id
11250                    .as_str()
11251            };
11252
11253            let chain = o2c_gen.generate_chain(
11254                &company.code,
11255                customer,
11256                &materials,
11257                so_date,
11258                start_date.year() as u16,
11259                fiscal_period,
11260                created_by,
11261            );
11262
11263            // Flatten documents
11264            flows.sales_orders.push(chain.sales_order.clone());
11265            flows.deliveries.extend(chain.deliveries.clone());
11266            if let Some(ci) = &chain.customer_invoice {
11267                flows.customer_invoices.push(ci.clone());
11268            }
11269            if let Some(receipt) = &chain.customer_receipt {
11270                flows.payments.push(receipt.clone());
11271            }
11272            // Extract remainder receipts (follow-up to partial payments)
11273            for receipt in &chain.remainder_receipts {
11274                flows.payments.push(receipt.clone());
11275            }
11276            flows.o2c_chains.push(chain);
11277
11278            if let Some(pb) = &pb {
11279                pb.inc(1);
11280            }
11281        }
11282
11283        if let Some(pb) = pb {
11284            pb.finish_with_message("O2C document flows complete");
11285        }
11286
11287        // Collect all document cross-references from document headers.
11288        // Each document embeds references to its predecessor(s) via add_reference(); here we
11289        // denormalise them into a flat list for the document_references.json output file.
11290        {
11291            let mut refs = Vec::new();
11292            for doc in &flows.purchase_orders {
11293                refs.extend(doc.header.document_references.iter().cloned());
11294            }
11295            for doc in &flows.goods_receipts {
11296                refs.extend(doc.header.document_references.iter().cloned());
11297            }
11298            for doc in &flows.vendor_invoices {
11299                refs.extend(doc.header.document_references.iter().cloned());
11300            }
11301            for doc in &flows.sales_orders {
11302                refs.extend(doc.header.document_references.iter().cloned());
11303            }
11304            for doc in &flows.deliveries {
11305                refs.extend(doc.header.document_references.iter().cloned());
11306            }
11307            for doc in &flows.customer_invoices {
11308                refs.extend(doc.header.document_references.iter().cloned());
11309            }
11310            for doc in &flows.payments {
11311                refs.extend(doc.header.document_references.iter().cloned());
11312            }
11313            debug!(
11314                "Collected {} document cross-references from document headers",
11315                refs.len()
11316            );
11317            flows.document_references = refs;
11318        }
11319
11320        Ok(())
11321    }
11322
11323    /// Generate journal entries using parallel generation across multiple cores.
11324    fn generate_journal_entries(
11325        &mut self,
11326        coa: &Arc<ChartOfAccounts>,
11327    ) -> SynthResult<Vec<JournalEntry>> {
11328        use datasynth_core::traits::ParallelGenerator;
11329
11330        let total = self.calculate_total_transactions();
11331        let pb = self.create_progress_bar(total, "Generating Journal Entries");
11332
11333        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11334            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11335        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
11336
11337        let company_codes: Vec<String> = self
11338            .config
11339            .companies
11340            .iter()
11341            .map(|c| c.code.clone())
11342            .collect();
11343
11344        let mut generator = JournalEntryGenerator::new_with_params(
11345            self.config.transactions.clone(),
11346            Arc::clone(coa),
11347            company_codes,
11348            start_date,
11349            end_date,
11350            self.seed,
11351        );
11352        // Wire the `business_processes.*_weight` config through (phantom knob
11353        // until now — the JE generator hard-coded 0.35/0.30/0.20/0.10/0.05).
11354        let bp = &self.config.business_processes;
11355        generator.set_business_process_weights(
11356            bp.o2c_weight,
11357            bp.p2p_weight,
11358            bp.r2r_weight,
11359            bp.h2r_weight,
11360            bp.a2r_weight,
11361        );
11362        // v3.4.0: wire advanced distributions (mixture models + industry
11363        // profiles). No-op when `distributions.enabled = false` or
11364        // `distributions.amounts.enabled = false`, preserving v3.3.2
11365        // byte-identical output on default configs.
11366        generator
11367            .set_advanced_distributions(&self.config.distributions, self.seed + 400)
11368            .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
11369
11370        // SP3: load and wire industry priors when the config opts in via
11371        //   distributions.industry_profile.priors.enabled = true
11372        // When disabled (or when using the legacy bare-name form), this block
11373        // is a no-op and generation behavior is identical to v5.11.
11374        if let Some(profile) = &self.config.distributions.industry_profile {
11375            if let Some(priors_cfg) = profile.priors() {
11376                if priors_cfg.enabled {
11377                    use datasynth_config::schema::PriorsSource;
11378                    use datasynth_generators::priors_loader::LoadedPriors;
11379
11380                    let mut priors_rng =
11381                        rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(500));
11382                    let period_days = i64::from(self.config.global.period_months) * 30;
11383                    let industry_slug = profile.profile_type().slug();
11384
11385                    let loaded = match priors_cfg.source {
11386                        PriorsSource::Bundled => {
11387                            LoadedPriors::load_bundled(industry_slug, &mut priors_rng, period_days)
11388                                .map_err(|e| {
11389                                    SynthError::config(format!(
11390                                "SP3: failed to load bundled priors for '{industry_slug}': {e}"
11391                            ))
11392                                })?
11393                        }
11394                        PriorsSource::File => {
11395                            let path = priors_cfg.path.as_ref().ok_or_else(|| {
11396                                SynthError::config(
11397                                    "SP3: industry_profile.priors.path required when source = file"
11398                                        .to_string(),
11399                                )
11400                            })?;
11401                            LoadedPriors::load_from_path(
11402                                path,
11403                                &mut priors_rng,
11404                                period_days,
11405                                Some(industry_slug),
11406                            )
11407                            .map_err(|e| {
11408                                SynthError::config(format!(
11409                                    "SP3: failed to load priors from '{}': {e}",
11410                                    path.display()
11411                                ))
11412                            })?
11413                        }
11414                    };
11415
11416                    // SP3.12 — cache priors in Arc so document-flow generator
11417                    // can also apply lines-per-JE padding without re-loading.
11418                    let loaded = std::sync::Arc::new(loaded);
11419                    self.cached_priors = Some(loaded.clone());
11420                    generator.loaded_priors = Some((*loaded).clone());
11421
11422                    // SP3.4 — instantiate VelocityCalibrator when the config
11423                    // opts in.  Default target rates (R7/R9) are a sensible
11424                    // baseline; they can be derived from the loaded priors in
11425                    // a future hardening pass.
11426                    if priors_cfg.velocity_calibration {
11427                        use datasynth_generators::velocity_calibrator::VelocityCalibrator;
11428                        let mut targets = std::collections::HashMap::new();
11429                        targets.insert("R7".to_string(), 0.10);
11430                        targets.insert("R9".to_string(), 0.10);
11431                        let calibrator = VelocityCalibrator::new(targets, 10_000);
11432                        generator.velocity_calibrator = Some(calibrator);
11433                    }
11434                }
11435            }
11436        }
11437
11438        let generator = generator;
11439
11440        // Connect generated master data to ensure JEs reference real entities
11441        // Enable persona-based error injection for realistic human behavior
11442        // Pass fraud configuration for fraud injection
11443        let je_pack = self.primary_pack();
11444
11445        // Master-data CC / PC pools so JE.cost_center and
11446        // JE.profit_center join back to `cost_centers.id` and
11447        // `profit_centers.id` (closes the v5.9.0 linkage gap that
11448        // had `JE.cost_center = "CC1000"` while master used
11449        // `CC-1000-FIN` etc.).  Empty when no master is present —
11450        // the generator falls back to its hardcoded constants.
11451        let cc_pool: Vec<String> = self
11452            .master_data
11453            .cost_centers
11454            .iter()
11455            .map(|c| c.id.clone())
11456            .collect();
11457        let pc_pool: Vec<String> = self
11458            .master_data
11459            .profit_centers
11460            .iter()
11461            .map(|p| p.id.clone())
11462            .collect();
11463
11464        // Build a UserPool from the generated employee master so
11465        // JE.created_by lines join back to `employees.user_id`.  v5.9.0:
11466        // closes the third linkage gap (the previous behaviour had
11467        // JeGenerator generate its own UserPool internally with
11468        // ids disjoint from the employee master).
11469        let user_pool_from_employees =
11470            datasynth_core::models::UserPool::from_employees(&self.master_data.employees);
11471
11472        let mut generator = generator
11473            .with_master_data(
11474                &self.master_data.vendors,
11475                &self.master_data.customers,
11476                &self.master_data.materials,
11477            )
11478            .with_cost_center_pool(cc_pool)
11479            .with_profit_center_pool(pc_pool)
11480            .with_country_pack_names(je_pack)
11481            .with_user_pool(user_pool_from_employees)
11482            .with_country_pack_temporal(
11483                self.config.temporal_patterns.clone(),
11484                self.seed + 200,
11485                je_pack,
11486            )
11487            .with_persona_errors(true)
11488            .with_fraud_config(self.config.fraud.clone());
11489
11490        // Apply temporal drift if configured. v3.5.2+: also merge
11491        // `distributions.regime_changes` (regime events, economic
11492        // cycles, parameter drifts) into the same DriftConfig so both
11493        // knobs flow through the shared DriftController.
11494        let temporal_enabled = self.config.temporal.enabled;
11495        let regimes_enabled = self.config.distributions.regime_changes.enabled;
11496        if temporal_enabled || regimes_enabled {
11497            let mut drift_config = if temporal_enabled {
11498                self.config.temporal.to_core_config()
11499            } else {
11500                // regime-changes only: start from default (drift OFF),
11501                // apply_to flips `enabled = true`.
11502                datasynth_core::distributions::DriftConfig::default()
11503            };
11504            if regimes_enabled {
11505                self.config
11506                    .distributions
11507                    .regime_changes
11508                    .apply_to(&mut drift_config, start_date);
11509            }
11510            generator = generator.with_drift_config(drift_config, self.seed + 100);
11511        }
11512
11513        // Check memory limit at start
11514        self.check_memory_limit()?;
11515
11516        // Determine parallelism: use available cores, but cap at total entries
11517        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11518
11519        // Use parallel generation for datasets with 10K+ entries.
11520        // Below this threshold, the statistical properties of a single-seeded
11521        // generator (e.g. Benford compliance) are better preserved.
11522        let entries = if total >= 10_000 && num_threads > 1 {
11523            // Parallel path: split the generator across cores and generate in parallel.
11524            // Each sub-generator gets a unique seed for deterministic, independent generation.
11525            let sub_generators = generator.split(num_threads);
11526            let entries_per_thread = total as usize / num_threads;
11527            let remainder = total as usize % num_threads;
11528
11529            let batches: Vec<Vec<JournalEntry>> = sub_generators
11530                .into_par_iter()
11531                .enumerate()
11532                .map(|(i, mut gen)| {
11533                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11534                    gen.generate_batch(count)
11535                })
11536                .collect();
11537
11538            // Merge all batches into a single Vec
11539            let entries = JournalEntryGenerator::merge_results(batches);
11540
11541            if let Some(pb) = &pb {
11542                pb.inc(total);
11543            }
11544            entries
11545        } else {
11546            // Sequential path: fewer than 10,000 entries, or only one worker thread available
11547            let mut entries = Vec::with_capacity(total as usize);
11548            for _ in 0..total {
11549                let entry = generator.generate();
11550                entries.push(entry);
11551                if let Some(pb) = &pb {
11552                    pb.inc(1);
11553                }
11554            }
11555            entries
11556        };
11557
11558        if let Some(pb) = pb {
11559            pb.finish_with_message("Journal entries complete");
11560        }
11561
11562        Ok(entries)
11563    }
11564
11565    /// Generate journal entries from document flows.
11566    ///
11567    /// This creates proper GL entries for each document in the P2P and O2C flows,
11568    /// ensuring that document activity is reflected in the general ledger.
11569    fn generate_jes_from_document_flows(
11570        &mut self,
11571        flows: &DocumentFlowSnapshot,
11572    ) -> SynthResult<Vec<JournalEntry>> {
11573        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11574        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11575
11576        let je_config = match self.resolve_coa_framework() {
11577            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11578            CoAFramework::GermanSkr04 => {
11579                let fa = datasynth_core::FrameworkAccounts::german_gaap();
11580                DocumentFlowJeConfig::from(&fa)
11581            }
11582            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11583        };
11584
11585        let populate_fec = je_config.populate_fec_fields;
11586        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11587
11588        // SP3.12 — propagate cached priors so document-flow JEs receive
11589        // the same lines-per-JE padding as standalone JEs.
11590        if let Some(ref priors) = self.cached_priors {
11591            generator.set_loaded_priors(priors.clone());
11592        }
11593
11594        // Master-data CC / PC pools so document-flow-derived JEs
11595        // (P2P / O2C postings) reference IDs that join back to the
11596        // cost-centers / profit-centers masters.  Same plumbing as
11597        // for `JeGenerator` above; falls back to hardcoded const
11598        // pools when masters are absent.
11599        let cc_pool: Vec<String> = self
11600            .master_data
11601            .cost_centers
11602            .iter()
11603            .map(|c| c.id.clone())
11604            .collect();
11605        let pc_pool: Vec<String> = self
11606            .master_data
11607            .profit_centers
11608            .iter()
11609            .map(|p| p.id.clone())
11610            .collect();
11611        if !cc_pool.is_empty() {
11612            generator.set_cost_center_pool(cc_pool);
11613        }
11614        if !pc_pool.is_empty() {
11615            generator.set_profit_center_pool(pc_pool);
11616        }
11617
11618        // Build auxiliary account lookup from vendor/customer master data so that
11619        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
11620        // PCG "4010001") instead of raw partner IDs.
11621        if populate_fec {
11622            let mut aux_lookup = std::collections::HashMap::new();
11623            for vendor in &self.master_data.vendors {
11624                if let Some(ref aux) = vendor.auxiliary_gl_account {
11625                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11626                }
11627            }
11628            for customer in &self.master_data.customers {
11629                if let Some(ref aux) = customer.auxiliary_gl_account {
11630                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11631                }
11632            }
11633            if !aux_lookup.is_empty() {
11634                generator.set_auxiliary_account_lookup(aux_lookup);
11635            }
11636        }
11637
11638        let mut entries = Vec::new();
11639
11640        // Generate JEs from P2P chains
11641        for chain in &flows.p2p_chains {
11642            let chain_entries = generator.generate_from_p2p_chain(chain);
11643            entries.extend(chain_entries);
11644            if let Some(pb) = &pb {
11645                pb.inc(1);
11646            }
11647        }
11648
11649        // Generate JEs from O2C chains
11650        for chain in &flows.o2c_chains {
11651            let chain_entries = generator.generate_from_o2c_chain(chain);
11652            entries.extend(chain_entries);
11653            if let Some(pb) = &pb {
11654                pb.inc(1);
11655            }
11656        }
11657
11658        if let Some(pb) = pb {
11659            pb.finish_with_message(format!(
11660                "Generated {} JEs from document flows",
11661                entries.len()
11662            ));
11663        }
11664
11665        Ok(entries)
11666    }
11667
11668    /// Generate journal entries from payroll runs.
11669    ///
11670    /// Creates one JE per payroll run:
11671    /// - DR Salaries & Wages (6100) for gross pay
11672    /// - CR Payroll Clearing (9100) for gross pay
11673    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11674        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11675
11676        let mut jes = Vec::with_capacity(payroll_runs.len());
11677
11678        for run in payroll_runs {
11679            let mut je = JournalEntry::new_simple(
11680                format!("JE-PAYROLL-{}", run.payroll_id),
11681                run.company_code.clone(),
11682                run.run_date,
11683                format!("Payroll {}", run.payroll_id),
11684            );
11685
11686            // Debit Salaries & Wages for gross pay
11687            je.add_line(JournalEntryLine {
11688                line_number: 1,
11689                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11690                debit_amount: run.total_gross,
11691                reference: Some(run.payroll_id.clone()),
11692                text: Some(format!(
11693                    "Payroll {} ({} employees)",
11694                    run.payroll_id, run.employee_count
11695                )),
11696                ..Default::default()
11697            });
11698
11699            // Credit Payroll Clearing for gross pay
11700            je.add_line(JournalEntryLine {
11701                line_number: 2,
11702                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11703                credit_amount: run.total_gross,
11704                reference: Some(run.payroll_id.clone()),
11705                ..Default::default()
11706            });
11707
11708            jes.push(je);
11709        }
11710
11711        jes
11712    }
11713
11714    /// Link document flows to subledger records.
11715    ///
11716    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
11717    /// ensuring subledger data is coherent with document flow data.
11718    fn link_document_flows_to_subledgers(
11719        &mut self,
11720        flows: &DocumentFlowSnapshot,
11721    ) -> SynthResult<SubledgerSnapshot> {
11722        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11723        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11724
11725        // Build vendor/customer name maps from master data for realistic subledger names
11726        let vendor_names: std::collections::HashMap<String, String> = self
11727            .master_data
11728            .vendors
11729            .iter()
11730            .map(|v| (v.vendor_id.clone(), v.name.clone()))
11731            .collect();
11732        let customer_names: std::collections::HashMap<String, String> = self
11733            .master_data
11734            .customers
11735            .iter()
11736            .map(|c| (c.customer_id.clone(), c.name.clone()))
11737            .collect();
11738
11739        let mut linker = DocumentFlowLinker::new()
11740            .with_vendor_names(vendor_names)
11741            .with_customer_names(customer_names);
11742
11743        // Convert vendor invoices to AP invoices
11744        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11745        if let Some(pb) = &pb {
11746            pb.inc(flows.vendor_invoices.len() as u64);
11747        }
11748
11749        // Convert customer invoices to AR invoices
11750        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11751        if let Some(pb) = &pb {
11752            pb.inc(flows.customer_invoices.len() as u64);
11753        }
11754
11755        if let Some(pb) = pb {
11756            pb.finish_with_message(format!(
11757                "Linked {} AP and {} AR invoices",
11758                ap_invoices.len(),
11759                ar_invoices.len()
11760            ));
11761        }
11762
11763        Ok(SubledgerSnapshot {
11764            ap_invoices,
11765            ar_invoices,
11766            fa_records: Vec::new(),
11767            inventory_positions: Vec::new(),
11768            inventory_movements: Vec::new(),
11769            // Aging reports are computed after payment settlement in phase_document_flows.
11770            ar_aging_reports: Vec::new(),
11771            ap_aging_reports: Vec::new(),
11772            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
11773            depreciation_runs: Vec::new(),
11774            inventory_valuations: Vec::new(),
11775            // Dunning runs and letters are populated in phase_document_flows after AR aging.
11776            dunning_runs: Vec::new(),
11777            dunning_letters: Vec::new(),
11778        })
11779    }
11780
11781    /// Generate OCPM events from document flows.
11782    ///
11783    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
11784    /// capturing the object-centric process perspective.
11785    #[allow(clippy::too_many_arguments)]
11786    fn generate_ocpm_events(
11787        &mut self,
11788        flows: &DocumentFlowSnapshot,
11789        sourcing: &SourcingSnapshot,
11790        hr: &HrSnapshot,
11791        manufacturing: &ManufacturingSnapshot,
11792        banking: &BankingSnapshot,
11793        audit: &AuditSnapshot,
11794        financial_reporting: &FinancialReportingSnapshot,
11795    ) -> SynthResult<OcpmSnapshot> {
11796        let total_chains = flows.p2p_chains.len()
11797            + flows.o2c_chains.len()
11798            + sourcing.sourcing_projects.len()
11799            + hr.payroll_runs.len()
11800            + manufacturing.production_orders.len()
11801            + banking.customers.len()
11802            + audit.engagements.len()
11803            + financial_reporting.bank_reconciliations.len();
11804        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11805
11806        // Create OCPM event log with standard types
11807        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11808        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11809
11810        // Configure the OCPM generator
11811        let ocpm_config = OcpmGeneratorConfig {
11812            generate_p2p: true,
11813            generate_o2c: true,
11814            generate_s2c: !sourcing.sourcing_projects.is_empty(),
11815            generate_h2r: !hr.payroll_runs.is_empty(),
11816            generate_mfg: !manufacturing.production_orders.is_empty(),
11817            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11818            generate_bank: !banking.customers.is_empty(),
11819            generate_audit: !audit.engagements.is_empty(),
11820            happy_path_rate: 0.75,
11821            exception_path_rate: 0.20,
11822            error_path_rate: 0.05,
11823            add_duration_variability: true,
11824            duration_std_dev_factor: 0.3,
11825        };
11826        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11827        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11828
11829        // Get available users for resource assignment
11830        let available_users: Vec<String> = self
11831            .master_data
11832            .employees
11833            .iter()
11834            .take(20)
11835            .map(|e| e.user_id.clone())
11836            .collect();
11837
11838        // Deterministic base date from config (avoids Utc::now() non-determinism)
11839        let fallback_date =
11840            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11841        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11842            .unwrap_or(fallback_date);
11843        let base_midnight = base_date
11844            .and_hms_opt(0, 0, 0)
11845            .expect("midnight is always valid");
11846        let base_datetime =
11847            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11848
11849        // Helper closure to add case results to event log
11850        let add_result = |event_log: &mut OcpmEventLog,
11851                          result: datasynth_ocpm::CaseGenerationResult| {
11852            for event in result.events {
11853                event_log.add_event(event);
11854            }
11855            for object in result.objects {
11856                event_log.add_object(object);
11857            }
11858            for relationship in result.relationships {
11859                event_log.add_relationship(relationship);
11860            }
11861            for corr in result.correlation_events {
11862                event_log.add_correlation_event(corr);
11863            }
11864            event_log.add_case(result.case_trace);
11865        };
11866
11867        // Generate events from P2P chains
11868        for chain in &flows.p2p_chains {
11869            let po = &chain.purchase_order;
11870            let documents = P2pDocuments::new(
11871                &po.header.document_id,
11872                &po.vendor_id,
11873                &po.header.company_code,
11874                po.total_net_amount,
11875                &po.header.currency,
11876                &ocpm_uuid_factory,
11877            )
11878            .with_goods_receipt(
11879                chain
11880                    .goods_receipts
11881                    .first()
11882                    .map(|gr| gr.header.document_id.as_str())
11883                    .unwrap_or(""),
11884                &ocpm_uuid_factory,
11885            )
11886            .with_invoice(
11887                chain
11888                    .vendor_invoice
11889                    .as_ref()
11890                    .map(|vi| vi.header.document_id.as_str())
11891                    .unwrap_or(""),
11892                &ocpm_uuid_factory,
11893            )
11894            .with_payment(
11895                chain
11896                    .payment
11897                    .as_ref()
11898                    .map(|p| p.header.document_id.as_str())
11899                    .unwrap_or(""),
11900                &ocpm_uuid_factory,
11901            );
11902
11903            let start_time =
11904                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
11905            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
11906            add_result(&mut event_log, result);
11907
11908            if let Some(pb) = &pb {
11909                pb.inc(1);
11910            }
11911        }
11912
11913        // Generate events from O2C chains
11914        for chain in &flows.o2c_chains {
11915            let so = &chain.sales_order;
11916            let documents = O2cDocuments::new(
11917                &so.header.document_id,
11918                &so.customer_id,
11919                &so.header.company_code,
11920                so.total_net_amount,
11921                &so.header.currency,
11922                &ocpm_uuid_factory,
11923            )
11924            .with_delivery(
11925                chain
11926                    .deliveries
11927                    .first()
11928                    .map(|d| d.header.document_id.as_str())
11929                    .unwrap_or(""),
11930                &ocpm_uuid_factory,
11931            )
11932            .with_invoice(
11933                chain
11934                    .customer_invoice
11935                    .as_ref()
11936                    .map(|ci| ci.header.document_id.as_str())
11937                    .unwrap_or(""),
11938                &ocpm_uuid_factory,
11939            )
11940            .with_receipt(
11941                chain
11942                    .customer_receipt
11943                    .as_ref()
11944                    .map(|r| r.header.document_id.as_str())
11945                    .unwrap_or(""),
11946                &ocpm_uuid_factory,
11947            );
11948
11949            let start_time =
11950                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
11951            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
11952            add_result(&mut event_log, result);
11953
11954            if let Some(pb) = &pb {
11955                pb.inc(1);
11956            }
11957        }
11958
11959        // Generate events from S2C sourcing projects
11960        for project in &sourcing.sourcing_projects {
11961            // Find vendor from contracts or qualifications
11962            let vendor_id = sourcing
11963                .contracts
11964                .iter()
11965                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11966                .map(|c| c.vendor_id.clone())
11967                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
11968                .or_else(|| {
11969                    self.master_data
11970                        .vendors
11971                        .first()
11972                        .map(|v| v.vendor_id.clone())
11973                })
11974                .unwrap_or_else(|| "V000".to_string());
11975            let mut docs = S2cDocuments::new(
11976                &project.project_id,
11977                &vendor_id,
11978                &project.company_code,
11979                project.estimated_annual_spend,
11980                &ocpm_uuid_factory,
11981            );
11982            // Link RFx if available
11983            if let Some(rfx) = sourcing
11984                .rfx_events
11985                .iter()
11986                .find(|r| r.sourcing_project_id == project.project_id)
11987            {
11988                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11989                // Link winning bid (status == Accepted)
11990                if let Some(bid) = sourcing.bids.iter().find(|b| {
11991                    b.rfx_id == rfx.rfx_id
11992                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11993                }) {
11994                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11995                }
11996            }
11997            // Link contract
11998            if let Some(contract) = sourcing
11999                .contracts
12000                .iter()
12001                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
12002            {
12003                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
12004            }
12005            let start_time = base_datetime - chrono::Duration::days(90);
12006            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
12007            add_result(&mut event_log, result);
12008
12009            if let Some(pb) = &pb {
12010                pb.inc(1);
12011            }
12012        }
12013
12014        // Generate events from H2R payroll runs
12015        for run in &hr.payroll_runs {
12016            // Use first matching payroll line item's employee, or fallback
12017            let employee_id = hr
12018                .payroll_line_items
12019                .iter()
12020                .find(|li| li.payroll_id == run.payroll_id)
12021                .map(|li| li.employee_id.as_str())
12022                .unwrap_or("EMP000");
12023            let docs = H2rDocuments::new(
12024                &run.payroll_id,
12025                employee_id,
12026                &run.company_code,
12027                run.total_gross,
12028                &ocpm_uuid_factory,
12029            )
12030            .with_time_entries(
12031                hr.time_entries
12032                    .iter()
12033                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
12034                    .take(5)
12035                    .map(|t| t.entry_id.as_str())
12036                    .collect(),
12037            );
12038            let start_time = base_datetime - chrono::Duration::days(30);
12039            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
12040            add_result(&mut event_log, result);
12041
12042            if let Some(pb) = &pb {
12043                pb.inc(1);
12044            }
12045        }
12046
12047        // Generate events from MFG production orders
12048        for order in &manufacturing.production_orders {
12049            let mut docs = MfgDocuments::new(
12050                &order.order_id,
12051                &order.material_id,
12052                &order.company_code,
12053                order.planned_quantity,
12054                &ocpm_uuid_factory,
12055            )
12056            .with_operations(
12057                order
12058                    .operations
12059                    .iter()
12060                    .map(|o| format!("OP-{:04}", o.operation_number))
12061                    .collect::<Vec<_>>()
12062                    .iter()
12063                    .map(std::string::String::as_str)
12064                    .collect(),
12065            );
12066            // Link quality inspection if available (via reference_id matching order_id)
12067            if let Some(insp) = manufacturing
12068                .quality_inspections
12069                .iter()
12070                .find(|i| i.reference_id == order.order_id)
12071            {
12072                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
12073            }
12074            // Link cycle count if available (match by material_id in items)
12075            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
12076                cc.items
12077                    .iter()
12078                    .any(|item| item.material_id == order.material_id)
12079            }) {
12080                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
12081            }
12082            let start_time = base_datetime - chrono::Duration::days(60);
12083            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
12084            add_result(&mut event_log, result);
12085
12086            if let Some(pb) = &pb {
12087                pb.inc(1);
12088            }
12089        }
12090
12091        // Generate events from Banking customers
12092        for customer in &banking.customers {
12093            let customer_id_str = customer.customer_id.to_string();
12094            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
12095            // Link accounts (primary_owner_id matches customer_id)
12096            if let Some(account) = banking
12097                .accounts
12098                .iter()
12099                .find(|a| a.primary_owner_id == customer.customer_id)
12100            {
12101                let account_id_str = account.account_id.to_string();
12102                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
12103                // Link transactions for this account
12104                let txn_strs: Vec<String> = banking
12105                    .transactions
12106                    .iter()
12107                    .filter(|t| t.account_id == account.account_id)
12108                    .take(10)
12109                    .map(|t| t.transaction_id.to_string())
12110                    .collect();
12111                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
12112                let txn_amounts: Vec<rust_decimal::Decimal> = banking
12113                    .transactions
12114                    .iter()
12115                    .filter(|t| t.account_id == account.account_id)
12116                    .take(10)
12117                    .map(|t| t.amount)
12118                    .collect();
12119                if !txn_ids.is_empty() {
12120                    docs = docs.with_transactions(txn_ids, txn_amounts);
12121                }
12122            }
12123            let start_time = base_datetime - chrono::Duration::days(180);
12124            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
12125            add_result(&mut event_log, result);
12126
12127            if let Some(pb) = &pb {
12128                pb.inc(1);
12129            }
12130        }
12131
12132        // Generate events from Audit engagements
12133        for engagement in &audit.engagements {
12134            let engagement_id_str = engagement.engagement_id.to_string();
12135            let docs = AuditDocuments::new(
12136                &engagement_id_str,
12137                &engagement.client_entity_id,
12138                &ocpm_uuid_factory,
12139            )
12140            .with_workpapers(
12141                audit
12142                    .workpapers
12143                    .iter()
12144                    .filter(|w| w.engagement_id == engagement.engagement_id)
12145                    .take(10)
12146                    .map(|w| w.workpaper_id.to_string())
12147                    .collect::<Vec<_>>()
12148                    .iter()
12149                    .map(std::string::String::as_str)
12150                    .collect(),
12151            )
12152            .with_evidence(
12153                audit
12154                    .evidence
12155                    .iter()
12156                    .filter(|e| e.engagement_id == engagement.engagement_id)
12157                    .take(10)
12158                    .map(|e| e.evidence_id.to_string())
12159                    .collect::<Vec<_>>()
12160                    .iter()
12161                    .map(std::string::String::as_str)
12162                    .collect(),
12163            )
12164            .with_risks(
12165                audit
12166                    .risk_assessments
12167                    .iter()
12168                    .filter(|r| r.engagement_id == engagement.engagement_id)
12169                    .take(5)
12170                    .map(|r| r.risk_id.to_string())
12171                    .collect::<Vec<_>>()
12172                    .iter()
12173                    .map(std::string::String::as_str)
12174                    .collect(),
12175            )
12176            .with_findings(
12177                audit
12178                    .findings
12179                    .iter()
12180                    .filter(|f| f.engagement_id == engagement.engagement_id)
12181                    .take(5)
12182                    .map(|f| f.finding_id.to_string())
12183                    .collect::<Vec<_>>()
12184                    .iter()
12185                    .map(std::string::String::as_str)
12186                    .collect(),
12187            )
12188            .with_judgments(
12189                audit
12190                    .judgments
12191                    .iter()
12192                    .filter(|j| j.engagement_id == engagement.engagement_id)
12193                    .take(5)
12194                    .map(|j| j.judgment_id.to_string())
12195                    .collect::<Vec<_>>()
12196                    .iter()
12197                    .map(std::string::String::as_str)
12198                    .collect(),
12199            );
12200            let start_time = base_datetime - chrono::Duration::days(120);
12201            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
12202            add_result(&mut event_log, result);
12203
12204            if let Some(pb) = &pb {
12205                pb.inc(1);
12206            }
12207        }
12208
12209        // Generate events from Bank Reconciliations
12210        for recon in &financial_reporting.bank_reconciliations {
12211            let docs = BankReconDocuments::new(
12212                &recon.reconciliation_id,
12213                &recon.bank_account_id,
12214                &recon.company_code,
12215                recon.bank_ending_balance,
12216                &ocpm_uuid_factory,
12217            )
12218            .with_statement_lines(
12219                recon
12220                    .statement_lines
12221                    .iter()
12222                    .take(20)
12223                    .map(|l| l.line_id.as_str())
12224                    .collect(),
12225            )
12226            .with_reconciling_items(
12227                recon
12228                    .reconciling_items
12229                    .iter()
12230                    .take(10)
12231                    .map(|i| i.item_id.as_str())
12232                    .collect(),
12233            );
12234            let start_time = base_datetime - chrono::Duration::days(30);
12235            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
12236            add_result(&mut event_log, result);
12237
12238            if let Some(pb) = &pb {
12239                pb.inc(1);
12240            }
12241        }
12242
12243        // Compute process variants
12244        event_log.compute_variants();
12245
12246        let summary = event_log.summary();
12247
12248        if let Some(pb) = pb {
12249            pb.finish_with_message(format!(
12250                "Generated {} OCPM events, {} objects",
12251                summary.event_count, summary.object_count
12252            ));
12253        }
12254
12255        Ok(OcpmSnapshot {
12256            event_count: summary.event_count,
12257            object_count: summary.object_count,
12258            case_count: summary.case_count,
12259            event_log: Some(event_log),
12260        })
12261    }
12262
12263    /// Inject anomalies into journal entries.
12264    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
12265        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
12266
12267        // Read anomaly rates from config instead of using hardcoded values.
12268        // Priority: anomaly_injection config > fraud config > default 0.02
12269        let total_rate = if self.config.anomaly_injection.enabled {
12270            self.config.anomaly_injection.rates.total_rate
12271        } else if self.config.fraud.enabled {
12272            self.config.fraud.fraud_rate
12273        } else {
12274            0.02
12275        };
12276
12277        let fraud_rate = if self.config.anomaly_injection.enabled {
12278            self.config.anomaly_injection.rates.fraud_rate
12279        } else {
12280            AnomalyRateConfig::default().fraud_rate
12281        };
12282
12283        let error_rate = if self.config.anomaly_injection.enabled {
12284            self.config.anomaly_injection.rates.error_rate
12285        } else {
12286            AnomalyRateConfig::default().error_rate
12287        };
12288
12289        let process_issue_rate = if self.config.anomaly_injection.enabled {
12290            self.config.anomaly_injection.rates.process_rate
12291        } else {
12292            AnomalyRateConfig::default().process_issue_rate
12293        };
12294
12295        let anomaly_config = AnomalyInjectorConfig {
12296            rates: AnomalyRateConfig {
12297                total_rate,
12298                fraud_rate,
12299                error_rate,
12300                process_issue_rate,
12301                ..Default::default()
12302            },
12303            seed: self.seed + 5000,
12304            ..Default::default()
12305        };
12306
12307        let mut injector = AnomalyInjector::new(anomaly_config);
12308        let result = injector.process_entries(entries);
12309
12310        // Central concentration abstraction (#143, Phase 1): run the post-process
12311        // pipeline AFTER per-entry strategies. The pipeline merges the SOTA-12
12312        // tagger + new passes (trading-partner pool, Phase-2 account substitution)
12313        // through a single integration point — see
12314        // docs/superpowers/specs/2026-05-23-concentration-pass-INDEX.md.
12315        //
12316        // Back-compat: the legacy `anomaly_injection.source_conditional_rarity_rate`
12317        // key remains honored. If `concentration.source_conditional_rarity` is also
12318        // set in the same config, the unified DSL field wins.
12319        let (sota12_tagged, consolidation_outlier_expanded): (usize, usize) = {
12320            use datasynth_config::schema::{
12321                ConcentrationConfig, ConsolidationOutlierPassConfig,
12322                SourceConditionalRarityPassConfig,
12323            };
12324            use datasynth_generators::concentration::ConcentrationPipeline;
12325
12326            // Decide effective ConcentrationConfig: start from user config, then
12327            // back-fill from the legacy SOTA-12 key if the unified DSL didn't set it.
12328            let mut effective: ConcentrationConfig = self.config.concentration.clone();
12329            if effective.source_conditional_rarity.is_none() {
12330                if let Some(rate) = self.config.anomaly_injection.source_conditional_rarity_rate {
12331                    effective.enabled = true;
12332                    effective.source_conditional_rarity = Some(SourceConditionalRarityPassConfig {
12333                        rate,
12334                        min_surprise: None,
12335                        min_per_source_lines: None,
12336                    });
12337                }
12338            }
12339            // v5.30 B2 (#154) — back-compat: surface
12340            // `anomaly_injection.rates.consolidation_outlier_rate` as a
12341            // `ConsolidationOutlierPassConfig` if the unified DSL didn't
12342            // set one. Default 0.001 baseline shipped via the schema's
12343            // `default_consolidation_outlier_rate` — only synthesise the
12344            // pass when the rate is > 0, otherwise it's a no-op anyway.
12345            if effective.consolidation_outlier.is_none() {
12346                let rate = self
12347                    .config
12348                    .anomaly_injection
12349                    .rates
12350                    .consolidation_outlier_rate;
12351                if rate > 0.0 {
12352                    effective.enabled = true;
12353                    effective.consolidation_outlier = Some(ConsolidationOutlierPassConfig {
12354                        rate,
12355                        ..Default::default()
12356                    });
12357                }
12358            }
12359
12360            if !effective.enabled {
12361                (0, 0)
12362            } else {
12363                let pipeline = ConcentrationPipeline::from_config(&effective).map_err(|e| {
12364                    SynthError::generation(format!(
12365                        "ConcentrationPipeline construction failed: {e}"
12366                    ))
12367                })?;
12368                if !pipeline.is_active() {
12369                    (0, 0)
12370                } else {
12371                    // Per-pipeline seed disjoint from every other generator stream.
12372                    const CONCENTRATION_SEED_OFFSET: u64 = 0xC0_C3_E1_47_10_43_77_3B;
12373                    let stats =
12374                        pipeline.run(entries, self.seed.wrapping_add(CONCENTRATION_SEED_OFFSET));
12375                    let sota12: usize = stats
12376                        .iter()
12377                        .filter(|s| s.pass == "source_conditional_rarity")
12378                        .map(|s| s.entries_modified)
12379                        .sum();
12380                    let consol: usize = stats
12381                        .iter()
12382                        .filter(|s| s.pass == "consolidation_outlier")
12383                        .map(|s| s.entries_modified)
12384                        .sum();
12385                    (sota12, consol)
12386                }
12387            }
12388        };
12389
12390        if let Some(pb) = &pb {
12391            pb.inc(entries.len() as u64);
12392            pb.finish_with_message("Anomaly injection complete");
12393        }
12394
12395        let mut by_type = HashMap::new();
12396        for label in &result.labels {
12397            *by_type
12398                .entry(format!("{:?}", label.anomaly_type))
12399                .or_insert(0) += 1;
12400        }
12401        if sota12_tagged > 0 {
12402            *by_type
12403                .entry("SourceConditionalRarity".to_string())
12404                .or_insert(0) += sota12_tagged;
12405        }
12406        // v5.30 B2 (#154): record the consolidation-outlier expansion
12407        // count under a stable label key so the orchestrator's run
12408        // report surfaces the heavy-tail emission rate alongside the
12409        // other anomaly buckets.
12410        if consolidation_outlier_expanded > 0 {
12411            *by_type
12412                .entry("ConsolidationOutlier".to_string())
12413                .or_insert(0) += consolidation_outlier_expanded;
12414        }
12415
12416        Ok(AnomalyLabels {
12417            labels: result.labels,
12418            summary: Some(result.summary),
12419            by_type,
12420        })
12421    }
12422
12423    /// Validate journal entries using running balance tracker.
12424    ///
12425    /// Applies all entries to the balance tracker and validates:
12426    /// - Each entry is internally balanced (debits = credits)
12427    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
12428    ///
12429    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
12430    /// excluded from balance validation as they may be intentionally unbalanced.
12431    fn validate_journal_entries(
12432        &mut self,
12433        entries: &[JournalEntry],
12434    ) -> SynthResult<BalanceValidationResult> {
12435        // Filter out entries with human errors as they may be intentionally unbalanced
12436        let clean_entries: Vec<&JournalEntry> = entries
12437            .iter()
12438            .filter(|e| {
12439                e.header
12440                    .header_text
12441                    .as_ref()
12442                    .map(|t| !t.contains("[HUMAN_ERROR:"))
12443                    .unwrap_or(true)
12444            })
12445            .collect();
12446
12447        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
12448
12449        // Configure tracker to not fail on errors (collect them instead)
12450        let config = BalanceTrackerConfig {
12451            validate_on_each_entry: false,   // We'll validate at the end
12452            track_history: false,            // Skip history for performance
12453            fail_on_validation_error: false, // Collect errors, don't fail
12454            ..Default::default()
12455        };
12456        let validation_currency = self
12457            .config
12458            .companies
12459            .first()
12460            .map(|c| c.currency.clone())
12461            .unwrap_or_else(|| "USD".to_string());
12462
12463        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
12464
12465        // Apply clean entries (without human errors)
12466        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
12467        let errors = tracker.apply_entries(&clean_refs);
12468
12469        if let Some(pb) = &pb {
12470            pb.inc(entries.len() as u64);
12471        }
12472
12473        // Check if any entries were unbalanced
12474        // Note: When fail_on_validation_error is false, errors are stored in tracker
12475        let has_unbalanced = tracker
12476            .get_validation_errors()
12477            .iter()
12478            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
12479
12480        // Validate balance sheet for each company
12481        // Include both returned errors and collected validation errors
12482        let mut all_errors = errors;
12483        all_errors.extend(tracker.get_validation_errors().iter().cloned());
12484        let company_codes: Vec<String> = self
12485            .config
12486            .companies
12487            .iter()
12488            .map(|c| c.code.clone())
12489            .collect();
12490
12491        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12492            .map(|d| d + chrono::Months::new(self.config.global.period_months))
12493            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12494
12495        for company_code in &company_codes {
12496            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
12497                all_errors.push(e);
12498            }
12499        }
12500
12501        // Get statistics after all mutable operations are done
12502        let stats = tracker.get_statistics();
12503
12504        // Determine if balanced overall
12505        let is_balanced = all_errors.is_empty();
12506
12507        if let Some(pb) = pb {
12508            let msg = if is_balanced {
12509                "Balance validation passed"
12510            } else {
12511                "Balance validation completed with errors"
12512            };
12513            pb.finish_with_message(msg);
12514        }
12515
12516        Ok(BalanceValidationResult {
12517            validated: true,
12518            is_balanced,
12519            entries_processed: stats.entries_processed,
12520            total_debits: stats.total_debits,
12521            total_credits: stats.total_credits,
12522            accounts_tracked: stats.accounts_tracked,
12523            companies_tracked: stats.companies_tracked,
12524            validation_errors: all_errors,
12525            has_unbalanced_entries: has_unbalanced,
12526        })
12527    }
12528
12529    /// Inject data quality variations into journal entries.
12530    ///
12531    /// Applies typos, missing values, and format variations to make
12532    /// the synthetic data more realistic for testing data cleaning pipelines.
12533    fn inject_data_quality(
12534        &mut self,
12535        entries: &mut [JournalEntry],
12536    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
12537        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
12538
12539        // Build config from user-specified schema settings when data_quality is enabled;
12540        // otherwise fall back to the low-rate minimal() preset.
12541        let config = if self.config.data_quality.enabled {
12542            let dq = &self.config.data_quality;
12543            // Propagate per-field rates and protected fields from the schema
12544            // so users can dial in real-production NULL profiles per field
12545            // (e.g. CostCenter 96.5% NULL, Invoice_Reference 100% NULL).
12546            let field_rates = dq.missing_values.field_rates.clone();
12547            let mut required_fields: std::collections::HashSet<String> =
12548                dq.missing_values.protected_fields.iter().cloned().collect();
12549            // Always preserve audit-critical identifiers regardless of
12550            // user config — losing these breaks downstream joins.
12551            for f in [
12552                "document_id",
12553                "company_code",
12554                "posting_date",
12555                "fiscal_year",
12556                "fiscal_period",
12557                "gl_account",
12558                "line_number",
12559                "transaction_id",
12560            ] {
12561                required_fields.insert(f.to_string());
12562            }
12563            DataQualityConfig {
12564                enable_missing_values: dq.missing_values.enabled,
12565                missing_values: datasynth_generators::MissingValueConfig {
12566                    global_rate: dq.effective_missing_rate(),
12567                    field_rates,
12568                    required_fields,
12569                    ..Default::default()
12570                },
12571                enable_format_variations: dq.format_variations.enabled,
12572                format_variations: datasynth_generators::FormatVariationConfig {
12573                    date_variation_rate: dq.format_variations.dates.rate,
12574                    amount_variation_rate: dq.format_variations.amounts.rate,
12575                    identifier_variation_rate: dq.format_variations.identifiers.rate,
12576                    ..Default::default()
12577                },
12578                enable_duplicates: dq.duplicates.enabled,
12579                duplicates: datasynth_generators::DuplicateConfig {
12580                    duplicate_rate: dq.effective_duplicate_rate(),
12581                    ..Default::default()
12582                },
12583                enable_typos: dq.typos.enabled,
12584                typos: datasynth_generators::TypoConfig {
12585                    char_error_rate: dq.effective_typo_rate(),
12586                    ..Default::default()
12587                },
12588                enable_encoding_issues: dq.encoding_issues.enabled,
12589                encoding_issue_rate: dq.encoding_issues.rate,
12590                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
12591                track_statistics: true,
12592            }
12593        } else {
12594            DataQualityConfig::minimal()
12595        };
12596        let mut injector = DataQualityInjector::new(config);
12597
12598        // Wire country pack for locale-aware format baselines
12599        injector.set_country_pack(self.primary_pack().clone());
12600
12601        // Build context for missing value decisions
12602        let context = HashMap::new();
12603
12604        for entry in entries.iter_mut() {
12605            // Process header_text field (common target for typos)
12606            if let Some(text) = &entry.header.header_text {
12607                let processed = injector.process_text_field(
12608                    "header_text",
12609                    text,
12610                    &entry.header.document_id.to_string(),
12611                    &context,
12612                );
12613                match processed {
12614                    Some(new_text) if new_text != *text => {
12615                        entry.header.header_text = Some(new_text);
12616                    }
12617                    None => {
12618                        entry.header.header_text = None; // Missing value
12619                    }
12620                    _ => {}
12621                }
12622            }
12623
12624            // Process reference field
12625            if let Some(ref_text) = &entry.header.reference {
12626                let processed = injector.process_text_field(
12627                    "reference",
12628                    ref_text,
12629                    &entry.header.document_id.to_string(),
12630                    &context,
12631                );
12632                match processed {
12633                    Some(new_text) if new_text != *ref_text => {
12634                        entry.header.reference = Some(new_text);
12635                    }
12636                    None => {
12637                        entry.header.reference = None;
12638                    }
12639                    _ => {}
12640                }
12641            }
12642
12643            // Process user_persona field (potential for typos in user IDs)
12644            let user_persona = entry.header.user_persona.clone();
12645            if let Some(processed) = injector.process_text_field(
12646                "user_persona",
12647                &user_persona,
12648                &entry.header.document_id.to_string(),
12649                &context,
12650            ) {
12651                if processed != user_persona {
12652                    entry.header.user_persona = processed;
12653                }
12654            }
12655
12656            // Process line items
12657            for line in &mut entry.lines {
12658                // Process line description if present
12659                if let Some(ref text) = line.line_text {
12660                    let processed = injector.process_text_field(
12661                        "line_text",
12662                        text,
12663                        &entry.header.document_id.to_string(),
12664                        &context,
12665                    );
12666                    match processed {
12667                        Some(new_text) if new_text != *text => {
12668                            line.line_text = Some(new_text);
12669                        }
12670                        None => {
12671                            line.line_text = None;
12672                        }
12673                        _ => {}
12674                    }
12675                }
12676
12677                // Process cost_center if present
12678                if let Some(cc) = &line.cost_center {
12679                    let processed = injector.process_text_field(
12680                        "cost_center",
12681                        cc,
12682                        &entry.header.document_id.to_string(),
12683                        &context,
12684                    );
12685                    match processed {
12686                        Some(new_cc) if new_cc != *cc => {
12687                            line.cost_center = Some(new_cc);
12688                        }
12689                        None => {
12690                            line.cost_center = None;
12691                        }
12692                        _ => {}
12693                    }
12694                }
12695
12696                // Extended field coverage (v5.6+): apply NULL injection to
12697                // every Option<String> on the line so users can match
12698                // arbitrary real-production NULL profiles via
12699                // `data_quality.missing_values.field_rates`.
12700                //
12701                // Macro-free helper: process_field returns the new value
12702                // ({Some, None, unchanged}) and we apply it back.
12703                macro_rules! process_opt_field {
12704                    ($field_name:expr, $opt:expr) => {
12705                        if let Some(val) = $opt.as_ref() {
12706                            match injector.process_text_field(
12707                                $field_name,
12708                                val,
12709                                &entry.header.document_id.to_string(),
12710                                &context,
12711                            ) {
12712                                Some(new_val) if new_val != *val => {
12713                                    *$opt = Some(new_val);
12714                                }
12715                                None => {
12716                                    *$opt = None;
12717                                }
12718                                _ => {}
12719                            }
12720                        }
12721                    };
12722                }
12723
12724                process_opt_field!("profit_center", &mut line.profit_center);
12725                process_opt_field!("assignment", &mut line.assignment);
12726                process_opt_field!("tax_code", &mut line.tax_code);
12727                process_opt_field!("account_description", &mut line.account_description);
12728                process_opt_field!(
12729                    "auxiliary_account_number",
12730                    &mut line.auxiliary_account_number
12731                );
12732                process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12733                process_opt_field!("lettrage", &mut line.lettrage);
12734            }
12735
12736            if let Some(pb) = &pb {
12737                pb.inc(1);
12738            }
12739        }
12740
12741        if let Some(pb) = pb {
12742            pb.finish_with_message("Data quality injection complete");
12743        }
12744
12745        let quality_issues = injector.issues().to_vec();
12746        Ok((injector.stats().clone(), quality_issues))
12747    }
12748
12749    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
12750    ///
12751    /// Creates complete audit documentation for each company in the configuration,
12752    /// following ISA standards:
12753    /// - ISA 210/220: Engagement acceptance and terms
12754    /// - ISA 230: Audit documentation (workpapers)
12755    /// - ISA 265: Control deficiencies (findings)
12756    /// - ISA 315/330: Risk assessment and response
12757    /// - ISA 500: Audit evidence
12758    /// - ISA 200: Professional judgment
12759    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12760        // Check if FSM-driven audit generation is enabled
12761        let use_fsm = self
12762            .config
12763            .audit
12764            .fsm
12765            .as_ref()
12766            .map(|f| f.enabled)
12767            .unwrap_or(false);
12768
12769        if use_fsm {
12770            return self.generate_audit_data_with_fsm(entries);
12771        }
12772
12773        // --- Legacy (non-FSM) audit generation follows ---
12774        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12775            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12776        let fiscal_year = start_date.year() as u16;
12777        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12778
12779        // Calculate rough total revenue from entries for materiality
12780        let total_revenue: rust_decimal::Decimal = entries
12781            .iter()
12782            .flat_map(|e| e.lines.iter())
12783            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12784            .map(|l| l.credit_amount)
12785            .sum();
12786
12787        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
12788        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12789
12790        let mut snapshot = AuditSnapshot::default();
12791
12792        // Initialize generators
12793        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12794        // v3.3.2: thread the user-facing audit schema config into the
12795        // engagement generator (team size range).
12796        engagement_gen.set_team_config(&self.config.audit.team);
12797
12798        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12799        // v3.3.2: thread workpaper + review workflow schema config into
12800        // the workpaper generator (per-section count range + review
12801        // delay ranges).
12802        workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12803        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12804        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12805        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12806        // v3.2.1+: user-supplied finding titles + narratives flow through shared provider
12807        finding_gen.set_template_provider(self.template_provider.clone());
12808        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12809        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12810        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12811        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12812        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12813        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12814        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12815
12816        // Get list of accounts from CoA for risk assessment
12817        let accounts: Vec<String> = self
12818            .coa
12819            .as_ref()
12820            .map(|coa| {
12821                coa.get_postable_accounts()
12822                    .iter()
12823                    .map(|acc| acc.account_code().to_string())
12824                    .collect()
12825            })
12826            .unwrap_or_default();
12827
12828        // Generate engagements for each company
12829        for (i, company) in self.config.companies.iter().enumerate() {
12830            // Calculate company-specific revenue (proportional to volume weight)
12831            let company_revenue = total_revenue
12832                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12833
12834            // Generate engagements for this company
12835            let engagements_for_company =
12836                self.phase_config.audit_engagements / self.config.companies.len().max(1);
12837            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12838                1
12839            } else {
12840                0
12841            };
12842
12843            for _eng_idx in 0..(engagements_for_company + extra) {
12844                // v3.3.2: draw engagement type from the user-configured
12845                // distribution instead of always using the default
12846                // (AnnualAudit). Falls back to the default when all
12847                // probabilities are zero.
12848                let eng_type =
12849                    engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12850
12851                // Generate the engagement
12852                let mut engagement = engagement_gen.generate_engagement(
12853                    &company.code,
12854                    &company.name,
12855                    fiscal_year,
12856                    period_end,
12857                    company_revenue,
12858                    Some(eng_type),
12859                );
12860
12861                // Replace synthetic team IDs with real employee IDs from master data
12862                if !self.master_data.employees.is_empty() {
12863                    let emp_count = self.master_data.employees.len();
12864                    // Use employee IDs deterministically based on engagement index
12865                    let base = (i * 10 + _eng_idx) % emp_count;
12866                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
12867                        .employee_id
12868                        .clone();
12869                    engagement.engagement_manager_id = self.master_data.employees
12870                        [(base + 1) % emp_count]
12871                        .employee_id
12872                        .clone();
12873                    let real_team: Vec<String> = engagement
12874                        .team_member_ids
12875                        .iter()
12876                        .enumerate()
12877                        .map(|(j, _)| {
12878                            self.master_data.employees[(base + 2 + j) % emp_count]
12879                                .employee_id
12880                                .clone()
12881                        })
12882                        .collect();
12883                    engagement.team_member_ids = real_team;
12884                }
12885
12886                if let Some(pb) = &pb {
12887                    pb.inc(1);
12888                }
12889
12890                // Get team members from the engagement
12891                let team_members: Vec<String> = engagement.team_member_ids.clone();
12892
12893                // Generate workpapers for the engagement.
12894                // v3.3.2: honor `audit.generate_workpapers` — when false,
12895                // workpapers (and dependent evidence) are skipped while
12896                // the engagement itself, risk assessments, findings, etc.
12897                // still generate normally.
12898                let workpapers = if self.config.audit.generate_workpapers {
12899                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
12900                } else {
12901                    Vec::new()
12902                };
12903
12904                for wp in &workpapers {
12905                    if let Some(pb) = &pb {
12906                        pb.inc(1);
12907                    }
12908
12909                    // Generate evidence for each workpaper
12910                    let evidence = evidence_gen.generate_evidence_for_workpaper(
12911                        wp,
12912                        &team_members,
12913                        wp.preparer_date,
12914                    );
12915
12916                    for _ in &evidence {
12917                        if let Some(pb) = &pb {
12918                            pb.inc(1);
12919                        }
12920                    }
12921
12922                    snapshot.evidence.extend(evidence);
12923                }
12924
12925                // Generate risk assessments for the engagement
12926                let risks =
12927                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
12928
12929                for _ in &risks {
12930                    if let Some(pb) = &pb {
12931                        pb.inc(1);
12932                    }
12933                }
12934                snapshot.risk_assessments.extend(risks);
12935
12936                // Generate findings for the engagement
12937                let findings = finding_gen.generate_findings_for_engagement(
12938                    &engagement,
12939                    &workpapers,
12940                    &team_members,
12941                );
12942
12943                for _ in &findings {
12944                    if let Some(pb) = &pb {
12945                        pb.inc(1);
12946                    }
12947                }
12948                snapshot.findings.extend(findings);
12949
12950                // Generate professional judgments for the engagement
12951                let judgments =
12952                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
12953
12954                for _ in &judgments {
12955                    if let Some(pb) = &pb {
12956                        pb.inc(1);
12957                    }
12958                }
12959                snapshot.judgments.extend(judgments);
12960
12961                // ISA 505: External confirmations and responses
12962                let (confs, resps) =
12963                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12964                snapshot.confirmations.extend(confs);
12965                snapshot.confirmation_responses.extend(resps);
12966
12967                // ISA 330: Procedure steps per workpaper
12968                let team_pairs: Vec<(String, String)> = team_members
12969                    .iter()
12970                    .map(|id| {
12971                        let name = self
12972                            .master_data
12973                            .employees
12974                            .iter()
12975                            .find(|e| e.employee_id == *id)
12976                            .map(|e| e.display_name.clone())
12977                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12978                        (id.clone(), name)
12979                    })
12980                    .collect();
12981                for wp in &workpapers {
12982                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12983                    snapshot.procedure_steps.extend(steps);
12984                }
12985
12986                // ISA 530: Samples per workpaper
12987                for wp in &workpapers {
12988                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12989                        snapshot.samples.push(sample);
12990                    }
12991                }
12992
12993                // ISA 520: Analytical procedures
12994                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12995                snapshot.analytical_results.extend(analytical);
12996
12997                // ISA 610: Internal audit function and reports
12998                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
12999                snapshot.ia_functions.push(ia_func);
13000                snapshot.ia_reports.extend(ia_reports);
13001
13002                // ISA 550: Related parties and transactions
13003                let vendor_names: Vec<String> = self
13004                    .master_data
13005                    .vendors
13006                    .iter()
13007                    .map(|v| v.name.clone())
13008                    .collect();
13009                let customer_names: Vec<String> = self
13010                    .master_data
13011                    .customers
13012                    .iter()
13013                    .map(|c| c.name.clone())
13014                    .collect();
13015                let (parties, rp_txns) =
13016                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
13017                snapshot.related_parties.extend(parties);
13018                snapshot.related_party_transactions.extend(rp_txns);
13019
13020                // Add workpapers after findings since findings need them
13021                snapshot.workpapers.extend(workpapers);
13022
13023                // Generate audit scope record for this engagement (one per engagement)
13024                {
13025                    let scope_id = format!(
13026                        "SCOPE-{}-{}",
13027                        engagement.engagement_id.simple(),
13028                        &engagement.client_entity_id
13029                    );
13030                    let scope = datasynth_core::models::audit::AuditScope::new(
13031                        scope_id.clone(),
13032                        engagement.engagement_id.to_string(),
13033                        engagement.client_entity_id.clone(),
13034                        engagement.materiality,
13035                    );
13036                    // Wire scope_id back to engagement
13037                    let mut eng = engagement;
13038                    eng.scope_id = Some(scope_id);
13039                    snapshot.audit_scopes.push(scope);
13040                    snapshot.engagements.push(eng);
13041                }
13042            }
13043        }
13044
13045        // ----------------------------------------------------------------
13046        // ISA 600: Group audit — component auditors, plan, instructions, reports
13047        // ----------------------------------------------------------------
13048        if self.config.companies.len() > 1 {
13049            // Use materiality from the first engagement if available, otherwise
13050            // derive a reasonable figure from total revenue.
13051            let group_materiality = snapshot
13052                .engagements
13053                .first()
13054                .map(|e| e.materiality)
13055                .unwrap_or_else(|| {
13056                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
13057                    total_revenue * pct
13058                });
13059
13060            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
13061            let group_engagement_id = snapshot
13062                .engagements
13063                .first()
13064                .map(|e| e.engagement_id.to_string())
13065                .unwrap_or_else(|| "GROUP-ENG".to_string());
13066
13067            let component_snapshot = component_gen.generate(
13068                &self.config.companies,
13069                group_materiality,
13070                &group_engagement_id,
13071                period_end,
13072            );
13073
13074            snapshot.component_auditors = component_snapshot.component_auditors;
13075            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
13076            snapshot.component_instructions = component_snapshot.component_instructions;
13077            snapshot.component_reports = component_snapshot.component_reports;
13078
13079            info!(
13080                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
13081                snapshot.component_auditors.len(),
13082                snapshot.component_instructions.len(),
13083                snapshot.component_reports.len(),
13084            );
13085        }
13086
13087        // ----------------------------------------------------------------
13088        // ISA 210: Engagement letters — one per engagement
13089        // ----------------------------------------------------------------
13090        {
13091            let applicable_framework = self
13092                .config
13093                .accounting_standards
13094                .framework
13095                .as_ref()
13096                .map(|f| format!("{f:?}"))
13097                .unwrap_or_else(|| "IFRS".to_string());
13098
13099            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
13100            let entity_count = self.config.companies.len();
13101
13102            for engagement in &snapshot.engagements {
13103                let company = self
13104                    .config
13105                    .companies
13106                    .iter()
13107                    .find(|c| c.code == engagement.client_entity_id);
13108                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
13109                let letter_date = engagement.planning_start;
13110                let letter = letter_gen.generate(
13111                    &engagement.engagement_id.to_string(),
13112                    &engagement.client_name,
13113                    entity_count,
13114                    engagement.period_end_date,
13115                    currency,
13116                    &applicable_framework,
13117                    letter_date,
13118                );
13119                snapshot.engagement_letters.push(letter);
13120            }
13121
13122            info!(
13123                "ISA 210 engagement letters: {} generated",
13124                snapshot.engagement_letters.len()
13125            );
13126        }
13127
13128        // ----------------------------------------------------------------
13129        // v3.3.0: Legal documents per engagement (WI: LegalDocumentGenerator)
13130        // ----------------------------------------------------------------
13131        if self.phase_config.generate_legal_documents {
13132            use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
13133            let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
13134            for engagement in &snapshot.engagements {
13135                // Build an employee name list for signatory drawing —
13136                // prefer employees from the engaged entity, fall back to
13137                // all employees.
13138                let employee_names: Vec<String> = self
13139                    .master_data
13140                    .employees
13141                    .iter()
13142                    .filter(|e| e.company_code == engagement.client_entity_id)
13143                    .map(|e| e.display_name.clone())
13144                    .collect();
13145                let names_to_use = if !employee_names.is_empty() {
13146                    employee_names
13147                } else {
13148                    self.master_data
13149                        .employees
13150                        .iter()
13151                        .take(10)
13152                        .map(|e| e.display_name.clone())
13153                        .collect()
13154                };
13155                let docs = legal_gen.generate(
13156                    &engagement.client_entity_id,
13157                    engagement.fiscal_year as i32,
13158                    &names_to_use,
13159                );
13160                snapshot.legal_documents.extend(docs);
13161            }
13162            info!(
13163                "v3.3.0 legal documents: {} emitted across {} engagements",
13164                snapshot.legal_documents.len(),
13165                snapshot.engagements.len()
13166            );
13167        }
13168
13169        // ----------------------------------------------------------------
13170        // v3.3.0: IT general controls — access logs + change records
13171        //
13172        // `ItControlsGenerator` runs one pass per company (not per
13173        // engagement) so employee sets and system catalogs stay
13174        // coherent. We derive the period from the earliest engagement's
13175        // planning_start through the latest engagement's period_end_date
13176        // for each company.
13177        // ----------------------------------------------------------------
13178        if self.phase_config.generate_it_controls {
13179            use datasynth_generators::it_controls_generator::ItControlsGenerator;
13180            use std::collections::HashMap;
13181            let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
13182
13183            // Group engagements by company to produce one IT-controls
13184            // window per entity.
13185            let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
13186                HashMap::new();
13187            for engagement in &snapshot.engagements {
13188                let entry = by_company
13189                    .entry(engagement.client_entity_id.clone())
13190                    .or_insert((engagement.planning_start, engagement.period_end_date));
13191                if engagement.planning_start < entry.0 {
13192                    entry.0 = engagement.planning_start;
13193                }
13194                if engagement.period_end_date > entry.1 {
13195                    entry.1 = engagement.period_end_date;
13196                }
13197            }
13198
13199            // Standard system catalog — populated from known ERP / app
13200            // names. Keeps the generator's data shape stable when the
13201            // user hasn't configured IT-system naming separately.
13202            let systems: Vec<String> = vec![
13203                "SAP ECC",
13204                "SAP S/4 HANA",
13205                "Oracle EBS",
13206                "Workday",
13207                "NetSuite",
13208                "Active Directory",
13209                "SharePoint",
13210                "Salesforce",
13211                "ServiceNow",
13212                "Jira",
13213                "GitHub Enterprise",
13214                "AWS Console",
13215                "Okta",
13216            ]
13217            .into_iter()
13218            .map(String::from)
13219            .collect();
13220
13221            for (company_code, (start, end)) in by_company {
13222                let emps: Vec<(String, String)> = self
13223                    .master_data
13224                    .employees
13225                    .iter()
13226                    .filter(|e| e.company_code == company_code)
13227                    .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13228                    .collect();
13229                if emps.is_empty() {
13230                    continue;
13231                }
13232                // Approximate the period length in months as
13233                // floor(days / 30) + 1, clamped to a minimum of 1.
13234                let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
13235                let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
13236                let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
13237                snapshot.it_controls_access_logs.extend(access_logs);
13238                snapshot.it_controls_change_records.extend(change_records);
13239            }
13240
13241            info!(
13242                "v3.3.0 IT controls: {} access logs, {} change records",
13243                snapshot.it_controls_access_logs.len(),
13244                snapshot.it_controls_change_records.len()
13245            );
13246        }
13247
13248        // ----------------------------------------------------------------
13249        // ISA 560 / IAS 10: Subsequent events
13250        // ----------------------------------------------------------------
13251        {
13252            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
13253            let entity_codes: Vec<String> = self
13254                .config
13255                .companies
13256                .iter()
13257                .map(|c| c.code.clone())
13258                .collect();
13259            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
13260            info!(
13261                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
13262                subsequent.len(),
13263                subsequent
13264                    .iter()
13265                    .filter(|e| matches!(
13266                        e.classification,
13267                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
13268                    ))
13269                    .count(),
13270                subsequent
13271                    .iter()
13272                    .filter(|e| matches!(
13273                        e.classification,
13274                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
13275                    ))
13276                    .count(),
13277            );
13278            snapshot.subsequent_events = subsequent;
13279        }
13280
13281        // ----------------------------------------------------------------
13282        // ISA 402: Service organization controls
13283        // ----------------------------------------------------------------
13284        {
13285            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
13286            let entity_codes: Vec<String> = self
13287                .config
13288                .companies
13289                .iter()
13290                .map(|c| c.code.clone())
13291                .collect();
13292            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
13293            info!(
13294                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
13295                soc_snapshot.service_organizations.len(),
13296                soc_snapshot.soc_reports.len(),
13297                soc_snapshot.user_entity_controls.len(),
13298            );
13299            snapshot.service_organizations = soc_snapshot.service_organizations;
13300            snapshot.soc_reports = soc_snapshot.soc_reports;
13301            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
13302        }
13303
13304        // ----------------------------------------------------------------
13305        // ISA 570: Going concern assessments
13306        // ----------------------------------------------------------------
13307        {
13308            use datasynth_generators::audit::going_concern_generator::{
13309                GoingConcernGenerator, GoingConcernInput,
13310            };
13311            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
13312            let entity_codes: Vec<String> = self
13313                .config
13314                .companies
13315                .iter()
13316                .map(|c| c.code.clone())
13317                .collect();
13318            // Assessment date = period end + 75 days (typical sign-off window).
13319            let assessment_date = period_end + chrono::Duration::days(75);
13320            let period_label = format!("FY{}", period_end.year());
13321
13322            // Build financial inputs from actual journal entries.
13323            //
13324            // We derive approximate P&L, working capital, and operating cash flow
13325            // by aggregating GL account balances from the journal entry population.
13326            // Account ranges used (standard chart):
13327            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
13328            //   Expenses:        6xxx (debit-normal)
13329            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
13330            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
13331            //   Operating CF:    net income used as-is (rough proxy; no D&A adjustment)
13332            let gc_inputs: Vec<GoingConcernInput> = self
13333                .config
13334                .companies
13335                .iter()
13336                .map(|company| {
13337                    let code = &company.code;
13338                    let mut revenue = rust_decimal::Decimal::ZERO;
13339                    let mut expenses = rust_decimal::Decimal::ZERO;
13340                    let mut current_assets = rust_decimal::Decimal::ZERO;
13341                    let mut current_liabs = rust_decimal::Decimal::ZERO;
13342                    let mut total_debt = rust_decimal::Decimal::ZERO;
13343
13344                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
13345                        for line in &je.lines {
13346                            let acct = line.gl_account.as_str();
13347                            let net = line.debit_amount - line.credit_amount;
13348                            if acct.starts_with('4') {
13349                                // Revenue accounts: credit-normal, so negative net = revenue earned
13350                                revenue -= net;
13351                            } else if acct.starts_with('6') {
13352                                // Expense accounts: debit-normal
13353                                expenses += net;
13354                            }
13355                            // Balance sheet accounts for working capital
13356                            if acct.starts_with('1') {
13357                                // Current asset accounts (1000–1499)
13358                                if let Ok(n) = acct.parse::<u32>() {
13359                                    if (1000..=1499).contains(&n) {
13360                                        current_assets += net;
13361                                    }
13362                                }
13363                            } else if acct.starts_with('2') {
13364                                if let Ok(n) = acct.parse::<u32>() {
13365                                    if (2000..=2499).contains(&n) {
13366                                        // Current liabilities
13367                                        current_liabs -= net; // credit-normal
13368                                    } else if (2500..=2999).contains(&n) {
13369                                        // Long-term debt
13370                                        total_debt -= net;
13371                                    }
13372                                }
13373                            }
13374                        }
13375                    }
13376
13377                    let net_income = revenue - expenses;
13378                    let working_capital = current_assets - current_liabs;
13379                    // Rough operating CF proxy: net income (full accrual CF calculation
13380                    // is done separately in the cash flow statement generator)
13381                    let operating_cash_flow = net_income;
13382
13383                    GoingConcernInput {
13384                        entity_code: code.clone(),
13385                        net_income,
13386                        working_capital,
13387                        operating_cash_flow,
13388                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
13389                        assessment_date,
13390                    }
13391                })
13392                .collect();
13393
13394            let assessments = if gc_inputs.is_empty() {
13395                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
13396            } else {
13397                gc_gen.generate_for_entities_with_inputs(
13398                    &entity_codes,
13399                    &gc_inputs,
13400                    assessment_date,
13401                    &period_label,
13402                )
13403            };
13404            info!(
13405                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
13406                assessments.len(),
13407                assessments.iter().filter(|a| matches!(
13408                    a.auditor_conclusion,
13409                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
13410                )).count(),
13411                assessments.iter().filter(|a| matches!(
13412                    a.auditor_conclusion,
13413                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
13414                )).count(),
13415                assessments.iter().filter(|a| matches!(
13416                    a.auditor_conclusion,
13417                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
13418                )).count(),
13419            );
13420            snapshot.going_concern_assessments = assessments;
13421        }
13422
13423        // ----------------------------------------------------------------
13424        // ISA 540: Accounting estimates
13425        // ----------------------------------------------------------------
13426        {
13427            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
13428            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
13429            let entity_codes: Vec<String> = self
13430                .config
13431                .companies
13432                .iter()
13433                .map(|c| c.code.clone())
13434                .collect();
13435            let estimates = est_gen.generate_for_entities(&entity_codes);
13436            info!(
13437                "ISA 540 accounting estimates: {} estimates across {} entities \
13438                 ({} with retrospective reviews, {} with auditor point estimates)",
13439                estimates.len(),
13440                entity_codes.len(),
13441                estimates
13442                    .iter()
13443                    .filter(|e| e.retrospective_review.is_some())
13444                    .count(),
13445                estimates
13446                    .iter()
13447                    .filter(|e| e.auditor_point_estimate.is_some())
13448                    .count(),
13449            );
13450            snapshot.accounting_estimates = estimates;
13451        }
13452
13453        // ----------------------------------------------------------------
13454        // ISA 700/701/705/706: Audit opinions (one per engagement)
13455        // ----------------------------------------------------------------
13456        {
13457            use datasynth_generators::audit::audit_opinion_generator::{
13458                AuditOpinionGenerator, AuditOpinionInput,
13459            };
13460
13461            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
13462
13463            // Build inputs — one per engagement, linking findings and going concern.
13464            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
13465                .engagements
13466                .iter()
13467                .map(|eng| {
13468                    // Collect findings for this engagement.
13469                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13470                        .findings
13471                        .iter()
13472                        .filter(|f| f.engagement_id == eng.engagement_id)
13473                        .cloned()
13474                        .collect();
13475
13476                    // Going concern for this entity.
13477                    let gc = snapshot
13478                        .going_concern_assessments
13479                        .iter()
13480                        .find(|g| g.entity_code == eng.client_entity_id)
13481                        .cloned();
13482
13483                    // Component reports relevant to this engagement.
13484                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
13485                        snapshot.component_reports.clone();
13486
13487                    let auditor = self
13488                        .master_data
13489                        .employees
13490                        .first()
13491                        .map(|e| e.display_name.clone())
13492                        .unwrap_or_else(|| "Global Audit LLP".into());
13493
13494                    let partner = self
13495                        .master_data
13496                        .employees
13497                        .get(1)
13498                        .map(|e| e.display_name.clone())
13499                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
13500
13501                    AuditOpinionInput {
13502                        entity_code: eng.client_entity_id.clone(),
13503                        entity_name: eng.client_name.clone(),
13504                        engagement_id: eng.engagement_id,
13505                        period_end: eng.period_end_date,
13506                        findings: eng_findings,
13507                        going_concern: gc,
13508                        component_reports: comp_reports,
13509                        // Mark as US-listed when audit standards include PCAOB.
13510                        is_us_listed: {
13511                            let fw = &self.config.audit_standards.isa_compliance.framework;
13512                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
13513                        },
13514                        auditor_name: auditor,
13515                        engagement_partner: partner,
13516                    }
13517                })
13518                .collect();
13519
13520            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
13521
13522            for go in &generated_opinions {
13523                snapshot
13524                    .key_audit_matters
13525                    .extend(go.key_audit_matters.clone());
13526            }
13527            snapshot.audit_opinions = generated_opinions
13528                .into_iter()
13529                .map(|go| go.opinion)
13530                .collect();
13531
13532            info!(
13533                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
13534                snapshot.audit_opinions.len(),
13535                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
13536                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
13537                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
13538                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
13539            );
13540        }
13541
13542        // ----------------------------------------------------------------
13543        // SOX 302 / 404 assessments
13544        // ----------------------------------------------------------------
13545        {
13546            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
13547
13548            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
13549
13550            for (i, company) in self.config.companies.iter().enumerate() {
13551                // Collect findings for this company's engagements.
13552                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
13553                    .engagements
13554                    .iter()
13555                    .filter(|e| e.client_entity_id == company.code)
13556                    .map(|e| e.engagement_id)
13557                    .collect();
13558
13559                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13560                    .findings
13561                    .iter()
13562                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
13563                    .cloned()
13564                    .collect();
13565
13566                // Derive executive names from employee list.
13567                let emp_count = self.master_data.employees.len();
13568                let ceo_name = if emp_count > 0 {
13569                    self.master_data.employees[i % emp_count]
13570                        .display_name
13571                        .clone()
13572                } else {
13573                    format!("CEO of {}", company.name)
13574                };
13575                let cfo_name = if emp_count > 1 {
13576                    self.master_data.employees[(i + 1) % emp_count]
13577                        .display_name
13578                        .clone()
13579                } else {
13580                    format!("CFO of {}", company.name)
13581                };
13582
13583                // Use engagement materiality if available.
13584                let materiality = snapshot
13585                    .engagements
13586                    .iter()
13587                    .find(|e| e.client_entity_id == company.code)
13588                    .map(|e| e.materiality)
13589                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
13590
13591                let input = SoxGeneratorInput {
13592                    company_code: company.code.clone(),
13593                    company_name: company.name.clone(),
13594                    fiscal_year,
13595                    period_end,
13596                    findings: company_findings,
13597                    ceo_name,
13598                    cfo_name,
13599                    materiality_threshold: materiality,
13600                    revenue_percent: rust_decimal::Decimal::from(100),
13601                    assets_percent: rust_decimal::Decimal::from(100),
13602                    significant_accounts: vec![
13603                        "Revenue".into(),
13604                        "Accounts Receivable".into(),
13605                        "Inventory".into(),
13606                        "Fixed Assets".into(),
13607                        "Accounts Payable".into(),
13608                    ],
13609                };
13610
13611                let (certs, assessment) = sox_gen.generate(&input);
13612                snapshot.sox_302_certifications.extend(certs);
13613                snapshot.sox_404_assessments.push(assessment);
13614            }
13615
13616            info!(
13617                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
13618                snapshot.sox_302_certifications.len(),
13619                snapshot.sox_404_assessments.len(),
13620                snapshot
13621                    .sox_404_assessments
13622                    .iter()
13623                    .filter(|a| a.icfr_effective)
13624                    .count(),
13625                snapshot
13626                    .sox_404_assessments
13627                    .iter()
13628                    .filter(|a| !a.icfr_effective)
13629                    .count(),
13630            );
13631        }
13632
13633        // ----------------------------------------------------------------
13634        // ISA 320: Materiality calculations (one per entity)
13635        // ----------------------------------------------------------------
13636        {
13637            use datasynth_generators::audit::materiality_generator::{
13638                MaterialityGenerator, MaterialityInput,
13639            };
13640
13641            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13642
13643            // Compute per-company financials from JEs.
13644            // Asset accounts start with '1', revenue with '4',
13645            // expense accounts with '5' or '6'.
13646            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13647
13648            for company in &self.config.companies {
13649                let company_code = company.code.clone();
13650
13651                // Revenue: credit-side entries on 4xxx accounts
13652                let company_revenue: rust_decimal::Decimal = entries
13653                    .iter()
13654                    .filter(|e| e.company_code() == company_code)
13655                    .flat_map(|e| e.lines.iter())
13656                    .filter(|l| l.account_code.starts_with('4'))
13657                    .map(|l| l.credit_amount)
13658                    .sum();
13659
13660                // Total assets: debit balances on 1xxx accounts
13661                let total_assets: rust_decimal::Decimal = entries
13662                    .iter()
13663                    .filter(|e| e.company_code() == company_code)
13664                    .flat_map(|e| e.lines.iter())
13665                    .filter(|l| l.account_code.starts_with('1'))
13666                    .map(|l| l.debit_amount)
13667                    .sum();
13668
13669                // Expenses: debit-side entries on 5xxx/6xxx accounts
13670                let total_expenses: rust_decimal::Decimal = entries
13671                    .iter()
13672                    .filter(|e| e.company_code() == company_code)
13673                    .flat_map(|e| e.lines.iter())
13674                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13675                    .map(|l| l.debit_amount)
13676                    .sum();
13677
13678                // Equity: credit balances on 3xxx accounts
13679                let equity: rust_decimal::Decimal = entries
13680                    .iter()
13681                    .filter(|e| e.company_code() == company_code)
13682                    .flat_map(|e| e.lines.iter())
13683                    .filter(|l| l.account_code.starts_with('3'))
13684                    .map(|l| l.credit_amount)
13685                    .sum();
13686
13687                let pretax_income = company_revenue - total_expenses;
13688
13689                // If no company-specific data, fall back to proportional share
13690                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13691                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
13692                        .unwrap_or(rust_decimal::Decimal::ONE);
13693                    (
13694                        total_revenue * w,
13695                        total_revenue * w * rust_decimal::Decimal::from(3),
13696                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
13697                        total_revenue * w * rust_decimal::Decimal::from(2),
13698                    )
13699                } else {
13700                    (company_revenue, total_assets, pretax_income, equity)
13701                };
13702
13703                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
13704
13705                materiality_inputs.push(MaterialityInput {
13706                    entity_code: company_code,
13707                    period: format!("FY{}", fiscal_year),
13708                    revenue: rev,
13709                    pretax_income: pti,
13710                    total_assets: assets,
13711                    equity: eq,
13712                    gross_profit,
13713                });
13714            }
13715
13716            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13717
13718            info!(
13719                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13720                 {} total assets, {} equity benchmarks)",
13721                snapshot.materiality_calculations.len(),
13722                snapshot
13723                    .materiality_calculations
13724                    .iter()
13725                    .filter(|m| matches!(
13726                        m.benchmark,
13727                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13728                    ))
13729                    .count(),
13730                snapshot
13731                    .materiality_calculations
13732                    .iter()
13733                    .filter(|m| matches!(
13734                        m.benchmark,
13735                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13736                    ))
13737                    .count(),
13738                snapshot
13739                    .materiality_calculations
13740                    .iter()
13741                    .filter(|m| matches!(
13742                        m.benchmark,
13743                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13744                    ))
13745                    .count(),
13746                snapshot
13747                    .materiality_calculations
13748                    .iter()
13749                    .filter(|m| matches!(
13750                        m.benchmark,
13751                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13752                    ))
13753                    .count(),
13754            );
13755        }
13756
13757        // ----------------------------------------------------------------
13758        // ISA 315: Combined Risk Assessments (per entity, per account area)
13759        // ----------------------------------------------------------------
13760        {
13761            use datasynth_generators::audit::cra_generator::CraGenerator;
13762
13763            let mut cra_gen = CraGenerator::new(self.seed + 8315);
13764
13765            // Build entity → scope_id map from already-generated scopes
13766            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13767                .audit_scopes
13768                .iter()
13769                .map(|s| (s.entity_code.clone(), s.id.clone()))
13770                .collect();
13771
13772            for company in &self.config.companies {
13773                let cras = cra_gen.generate_for_entity(&company.code, None);
13774                let scope_id = entity_scope_map.get(&company.code).cloned();
13775                let cras_with_scope: Vec<_> = cras
13776                    .into_iter()
13777                    .map(|mut cra| {
13778                        cra.scope_id = scope_id.clone();
13779                        cra
13780                    })
13781                    .collect();
13782                snapshot.combined_risk_assessments.extend(cras_with_scope);
13783            }
13784
13785            let significant_count = snapshot
13786                .combined_risk_assessments
13787                .iter()
13788                .filter(|c| c.significant_risk)
13789                .count();
13790            let high_cra_count = snapshot
13791                .combined_risk_assessments
13792                .iter()
13793                .filter(|c| {
13794                    matches!(
13795                        c.combined_risk,
13796                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13797                    )
13798                })
13799                .count();
13800
13801            info!(
13802                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13803                snapshot.combined_risk_assessments.len(),
13804                significant_count,
13805                high_cra_count,
13806            );
13807        }
13808
13809        // ----------------------------------------------------------------
13810        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
13811        // ----------------------------------------------------------------
13812        {
13813            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13814
13815            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13816
13817            // Group CRAs by entity and use per-entity tolerable error from materiality
13818            for company in &self.config.companies {
13819                let entity_code = company.code.clone();
13820
13821                // Find tolerable error for this entity (= performance materiality)
13822                let tolerable_error = snapshot
13823                    .materiality_calculations
13824                    .iter()
13825                    .find(|m| m.entity_code == entity_code)
13826                    .map(|m| m.tolerable_error);
13827
13828                // Collect CRAs for this entity
13829                let entity_cras: Vec<_> = snapshot
13830                    .combined_risk_assessments
13831                    .iter()
13832                    .filter(|c| c.entity_code == entity_code)
13833                    .cloned()
13834                    .collect();
13835
13836                if !entity_cras.is_empty() {
13837                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13838                    snapshot.sampling_plans.extend(plans);
13839                    snapshot.sampled_items.extend(items);
13840                }
13841            }
13842
13843            let misstatement_count = snapshot
13844                .sampled_items
13845                .iter()
13846                .filter(|i| i.misstatement_found)
13847                .count();
13848
13849            info!(
13850                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13851                snapshot.sampling_plans.len(),
13852                snapshot.sampled_items.len(),
13853                misstatement_count,
13854            );
13855        }
13856
13857        // ----------------------------------------------------------------
13858        // ISA 315: Significant Classes of Transactions (SCOTS)
13859        // ----------------------------------------------------------------
13860        {
13861            use datasynth_generators::audit::scots_generator::{
13862                ScotsGenerator, ScotsGeneratorConfig,
13863            };
13864
13865            let ic_enabled = self.config.intercompany.enabled;
13866
13867            let config = ScotsGeneratorConfig {
13868                intercompany_enabled: ic_enabled,
13869                ..ScotsGeneratorConfig::default()
13870            };
13871            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
13872
13873            for company in &self.config.companies {
13874                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
13875                snapshot
13876                    .significant_transaction_classes
13877                    .extend(entity_scots);
13878            }
13879
13880            let estimation_count = snapshot
13881                .significant_transaction_classes
13882                .iter()
13883                .filter(|s| {
13884                    matches!(
13885                        s.transaction_type,
13886                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
13887                    )
13888                })
13889                .count();
13890
13891            info!(
13892                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
13893                snapshot.significant_transaction_classes.len(),
13894                estimation_count,
13895            );
13896        }
13897
13898        // ----------------------------------------------------------------
13899        // ISA 520: Unusual Item Markers
13900        // ----------------------------------------------------------------
13901        {
13902            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
13903
13904            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
13905            let entity_codes: Vec<String> = self
13906                .config
13907                .companies
13908                .iter()
13909                .map(|c| c.code.clone())
13910                .collect();
13911            let unusual_flags =
13912                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
13913            info!(
13914                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
13915                unusual_flags.len(),
13916                unusual_flags
13917                    .iter()
13918                    .filter(|f| matches!(
13919                        f.severity,
13920                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
13921                    ))
13922                    .count(),
13923                unusual_flags
13924                    .iter()
13925                    .filter(|f| matches!(
13926                        f.severity,
13927                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
13928                    ))
13929                    .count(),
13930                unusual_flags
13931                    .iter()
13932                    .filter(|f| matches!(
13933                        f.severity,
13934                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
13935                    ))
13936                    .count(),
13937            );
13938            snapshot.unusual_items = unusual_flags;
13939        }
13940
13941        // ----------------------------------------------------------------
13942        // ISA 520: Analytical Relationships
13943        // ----------------------------------------------------------------
13944        {
13945            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
13946
13947            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
13948            let entity_codes: Vec<String> = self
13949                .config
13950                .companies
13951                .iter()
13952                .map(|c| c.code.clone())
13953                .collect();
13954            let current_period_label = format!("FY{fiscal_year}");
13955            let prior_period_label = format!("FY{}", fiscal_year - 1);
13956            let analytical_rels = ar_gen.generate_for_entities(
13957                &entity_codes,
13958                entries,
13959                &current_period_label,
13960                &prior_period_label,
13961            );
13962            let out_of_range = analytical_rels
13963                .iter()
13964                .filter(|r| !r.within_expected_range)
13965                .count();
13966            info!(
13967                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13968                analytical_rels.len(),
13969                out_of_range,
13970            );
13971            snapshot.analytical_relationships = analytical_rels;
13972        }
13973
13974        if let Some(pb) = pb {
13975            pb.finish_with_message(format!(
13976                "Audit data: {} engagements, {} workpapers, {} evidence, \
13977                 {} confirmations, {} procedure steps, {} samples, \
13978                 {} analytical, {} IA funcs, {} related parties, \
13979                 {} component auditors, {} letters, {} subsequent events, \
13980                 {} service orgs, {} going concern, {} accounting estimates, \
13981                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13982                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13983                 {} unusual items, {} analytical relationships",
13984                snapshot.engagements.len(),
13985                snapshot.workpapers.len(),
13986                snapshot.evidence.len(),
13987                snapshot.confirmations.len(),
13988                snapshot.procedure_steps.len(),
13989                snapshot.samples.len(),
13990                snapshot.analytical_results.len(),
13991                snapshot.ia_functions.len(),
13992                snapshot.related_parties.len(),
13993                snapshot.component_auditors.len(),
13994                snapshot.engagement_letters.len(),
13995                snapshot.subsequent_events.len(),
13996                snapshot.service_organizations.len(),
13997                snapshot.going_concern_assessments.len(),
13998                snapshot.accounting_estimates.len(),
13999                snapshot.audit_opinions.len(),
14000                snapshot.key_audit_matters.len(),
14001                snapshot.sox_302_certifications.len(),
14002                snapshot.sox_404_assessments.len(),
14003                snapshot.materiality_calculations.len(),
14004                snapshot.combined_risk_assessments.len(),
14005                snapshot.sampling_plans.len(),
14006                snapshot.significant_transaction_classes.len(),
14007                snapshot.unusual_items.len(),
14008                snapshot.analytical_relationships.len(),
14009            ));
14010        }
14011
14012        // ----------------------------------------------------------------
14013        // PCAOB-ISA cross-reference mappings
14014        // ----------------------------------------------------------------
14015        // Always include the standard PCAOB-ISA mappings when audit generation is
14016        // enabled. These are static reference data (no randomness required) so we
14017        // call standard_mappings() directly.
14018        {
14019            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14020            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14021            debug!(
14022                "PCAOB-ISA mappings generated: {} mappings",
14023                snapshot.isa_pcaob_mappings.len()
14024            );
14025        }
14026
14027        // ----------------------------------------------------------------
14028        // ISA standard reference entries
14029        // ----------------------------------------------------------------
14030        // Emit flat ISA standard reference data (number, title, series) so
14031        // consumers get a machine-readable listing of all 34 ISA standards in
14032        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
14033        {
14034            use datasynth_standards::audit::isa_reference::IsaStandard;
14035            snapshot.isa_mappings = IsaStandard::standard_entries();
14036            debug!(
14037                "ISA standard entries generated: {} standards",
14038                snapshot.isa_mappings.len()
14039            );
14040        }
14041
14042        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
14043        // For each RPT, find the chronologically closest JE for the engagement's entity.
14044        {
14045            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
14046                .engagements
14047                .iter()
14048                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
14049                .collect();
14050
14051            for rpt in &mut snapshot.related_party_transactions {
14052                if rpt.journal_entry_id.is_some() {
14053                    continue; // already set
14054                }
14055                let entity = engagement_by_id
14056                    .get(&rpt.engagement_id.to_string())
14057                    .copied()
14058                    .unwrap_or("");
14059
14060                // Find closest JE by date in the entity's company
14061                let best_je = entries
14062                    .iter()
14063                    .filter(|je| je.header.company_code == entity)
14064                    .min_by_key(|je| {
14065                        (je.header.posting_date - rpt.transaction_date)
14066                            .num_days()
14067                            .abs()
14068                    });
14069
14070                if let Some(je) = best_je {
14071                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
14072                }
14073            }
14074
14075            let linked = snapshot
14076                .related_party_transactions
14077                .iter()
14078                .filter(|t| t.journal_entry_id.is_some())
14079                .count();
14080            debug!(
14081                "Linked {}/{} related party transactions to journal entries",
14082                linked,
14083                snapshot.related_party_transactions.len()
14084            );
14085        }
14086
14087        // --- ISA 700 / 701 / 705 / 706: audit opinion + key audit matters.
14088        // One opinion per engagement, derived from that engagement's findings,
14089        // going-concern assessment, and any component-auditor reports. Fills
14090        // `audit_opinions` + a flattened `key_audit_matters` for downstream
14091        // export.
14092        if !snapshot.engagements.is_empty() {
14093            use datasynth_generators::audit_opinion_generator::{
14094                AuditOpinionGenerator, AuditOpinionInput,
14095            };
14096
14097            let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
14098            let inputs: Vec<AuditOpinionInput> = snapshot
14099                .engagements
14100                .iter()
14101                .map(|eng| {
14102                    let findings = snapshot
14103                        .findings
14104                        .iter()
14105                        .filter(|f| f.engagement_id == eng.engagement_id)
14106                        .cloned()
14107                        .collect();
14108                    let going_concern = snapshot
14109                        .going_concern_assessments
14110                        .iter()
14111                        .find(|gc| gc.entity_code == eng.client_entity_id)
14112                        .cloned();
14113                    // ComponentAuditorReport doesn't carry an engagement id, but
14114                    // component scope is keyed by `entity_code`, so filter on that.
14115                    let component_reports = snapshot
14116                        .component_reports
14117                        .iter()
14118                        .filter(|r| r.entity_code == eng.client_entity_id)
14119                        .cloned()
14120                        .collect();
14121
14122                    AuditOpinionInput {
14123                        entity_code: eng.client_entity_id.clone(),
14124                        entity_name: eng.client_name.clone(),
14125                        engagement_id: eng.engagement_id,
14126                        period_end: eng.period_end_date,
14127                        findings,
14128                        going_concern,
14129                        component_reports,
14130                        is_us_listed: matches!(
14131                            eng.engagement_type,
14132                            datasynth_core::audit::EngagementType::IntegratedAudit
14133                                | datasynth_core::audit::EngagementType::Sox404
14134                        ),
14135                        auditor_name: "DataSynth Audit LLP".to_string(),
14136                        engagement_partner: "Engagement Partner".to_string(),
14137                    }
14138                })
14139                .collect();
14140
14141            let generated = opinion_gen.generate_batch(&inputs);
14142            for g in generated {
14143                snapshot.key_audit_matters.extend(g.key_audit_matters);
14144                snapshot.audit_opinions.push(g.opinion);
14145            }
14146            debug!(
14147                "Generated {} audit opinions with {} key audit matters",
14148                snapshot.audit_opinions.len(),
14149                snapshot.key_audit_matters.len()
14150            );
14151        }
14152
14153        Ok(snapshot)
14154    }
14155
14156    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
14157    ///
14158    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
14159    /// from the current orchestrator state, runs the FSM engine, and maps the
14160    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
14161    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
14162    fn generate_audit_data_with_fsm(
14163        &mut self,
14164        entries: &[JournalEntry],
14165    ) -> SynthResult<AuditSnapshot> {
14166        use datasynth_audit_fsm::{
14167            context::EngagementContext,
14168            engine::AuditFsmEngine,
14169            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
14170        };
14171        use rand::SeedableRng;
14172        use rand_chacha::ChaCha8Rng;
14173
14174        info!("Audit FSM: generating audit data via FSM engine");
14175
14176        let fsm_config = self
14177            .config
14178            .audit
14179            .fsm
14180            .as_ref()
14181            .expect("FSM config must be present when FSM is enabled");
14182
14183        // 1. Load blueprint from config string.
14184        let bwp = match fsm_config.blueprint.as_str() {
14185            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
14186            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
14187            _ => {
14188                warn!(
14189                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
14190                    fsm_config.blueprint
14191                );
14192                BlueprintWithPreconditions::load_builtin_fsa()
14193            }
14194        }
14195        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
14196
14197        // 2. Load overlay from config string.
14198        let overlay = match fsm_config.overlay.as_str() {
14199            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
14200            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
14201            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
14202            _ => {
14203                warn!(
14204                    "Unknown FSM overlay '{}', falling back to builtin:default",
14205                    fsm_config.overlay
14206                );
14207                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
14208            }
14209        }
14210        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
14211
14212        // 3. Build EngagementContext from orchestrator state.
14213        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14214            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
14215        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
14216
14217        // Determine the engagement entity early so we can filter JEs.
14218        let company = self.config.companies.first();
14219        let company_code = company
14220            .map(|c| c.code.clone())
14221            .unwrap_or_else(|| "UNKNOWN".to_string());
14222        let company_name = company
14223            .map(|c| c.name.clone())
14224            .unwrap_or_else(|| "Unknown Company".to_string());
14225        let currency = company
14226            .map(|c| c.currency.clone())
14227            .unwrap_or_else(|| "USD".to_string());
14228
14229        // Filter JEs to the engagement entity for single-company coherence.
14230        let entity_entries: Vec<_> = entries
14231            .iter()
14232            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
14233            .cloned()
14234            .collect();
14235        let entries = &entity_entries; // Shadow the parameter for remaining usage
14236
14237        // Financial aggregates from journal entries.
14238        let total_revenue: rust_decimal::Decimal = entries
14239            .iter()
14240            .flat_map(|e| e.lines.iter())
14241            .filter(|l| l.account_code.starts_with('4'))
14242            .map(|l| l.credit_amount - l.debit_amount)
14243            .sum();
14244
14245        let total_assets: rust_decimal::Decimal = entries
14246            .iter()
14247            .flat_map(|e| e.lines.iter())
14248            .filter(|l| l.account_code.starts_with('1'))
14249            .map(|l| l.debit_amount - l.credit_amount)
14250            .sum();
14251
14252        let total_expenses: rust_decimal::Decimal = entries
14253            .iter()
14254            .flat_map(|e| e.lines.iter())
14255            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
14256            .map(|l| l.debit_amount)
14257            .sum();
14258
14259        let equity: rust_decimal::Decimal = entries
14260            .iter()
14261            .flat_map(|e| e.lines.iter())
14262            .filter(|l| l.account_code.starts_with('3'))
14263            .map(|l| l.credit_amount - l.debit_amount)
14264            .sum();
14265
14266        let total_debt: rust_decimal::Decimal = entries
14267            .iter()
14268            .flat_map(|e| e.lines.iter())
14269            .filter(|l| l.account_code.starts_with('2'))
14270            .map(|l| l.credit_amount - l.debit_amount)
14271            .sum();
14272
14273        let pretax_income = total_revenue - total_expenses;
14274
14275        let cogs: rust_decimal::Decimal = entries
14276            .iter()
14277            .flat_map(|e| e.lines.iter())
14278            .filter(|l| l.account_code.starts_with('5'))
14279            .map(|l| l.debit_amount)
14280            .sum();
14281        let gross_profit = total_revenue - cogs;
14282
14283        let current_assets: rust_decimal::Decimal = entries
14284            .iter()
14285            .flat_map(|e| e.lines.iter())
14286            .filter(|l| {
14287                l.account_code.starts_with("10")
14288                    || l.account_code.starts_with("11")
14289                    || l.account_code.starts_with("12")
14290                    || l.account_code.starts_with("13")
14291            })
14292            .map(|l| l.debit_amount - l.credit_amount)
14293            .sum();
14294        let current_liabilities: rust_decimal::Decimal = entries
14295            .iter()
14296            .flat_map(|e| e.lines.iter())
14297            .filter(|l| {
14298                l.account_code.starts_with("20")
14299                    || l.account_code.starts_with("21")
14300                    || l.account_code.starts_with("22")
14301            })
14302            .map(|l| l.credit_amount - l.debit_amount)
14303            .sum();
14304        let working_capital = current_assets - current_liabilities;
14305
14306        let depreciation: rust_decimal::Decimal = entries
14307            .iter()
14308            .flat_map(|e| e.lines.iter())
14309            .filter(|l| l.account_code.starts_with("60"))
14310            .map(|l| l.debit_amount)
14311            .sum();
14312        let operating_cash_flow = pretax_income + depreciation;
14313
14314        // GL accounts for reference data.
14315        let accounts: Vec<String> = self
14316            .coa
14317            .as_ref()
14318            .map(|coa| {
14319                coa.get_postable_accounts()
14320                    .iter()
14321                    .map(|acc| acc.account_code().to_string())
14322                    .collect()
14323            })
14324            .unwrap_or_default();
14325
14326        // Team member IDs and display names from master data.
14327        let team_member_ids: Vec<String> = self
14328            .master_data
14329            .employees
14330            .iter()
14331            .take(8) // Cap team size
14332            .map(|e| e.employee_id.clone())
14333            .collect();
14334        let team_member_pairs: Vec<(String, String)> = self
14335            .master_data
14336            .employees
14337            .iter()
14338            .take(8)
14339            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
14340            .collect();
14341
14342        let vendor_names: Vec<String> = self
14343            .master_data
14344            .vendors
14345            .iter()
14346            .map(|v| v.name.clone())
14347            .collect();
14348        let customer_names: Vec<String> = self
14349            .master_data
14350            .customers
14351            .iter()
14352            .map(|c| c.name.clone())
14353            .collect();
14354
14355        let entity_codes: Vec<String> = self
14356            .config
14357            .companies
14358            .iter()
14359            .map(|c| c.code.clone())
14360            .collect();
14361
14362        // Journal entry IDs for evidence tracing (sample up to 50).
14363        let journal_entry_ids: Vec<String> = entries
14364            .iter()
14365            .take(50)
14366            .map(|e| e.header.document_id.to_string())
14367            .collect();
14368
14369        // Account balances for risk weighting (aggregate debit - credit per account).
14370        let mut account_balances = std::collections::HashMap::<String, f64>::new();
14371        for entry in entries {
14372            for line in &entry.lines {
14373                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
14374                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
14375                *account_balances
14376                    .entry(line.account_code.clone())
14377                    .or_insert(0.0) += debit_f64 - credit_f64;
14378            }
14379        }
14380
14381        // Internal control IDs and anomaly refs are populated by the
14382        // caller when available; here we default to empty because the
14383        // orchestrator state may not have generated controls/anomalies
14384        // yet at this point in the pipeline.
14385        let control_ids: Vec<String> = Vec::new();
14386        let anomaly_refs: Vec<String> = Vec::new();
14387
14388        let mut context = EngagementContext {
14389            company_code,
14390            company_name,
14391            fiscal_year: start_date.year(),
14392            currency,
14393            total_revenue,
14394            total_assets,
14395            engagement_start: start_date,
14396            report_date: period_end,
14397            pretax_income,
14398            equity,
14399            gross_profit,
14400            working_capital,
14401            operating_cash_flow,
14402            total_debt,
14403            team_member_ids,
14404            team_member_pairs,
14405            accounts,
14406            vendor_names,
14407            customer_names,
14408            journal_entry_ids,
14409            account_balances,
14410            control_ids,
14411            anomaly_refs,
14412            journal_entries: entries.to_vec(),
14413            is_us_listed: false,
14414            entity_codes,
14415            auditor_firm_name: "DataSynth Audit LLP".into(),
14416            accounting_framework: self
14417                .config
14418                .accounting_standards
14419                .framework
14420                .map(|f| match f {
14421                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
14422                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
14423                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
14424                        "French GAAP"
14425                    }
14426                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
14427                        "German GAAP"
14428                    }
14429                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
14430                        "Dual Reporting"
14431                    }
14432                })
14433                .unwrap_or("IFRS")
14434                .into(),
14435        };
14436
14437        // 4. Create and run the FSM engine.
14438        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
14439        let rng = ChaCha8Rng::seed_from_u64(seed);
14440        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
14441
14442        let mut result = engine
14443            .run_engagement(&context)
14444            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
14445
14446        info!(
14447            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
14448             {} phases completed, duration {:.1}h",
14449            result.event_log.len(),
14450            result.artifacts.total_artifacts(),
14451            result.anomalies.len(),
14452            result.phases_completed.len(),
14453            result.total_duration_hours,
14454        );
14455
14456        // 4b. Populate financial data in the artifact bag for downstream consumers.
14457        let tb_entity = context.company_code.clone();
14458        let tb_fy = context.fiscal_year;
14459        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
14460        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
14461            entries,
14462            &tb_entity,
14463            tb_fy,
14464            self.coa.as_ref().map(|c| c.as_ref()),
14465        );
14466
14467        // 5. Map ArtifactBag fields to AuditSnapshot.
14468        let bag = result.artifacts;
14469        let mut snapshot = AuditSnapshot {
14470            engagements: bag.engagements,
14471            engagement_letters: bag.engagement_letters,
14472            materiality_calculations: bag.materiality_calculations,
14473            risk_assessments: bag.risk_assessments,
14474            combined_risk_assessments: bag.combined_risk_assessments,
14475            workpapers: bag.workpapers,
14476            evidence: bag.evidence,
14477            findings: bag.findings,
14478            judgments: bag.judgments,
14479            sampling_plans: bag.sampling_plans,
14480            sampled_items: bag.sampled_items,
14481            analytical_results: bag.analytical_results,
14482            going_concern_assessments: bag.going_concern_assessments,
14483            subsequent_events: bag.subsequent_events,
14484            audit_opinions: bag.audit_opinions,
14485            key_audit_matters: bag.key_audit_matters,
14486            procedure_steps: bag.procedure_steps,
14487            samples: bag.samples,
14488            confirmations: bag.confirmations,
14489            confirmation_responses: bag.confirmation_responses,
14490            // Store the event trail for downstream export.
14491            fsm_event_trail: Some(result.event_log),
14492            // Fields not produced by the FSM engine remain at their defaults.
14493            ..Default::default()
14494        };
14495
14496        // 6. Add static reference data (same as legacy path).
14497        {
14498            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14499            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14500        }
14501        {
14502            use datasynth_standards::audit::isa_reference::IsaStandard;
14503            snapshot.isa_mappings = IsaStandard::standard_entries();
14504        }
14505
14506        info!(
14507            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
14508             {} risk assessments, {} findings, {} materiality calcs",
14509            snapshot.engagements.len(),
14510            snapshot.workpapers.len(),
14511            snapshot.evidence.len(),
14512            snapshot.risk_assessments.len(),
14513            snapshot.findings.len(),
14514            snapshot.materiality_calculations.len(),
14515        );
14516
14517        Ok(snapshot)
14518    }
14519
14520    /// Export journal entries as graph data for ML training and network reconstruction.
14521    ///
14522    /// Builds a transaction graph where:
14523    /// - Nodes are GL accounts
14524    /// - Edges are money flows from credit to debit accounts
14525    /// - Edge attributes include amount, date, business process, anomaly flags
14526    fn export_graphs(
14527        &mut self,
14528        entries: &[JournalEntry],
14529        _coa: &Arc<ChartOfAccounts>,
14530        stats: &mut EnhancedGenerationStatistics,
14531    ) -> SynthResult<GraphExportSnapshot> {
14532        let pb = self.create_progress_bar(100, "Exporting Graphs");
14533
14534        let mut snapshot = GraphExportSnapshot::default();
14535
14536        // Get output directory
14537        let output_dir = self
14538            .output_path
14539            .clone()
14540            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14541        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14542
14543        // Process each graph type configuration
14544        for graph_type in &self.config.graph_export.graph_types {
14545            if let Some(pb) = &pb {
14546                pb.inc(10);
14547            }
14548
14549            // Build transaction graph
14550            let graph_config = TransactionGraphConfig {
14551                include_vendors: false,
14552                include_customers: false,
14553                create_debit_credit_edges: true,
14554                include_document_nodes: graph_type.include_document_nodes,
14555                min_edge_weight: graph_type.min_edge_weight,
14556                aggregate_parallel_edges: graph_type.aggregate_edges,
14557                framework: None,
14558            };
14559
14560            let mut builder = TransactionGraphBuilder::new(graph_config);
14561            builder.add_journal_entries(entries);
14562            let graph = builder.build();
14563
14564            // Update stats
14565            stats.graph_node_count += graph.node_count();
14566            stats.graph_edge_count += graph.edge_count();
14567
14568            if let Some(pb) = &pb {
14569                pb.inc(40);
14570            }
14571
14572            // Export to each configured format
14573            for format in &self.config.graph_export.formats {
14574                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
14575
14576                // Create output directory
14577                if let Err(e) = std::fs::create_dir_all(&format_dir) {
14578                    warn!("Failed to create graph output directory: {}", e);
14579                    continue;
14580                }
14581
14582                match format {
14583                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
14584                        let pyg_config = PyGExportConfig {
14585                            common: datasynth_graph::CommonExportConfig {
14586                                export_node_features: true,
14587                                export_edge_features: true,
14588                                export_node_labels: true,
14589                                export_edge_labels: true,
14590                                export_masks: true,
14591                                train_ratio: self.config.graph_export.train_ratio,
14592                                val_ratio: self.config.graph_export.validation_ratio,
14593                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14594                            },
14595                            one_hot_categoricals: false,
14596                        };
14597
14598                        let exporter = PyGExporter::new(pyg_config);
14599                        match exporter.export(&graph, &format_dir) {
14600                            Ok(metadata) => {
14601                                snapshot.exports.insert(
14602                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
14603                                    GraphExportInfo {
14604                                        name: graph_type.name.clone(),
14605                                        format: "pytorch_geometric".to_string(),
14606                                        output_path: format_dir.clone(),
14607                                        node_count: metadata.num_nodes,
14608                                        edge_count: metadata.num_edges,
14609                                    },
14610                                );
14611                                snapshot.graph_count += 1;
14612                            }
14613                            Err(e) => {
14614                                warn!("Failed to export PyTorch Geometric graph: {}", e);
14615                            }
14616                        }
14617                    }
14618                    datasynth_config::schema::GraphExportFormat::Neo4j => {
14619                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
14620
14621                        let neo4j_config = Neo4jExportConfig {
14622                            export_node_properties: true,
14623                            export_edge_properties: true,
14624                            export_features: true,
14625                            generate_cypher: true,
14626                            generate_admin_import: true,
14627                            database_name: "synth".to_string(),
14628                            cypher_batch_size: 1000,
14629                        };
14630
14631                        let exporter = Neo4jExporter::new(neo4j_config);
14632                        match exporter.export(&graph, &format_dir) {
14633                            Ok(metadata) => {
14634                                snapshot.exports.insert(
14635                                    format!("{}_{}", graph_type.name, "neo4j"),
14636                                    GraphExportInfo {
14637                                        name: graph_type.name.clone(),
14638                                        format: "neo4j".to_string(),
14639                                        output_path: format_dir.clone(),
14640                                        node_count: metadata.num_nodes,
14641                                        edge_count: metadata.num_edges,
14642                                    },
14643                                );
14644                                snapshot.graph_count += 1;
14645                            }
14646                            Err(e) => {
14647                                warn!("Failed to export Neo4j graph: {}", e);
14648                            }
14649                        }
14650                    }
14651                    datasynth_config::schema::GraphExportFormat::Dgl => {
14652                        use datasynth_graph::{DGLExportConfig, DGLExporter};
14653
14654                        let dgl_config = DGLExportConfig {
14655                            common: datasynth_graph::CommonExportConfig {
14656                                export_node_features: true,
14657                                export_edge_features: true,
14658                                export_node_labels: true,
14659                                export_edge_labels: true,
14660                                export_masks: true,
14661                                train_ratio: self.config.graph_export.train_ratio,
14662                                val_ratio: self.config.graph_export.validation_ratio,
14663                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14664                            },
14665                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
14666                            include_pickle_script: true, // DGL ecosystem standard helper
14667                        };
14668
14669                        let exporter = DGLExporter::new(dgl_config);
14670                        match exporter.export(&graph, &format_dir) {
14671                            Ok(metadata) => {
14672                                snapshot.exports.insert(
14673                                    format!("{}_{}", graph_type.name, "dgl"),
14674                                    GraphExportInfo {
14675                                        name: graph_type.name.clone(),
14676                                        format: "dgl".to_string(),
14677                                        output_path: format_dir.clone(),
14678                                        node_count: metadata.common.num_nodes,
14679                                        edge_count: metadata.common.num_edges,
14680                                    },
14681                                );
14682                                snapshot.graph_count += 1;
14683                            }
14684                            Err(e) => {
14685                                warn!("Failed to export DGL graph: {}", e);
14686                            }
14687                        }
14688                    }
14689                    datasynth_config::schema::GraphExportFormat::RustGraph => {
14690                        use datasynth_graph::{
14691                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14692                        };
14693
14694                        let rustgraph_config = RustGraphExportConfig {
14695                            include_features: true,
14696                            include_temporal: true,
14697                            include_labels: true,
14698                            source_name: "datasynth".to_string(),
14699                            batch_id: None,
14700                            output_format: RustGraphOutputFormat::JsonLines,
14701                            export_node_properties: true,
14702                            export_edge_properties: true,
14703                            pretty_print: false,
14704                        };
14705
14706                        let exporter = RustGraphExporter::new(rustgraph_config);
14707                        match exporter.export(&graph, &format_dir) {
14708                            Ok(metadata) => {
14709                                snapshot.exports.insert(
14710                                    format!("{}_{}", graph_type.name, "rustgraph"),
14711                                    GraphExportInfo {
14712                                        name: graph_type.name.clone(),
14713                                        format: "rustgraph".to_string(),
14714                                        output_path: format_dir.clone(),
14715                                        node_count: metadata.num_nodes,
14716                                        edge_count: metadata.num_edges,
14717                                    },
14718                                );
14719                                snapshot.graph_count += 1;
14720                            }
14721                            Err(e) => {
14722                                warn!("Failed to export RustGraph: {}", e);
14723                            }
14724                        }
14725                    }
14726                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14727                        // Hypergraph export is handled separately in Phase 10b
14728                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14729                    }
14730                }
14731            }
14732
14733            if let Some(pb) = &pb {
14734                pb.inc(40);
14735            }
14736        }
14737
14738        stats.graph_export_count = snapshot.graph_count;
14739        snapshot.exported = snapshot.graph_count > 0;
14740
14741        if let Some(pb) = pb {
14742            pb.finish_with_message(format!(
14743                "Graphs exported: {} graphs ({} nodes, {} edges)",
14744                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14745            ));
14746        }
14747
14748        Ok(snapshot)
14749    }
14750
14751    /// Build additional graph types (banking, approval, entity) when relevant data
14752    /// is available. These run as a late phase because the data they need (banking
14753    /// snapshot, intercompany snapshot) is only generated after the main graph
14754    /// export phase.
14755    fn build_additional_graphs(
14756        &self,
14757        banking: &BankingSnapshot,
14758        intercompany: &IntercompanySnapshot,
14759        entries: &[JournalEntry],
14760        stats: &mut EnhancedGenerationStatistics,
14761    ) {
14762        let output_dir = self
14763            .output_path
14764            .clone()
14765            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14766        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14767
14768        // Banking graph: build when banking customers and transactions exist
14769        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14770            info!("Phase 10c: Building banking network graph");
14771            let config = BankingGraphConfig::default();
14772            let mut builder = BankingGraphBuilder::new(config);
14773            builder.add_customers(&banking.customers);
14774            builder.add_accounts(&banking.accounts, &banking.customers);
14775            builder.add_transactions(&banking.transactions);
14776            let graph = builder.build();
14777
14778            let node_count = graph.node_count();
14779            let edge_count = graph.edge_count();
14780            stats.graph_node_count += node_count;
14781            stats.graph_edge_count += edge_count;
14782
14783            // Export as PyG if configured
14784            for format in &self.config.graph_export.formats {
14785                if matches!(
14786                    format,
14787                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14788                ) {
14789                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14790                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14791                        warn!("Failed to create banking graph output dir: {}", e);
14792                        continue;
14793                    }
14794                    let pyg_config = PyGExportConfig::default();
14795                    let exporter = PyGExporter::new(pyg_config);
14796                    if let Err(e) = exporter.export(&graph, &format_dir) {
14797                        warn!("Failed to export banking graph as PyG: {}", e);
14798                    } else {
14799                        info!(
14800                            "Banking network graph exported: {} nodes, {} edges",
14801                            node_count, edge_count
14802                        );
14803                    }
14804                }
14805            }
14806        }
14807
14808        // Approval graph: build from journal entry approval workflows
14809        let approval_entries: Vec<_> = entries
14810            .iter()
14811            .filter(|je| je.header.approval_workflow.is_some())
14812            .collect();
14813
14814        if !approval_entries.is_empty() {
14815            info!(
14816                "Phase 10c: Building approval network graph ({} entries with approvals)",
14817                approval_entries.len()
14818            );
14819            let config = ApprovalGraphConfig::default();
14820            let mut builder = ApprovalGraphBuilder::new(config);
14821
14822            for je in &approval_entries {
14823                if let Some(ref wf) = je.header.approval_workflow {
14824                    for action in &wf.actions {
14825                        let record = datasynth_core::models::ApprovalRecord {
14826                            approval_id: format!(
14827                                "APR-{}-{}",
14828                                je.header.document_id, action.approval_level
14829                            ),
14830                            document_number: je.header.document_id.to_string(),
14831                            document_type: "JE".to_string(),
14832                            company_code: je.company_code().to_string(),
14833                            requester_id: wf.preparer_id.clone(),
14834                            requester_name: Some(wf.preparer_name.clone()),
14835                            approver_id: action.actor_id.clone(),
14836                            approver_name: action.actor_name.clone(),
14837                            approval_date: je.posting_date(),
14838                            action: format!("{:?}", action.action),
14839                            amount: wf.amount,
14840                            approval_limit: None,
14841                            comments: action.comments.clone(),
14842                            delegation_from: None,
14843                            is_auto_approved: false,
14844                        };
14845                        builder.add_approval(&record);
14846                    }
14847                }
14848            }
14849
14850            let graph = builder.build();
14851            let node_count = graph.node_count();
14852            let edge_count = graph.edge_count();
14853            stats.graph_node_count += node_count;
14854            stats.graph_edge_count += edge_count;
14855
14856            // Export as PyG if configured
14857            for format in &self.config.graph_export.formats {
14858                if matches!(
14859                    format,
14860                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14861                ) {
14862                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14863                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14864                        warn!("Failed to create approval graph output dir: {}", e);
14865                        continue;
14866                    }
14867                    let pyg_config = PyGExportConfig::default();
14868                    let exporter = PyGExporter::new(pyg_config);
14869                    if let Err(e) = exporter.export(&graph, &format_dir) {
14870                        warn!("Failed to export approval graph as PyG: {}", e);
14871                    } else {
14872                        info!(
14873                            "Approval network graph exported: {} nodes, {} edges",
14874                            node_count, edge_count
14875                        );
14876                    }
14877                }
14878            }
14879        }
14880
14881        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
14882        if self.config.companies.len() >= 2 {
14883            info!(
14884                "Phase 10c: Building entity relationship graph ({} companies)",
14885                self.config.companies.len()
14886            );
14887
14888            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14889                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
14890
14891            // Map CompanyConfig → Company objects
14892            let parent_code = &self.config.companies[0].code;
14893            let mut companies: Vec<datasynth_core::models::Company> =
14894                Vec::with_capacity(self.config.companies.len());
14895
14896            // First company is the parent
14897            let first = &self.config.companies[0];
14898            companies.push(datasynth_core::models::Company::parent(
14899                &first.code,
14900                &first.name,
14901                &first.country,
14902                &first.currency,
14903            ));
14904
14905            // Remaining companies are subsidiaries (100% owned by parent)
14906            for cc in self.config.companies.iter().skip(1) {
14907                companies.push(datasynth_core::models::Company::subsidiary(
14908                    &cc.code,
14909                    &cc.name,
14910                    &cc.country,
14911                    &cc.currency,
14912                    parent_code,
14913                    rust_decimal::Decimal::from(100),
14914                ));
14915            }
14916
14917            // Build IntercompanyRelationship records (same logic as phase_intercompany)
14918            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
14919                self.config
14920                    .companies
14921                    .iter()
14922                    .skip(1)
14923                    .enumerate()
14924                    .map(|(i, cc)| {
14925                        let mut rel =
14926                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
14927                                format!("REL{:03}", i + 1),
14928                                parent_code.clone(),
14929                                cc.code.clone(),
14930                                rust_decimal::Decimal::from(100),
14931                                start_date,
14932                            );
14933                        rel.functional_currency = cc.currency.clone();
14934                        rel
14935                    })
14936                    .collect();
14937
14938            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
14939            builder.add_companies(&companies);
14940            builder.add_ownership_relationships(&relationships);
14941
14942            // Thread IC matched-pair transaction edges into the entity graph
14943            for pair in &intercompany.matched_pairs {
14944                builder.add_intercompany_edge(
14945                    &pair.seller_company,
14946                    &pair.buyer_company,
14947                    pair.amount,
14948                    &format!("{:?}", pair.transaction_type),
14949                );
14950            }
14951
14952            let graph = builder.build();
14953            let node_count = graph.node_count();
14954            let edge_count = graph.edge_count();
14955            stats.graph_node_count += node_count;
14956            stats.graph_edge_count += edge_count;
14957
14958            // Export as PyG if configured
14959            for format in &self.config.graph_export.formats {
14960                if matches!(
14961                    format,
14962                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14963                ) {
14964                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14965                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14966                        warn!("Failed to create entity graph output dir: {}", e);
14967                        continue;
14968                    }
14969                    let pyg_config = PyGExportConfig::default();
14970                    let exporter = PyGExporter::new(pyg_config);
14971                    if let Err(e) = exporter.export(&graph, &format_dir) {
14972                        warn!("Failed to export entity graph as PyG: {}", e);
14973                    } else {
14974                        info!(
14975                            "Entity relationship graph exported: {} nodes, {} edges",
14976                            node_count, edge_count
14977                        );
14978                    }
14979                }
14980            }
14981        } else {
14982            debug!(
14983                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14984                self.config.companies.len()
14985            );
14986        }
14987    }
14988
14989    /// Export a multi-layer hypergraph for RustGraph integration.
14990    ///
14991    /// Builds a 3-layer hypergraph:
14992    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
14993    /// - Layer 2: Process Events (all process family document flows + OCPM events)
14994    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
14995    #[allow(clippy::too_many_arguments)]
14996    fn export_hypergraph(
14997        &self,
14998        coa: &Arc<ChartOfAccounts>,
14999        entries: &[JournalEntry],
15000        document_flows: &DocumentFlowSnapshot,
15001        sourcing: &SourcingSnapshot,
15002        hr: &HrSnapshot,
15003        manufacturing: &ManufacturingSnapshot,
15004        banking: &BankingSnapshot,
15005        audit: &AuditSnapshot,
15006        financial_reporting: &FinancialReportingSnapshot,
15007        ocpm: &OcpmSnapshot,
15008        compliance: &ComplianceRegulationsSnapshot,
15009        stats: &mut EnhancedGenerationStatistics,
15010    ) -> SynthResult<HypergraphExportInfo> {
15011        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
15012        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
15013        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
15014        use datasynth_graph::models::hypergraph::AggregationStrategy;
15015
15016        let hg_settings = &self.config.graph_export.hypergraph;
15017
15018        // Parse aggregation strategy from config string
15019        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
15020            "truncate" => AggregationStrategy::Truncate,
15021            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
15022            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
15023            "importance_sample" => AggregationStrategy::ImportanceSample,
15024            _ => AggregationStrategy::PoolByCounterparty,
15025        };
15026
15027        let builder_config = HypergraphConfig {
15028            max_nodes: hg_settings.max_nodes,
15029            aggregation_strategy,
15030            include_coso: hg_settings.governance_layer.include_coso,
15031            include_controls: hg_settings.governance_layer.include_controls,
15032            include_sox: hg_settings.governance_layer.include_sox,
15033            include_vendors: hg_settings.governance_layer.include_vendors,
15034            include_customers: hg_settings.governance_layer.include_customers,
15035            include_employees: hg_settings.governance_layer.include_employees,
15036            include_p2p: hg_settings.process_layer.include_p2p,
15037            include_o2c: hg_settings.process_layer.include_o2c,
15038            include_s2c: hg_settings.process_layer.include_s2c,
15039            include_h2r: hg_settings.process_layer.include_h2r,
15040            include_mfg: hg_settings.process_layer.include_mfg,
15041            include_bank: hg_settings.process_layer.include_bank,
15042            include_audit: hg_settings.process_layer.include_audit,
15043            include_r2r: hg_settings.process_layer.include_r2r,
15044            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
15045            docs_per_counterparty_threshold: hg_settings
15046                .process_layer
15047                .docs_per_counterparty_threshold,
15048            include_accounts: hg_settings.accounting_layer.include_accounts,
15049            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
15050            include_cross_layer_edges: hg_settings.cross_layer.enabled,
15051            include_compliance: self.config.compliance_regulations.enabled,
15052            include_tax: true,
15053            include_treasury: true,
15054            include_esg: true,
15055            include_project: true,
15056            include_intercompany: true,
15057            include_temporal_events: true,
15058        };
15059
15060        let mut builder = HypergraphBuilder::new(builder_config);
15061
15062        // Layer 1: Governance & Controls
15063        builder.add_coso_framework();
15064
15065        // Add controls if available (generated during JE generation)
15066        // Controls are generated per-company; we use the standard set
15067        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
15068            let controls = InternalControl::standard_controls();
15069            builder.add_controls(&controls);
15070        }
15071
15072        // Add master data
15073        builder.add_vendors(&self.master_data.vendors);
15074        builder.add_customers(&self.master_data.customers);
15075        builder.add_employees(&self.master_data.employees);
15076
15077        // Layer 2: Process Events (all process families)
15078        builder.add_p2p_documents(
15079            &document_flows.purchase_orders,
15080            &document_flows.goods_receipts,
15081            &document_flows.vendor_invoices,
15082            &document_flows.payments,
15083        );
15084        builder.add_o2c_documents(
15085            &document_flows.sales_orders,
15086            &document_flows.deliveries,
15087            &document_flows.customer_invoices,
15088        );
15089        builder.add_s2c_documents(
15090            &sourcing.sourcing_projects,
15091            &sourcing.qualifications,
15092            &sourcing.rfx_events,
15093            &sourcing.bids,
15094            &sourcing.bid_evaluations,
15095            &sourcing.contracts,
15096        );
15097        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
15098        builder.add_mfg_documents(
15099            &manufacturing.production_orders,
15100            &manufacturing.quality_inspections,
15101            &manufacturing.cycle_counts,
15102        );
15103        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
15104        builder.add_audit_documents(
15105            &audit.engagements,
15106            &audit.workpapers,
15107            &audit.findings,
15108            &audit.evidence,
15109            &audit.risk_assessments,
15110            &audit.judgments,
15111            &audit.materiality_calculations,
15112            &audit.audit_opinions,
15113            &audit.going_concern_assessments,
15114        );
15115        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
15116
15117        // OCPM events as hyperedges
15118        if let Some(ref event_log) = ocpm.event_log {
15119            builder.add_ocpm_events(event_log);
15120        }
15121
15122        // Compliance regulations as cross-layer nodes
15123        if self.config.compliance_regulations.enabled
15124            && hg_settings.governance_layer.include_controls
15125        {
15126            // Reconstruct ComplianceStandard objects from the registry
15127            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15128            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
15129                .standard_records
15130                .iter()
15131                .filter_map(|r| {
15132                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
15133                    registry.get(&sid).cloned()
15134                })
15135                .collect();
15136
15137            builder.add_compliance_regulations(
15138                &standards,
15139                &compliance.findings,
15140                &compliance.filings,
15141            );
15142        }
15143
15144        // Layer 3: Accounting Network
15145        builder.add_accounts(coa);
15146        builder.add_journal_entries_as_hyperedges(entries);
15147
15148        // Build the hypergraph
15149        let hypergraph = builder.build();
15150
15151        // Export
15152        let output_dir = self
15153            .output_path
15154            .clone()
15155            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
15156        let hg_dir = output_dir
15157            .join(&self.config.graph_export.output_subdirectory)
15158            .join(&hg_settings.output_subdirectory);
15159
15160        // Branch on output format
15161        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
15162            "unified" => {
15163                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
15164                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
15165                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
15166                })?;
15167                (
15168                    metadata.num_nodes,
15169                    metadata.num_edges,
15170                    metadata.num_hyperedges,
15171                )
15172            }
15173            _ => {
15174                // "native" or any unrecognized format → use existing exporter
15175                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
15176                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
15177                    SynthError::generation(format!("Hypergraph export failed: {e}"))
15178                })?;
15179                (
15180                    metadata.num_nodes,
15181                    metadata.num_edges,
15182                    metadata.num_hyperedges,
15183                )
15184            }
15185        };
15186
15187        // Stream to RustGraph ingest endpoint if configured
15188        #[cfg(feature = "streaming")]
15189        if let Some(ref target_url) = hg_settings.stream_target {
15190            use crate::stream_client::{StreamClient, StreamConfig};
15191            use std::io::Write as _;
15192
15193            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
15194            let stream_config = StreamConfig {
15195                target_url: target_url.clone(),
15196                batch_size: hg_settings.stream_batch_size,
15197                api_key,
15198                ..StreamConfig::default()
15199            };
15200
15201            match StreamClient::new(stream_config) {
15202                Ok(mut client) => {
15203                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
15204                    match exporter.export_to_writer(&hypergraph, &mut client) {
15205                        Ok(_) => {
15206                            if let Err(e) = client.flush() {
15207                                warn!("Failed to flush stream client: {}", e);
15208                            } else {
15209                                info!("Streamed {} records to {}", client.total_sent(), target_url);
15210                            }
15211                        }
15212                        Err(e) => {
15213                            warn!("Streaming export failed: {}", e);
15214                        }
15215                    }
15216                }
15217                Err(e) => {
15218                    warn!("Failed to create stream client: {}", e);
15219                }
15220            }
15221        }
15222
15223        // Update stats
15224        stats.graph_node_count += num_nodes;
15225        stats.graph_edge_count += num_edges;
15226        stats.graph_export_count += 1;
15227
15228        Ok(HypergraphExportInfo {
15229            node_count: num_nodes,
15230            edge_count: num_edges,
15231            hyperedge_count: num_hyperedges,
15232            output_path: hg_dir,
15233        })
15234    }
15235
15236    /// Generate banking KYC/AML data.
15237    ///
15238    /// Creates banking customers, accounts, and transactions with AML typology injection.
15239    /// Uses the BankingOrchestrator from synth-banking crate.
15240    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
15241        let pb = self.create_progress_bar(100, "Generating Banking Data");
15242
15243        // Build the banking orchestrator from config
15244        let orchestrator = BankingOrchestratorBuilder::new()
15245            .config(self.config.banking.clone())
15246            .seed(self.seed + 9000)
15247            .country_pack(self.primary_pack().clone())
15248            .build();
15249
15250        if let Some(pb) = &pb {
15251            pb.inc(10);
15252        }
15253
15254        // Generate the banking data
15255        let result = orchestrator.generate();
15256
15257        if let Some(pb) = &pb {
15258            pb.inc(90);
15259            pb.finish_with_message(format!(
15260                "Banking: {} customers, {} transactions",
15261                result.customers.len(),
15262                result.transactions.len()
15263            ));
15264        }
15265
15266        // Cross-reference banking customers with core master data so that
15267        // banking customer names align with the enterprise customer list.
15268        // We rotate through core customers, overlaying their name and country
15269        // onto the generated banking customers where possible.
15270        let mut banking_customers = result.customers;
15271        let core_customers = &self.master_data.customers;
15272        if !core_customers.is_empty() {
15273            for (i, bc) in banking_customers.iter_mut().enumerate() {
15274                let core = &core_customers[i % core_customers.len()];
15275                bc.name = CustomerName::business(&core.name);
15276                bc.residence_country = core.country.clone();
15277                bc.enterprise_customer_id = Some(core.customer_id.clone());
15278            }
15279            debug!(
15280                "Cross-referenced {} banking customers with {} core customers",
15281                banking_customers.len(),
15282                core_customers.len()
15283            );
15284        }
15285
15286        Ok(BankingSnapshot {
15287            customers: banking_customers,
15288            accounts: result.accounts,
15289            transactions: result.transactions,
15290            transaction_labels: result.transaction_labels,
15291            customer_labels: result.customer_labels,
15292            account_labels: result.account_labels,
15293            relationship_labels: result.relationship_labels,
15294            narratives: result.narratives,
15295            suspicious_count: result.stats.suspicious_count,
15296            scenario_count: result.scenarios.len(),
15297        })
15298    }
15299
15300    /// Calculate total transactions to generate.
15301    fn calculate_total_transactions(&self) -> u64 {
15302        let months = self.config.global.period_months as f64;
15303        self.config
15304            .companies
15305            .iter()
15306            .map(|c| {
15307                let annual = c.annual_transaction_volume.count() as f64;
15308                let weighted = annual * c.volume_weight;
15309                (weighted * months / 12.0) as u64
15310            })
15311            .sum()
15312    }
15313
15314    /// Create a progress bar if progress display is enabled.
15315    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
15316        if !self.phase_config.show_progress {
15317            return None;
15318        }
15319
15320        let pb = if let Some(mp) = &self.multi_progress {
15321            mp.add(ProgressBar::new(total))
15322        } else {
15323            ProgressBar::new(total)
15324        };
15325
15326        pb.set_style(
15327            ProgressStyle::default_bar()
15328                .template(&format!(
15329                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
15330                ))
15331                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
15332                .progress_chars("#>-"),
15333        );
15334
15335        Some(pb)
15336    }
15337
15338    /// Get the generated chart of accounts.
15339    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
15340        self.coa.clone()
15341    }
15342
15343    /// Get the generated master data.
15344    pub fn get_master_data(&self) -> &MasterDataSnapshot {
15345        &self.master_data
15346    }
15347
15348    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
15349    fn phase_compliance_regulations(
15350        &mut self,
15351        _stats: &mut EnhancedGenerationStatistics,
15352    ) -> SynthResult<ComplianceRegulationsSnapshot> {
15353        if !self.phase_config.generate_compliance_regulations {
15354            return Ok(ComplianceRegulationsSnapshot::default());
15355        }
15356
15357        info!("Phase: Generating Compliance Regulations Data");
15358
15359        let cr_config = &self.config.compliance_regulations;
15360
15361        // Determine jurisdictions: from config or inferred from companies
15362        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
15363            self.config
15364                .companies
15365                .iter()
15366                .map(|c| c.country.clone())
15367                .collect::<std::collections::HashSet<_>>()
15368                .into_iter()
15369                .collect()
15370        } else {
15371            cr_config.jurisdictions.clone()
15372        };
15373
15374        // Determine reference date
15375        let fallback_date =
15376            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
15377        let reference_date = cr_config
15378            .reference_date
15379            .as_ref()
15380            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
15381            .unwrap_or_else(|| {
15382                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15383                    .unwrap_or(fallback_date)
15384            });
15385
15386        // Generate standards registry data
15387        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
15388        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
15389        let cross_reference_records = reg_gen.generate_cross_reference_records();
15390        let jurisdiction_records =
15391            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
15392
15393        info!(
15394            "  Standards: {} records, {} cross-references, {} jurisdictions",
15395            standard_records.len(),
15396            cross_reference_records.len(),
15397            jurisdiction_records.len()
15398        );
15399
15400        // Generate audit procedures (if enabled)
15401        let audit_procedures = if cr_config.audit_procedures.enabled {
15402            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
15403                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
15404                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
15405                confidence_level: cr_config.audit_procedures.confidence_level,
15406                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
15407            };
15408            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
15409                self.seed + 9000,
15410                proc_config,
15411            );
15412            let registry = reg_gen.registry();
15413            let mut all_procs = Vec::new();
15414            for jurisdiction in &jurisdictions {
15415                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
15416                all_procs.extend(procs);
15417            }
15418            info!("  Audit procedures: {}", all_procs.len());
15419            all_procs
15420        } else {
15421            Vec::new()
15422        };
15423
15424        // Generate compliance findings (if enabled)
15425        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
15426            let finding_config =
15427                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
15428                    finding_rate: cr_config.findings.finding_rate,
15429                    material_weakness_rate: cr_config.findings.material_weakness_rate,
15430                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
15431                    generate_remediation: cr_config.findings.generate_remediation,
15432                };
15433            let mut finding_gen =
15434                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
15435                    self.seed + 9100,
15436                    finding_config,
15437                );
15438            let mut all_findings = Vec::new();
15439            for company in &self.config.companies {
15440                let company_findings =
15441                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
15442                all_findings.extend(company_findings);
15443            }
15444            info!("  Compliance findings: {}", all_findings.len());
15445            all_findings
15446        } else {
15447            Vec::new()
15448        };
15449
15450        // Generate regulatory filings (if enabled)
15451        let filings = if cr_config.filings.enabled {
15452            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
15453                filing_types: cr_config.filings.filing_types.clone(),
15454                generate_status_progression: cr_config.filings.generate_status_progression,
15455            };
15456            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
15457                self.seed + 9200,
15458                filing_config,
15459            );
15460            let company_codes: Vec<String> = self
15461                .config
15462                .companies
15463                .iter()
15464                .map(|c| c.code.clone())
15465                .collect();
15466            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15467                .unwrap_or(fallback_date);
15468            let filings = filing_gen.generate_filings(
15469                &company_codes,
15470                &jurisdictions,
15471                start_date,
15472                self.config.global.period_months,
15473            );
15474            info!("  Regulatory filings: {}", filings.len());
15475            filings
15476        } else {
15477            Vec::new()
15478        };
15479
15480        // Build compliance graph (if enabled)
15481        let compliance_graph = if cr_config.graph.enabled {
15482            let graph_config = datasynth_graph::ComplianceGraphConfig {
15483                include_standard_nodes: cr_config.graph.include_compliance_nodes,
15484                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
15485                include_cross_references: cr_config.graph.include_cross_references,
15486                include_supersession_edges: cr_config.graph.include_supersession_edges,
15487                include_account_links: cr_config.graph.include_account_links,
15488                include_control_links: cr_config.graph.include_control_links,
15489                include_company_links: cr_config.graph.include_company_links,
15490            };
15491            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
15492
15493            // Add standard nodes
15494            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
15495                .iter()
15496                .map(|r| datasynth_graph::StandardNodeInput {
15497                    standard_id: r.standard_id.clone(),
15498                    title: r.title.clone(),
15499                    category: r.category.clone(),
15500                    domain: r.domain.clone(),
15501                    is_active: r.is_active,
15502                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
15503                    applicable_account_types: r.applicable_account_types.clone(),
15504                    applicable_processes: r.applicable_processes.clone(),
15505                })
15506                .collect();
15507            builder.add_standards(&standard_inputs);
15508
15509            // Add jurisdiction nodes
15510            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
15511                jurisdiction_records
15512                    .iter()
15513                    .map(|r| datasynth_graph::JurisdictionNodeInput {
15514                        country_code: r.country_code.clone(),
15515                        country_name: r.country_name.clone(),
15516                        framework: r.accounting_framework.clone(),
15517                        standard_count: r.standard_count,
15518                        tax_rate: r.statutory_tax_rate,
15519                    })
15520                    .collect();
15521            builder.add_jurisdictions(&jurisdiction_inputs);
15522
15523            // Add cross-reference edges
15524            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
15525                cross_reference_records
15526                    .iter()
15527                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
15528                        from_standard: r.from_standard.clone(),
15529                        to_standard: r.to_standard.clone(),
15530                        relationship: r.relationship.clone(),
15531                        convergence_level: r.convergence_level,
15532                    })
15533                    .collect();
15534            builder.add_cross_references(&xref_inputs);
15535
15536            // Add jurisdiction→standard mappings
15537            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
15538                .iter()
15539                .map(|r| datasynth_graph::JurisdictionMappingInput {
15540                    country_code: r.jurisdiction.clone(),
15541                    standard_id: r.standard_id.clone(),
15542                })
15543                .collect();
15544            builder.add_jurisdiction_mappings(&mapping_inputs);
15545
15546            // Add procedure nodes
15547            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
15548                .iter()
15549                .map(|p| datasynth_graph::ProcedureNodeInput {
15550                    procedure_id: p.procedure_id.clone(),
15551                    standard_id: p.standard_id.clone(),
15552                    procedure_type: p.procedure_type.clone(),
15553                    sample_size: p.sample_size,
15554                    confidence_level: p.confidence_level,
15555                })
15556                .collect();
15557            builder.add_procedures(&proc_inputs);
15558
15559            // Add finding nodes
15560            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
15561                .iter()
15562                .map(|f| datasynth_graph::FindingNodeInput {
15563                    finding_id: f.finding_id.to_string(),
15564                    standard_id: f
15565                        .related_standards
15566                        .first()
15567                        .map(|s| s.as_str().to_string())
15568                        .unwrap_or_default(),
15569                    severity: f.severity.to_string(),
15570                    deficiency_level: f.deficiency_level.to_string(),
15571                    severity_score: f.deficiency_level.severity_score(),
15572                    control_id: f.control_id.clone(),
15573                    affected_accounts: f.affected_accounts.clone(),
15574                })
15575                .collect();
15576            builder.add_findings(&finding_inputs);
15577
15578            // Cross-domain: link standards to accounts from chart of accounts
15579            if cr_config.graph.include_account_links {
15580                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15581                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
15582                for std_record in &standard_records {
15583                    if let Some(std_obj) =
15584                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
15585                            &std_record.standard_id,
15586                        ))
15587                    {
15588                        for acct_type in &std_obj.applicable_account_types {
15589                            account_links.push(datasynth_graph::AccountLinkInput {
15590                                standard_id: std_record.standard_id.clone(),
15591                                account_code: acct_type.clone(),
15592                                account_name: acct_type.clone(),
15593                            });
15594                        }
15595                    }
15596                }
15597                builder.add_account_links(&account_links);
15598            }
15599
15600            // Cross-domain: link standards to internal controls
15601            if cr_config.graph.include_control_links {
15602                let mut control_links = Vec::new();
15603                // SOX/PCAOB standards link to all controls
15604                let sox_like_ids: Vec<String> = standard_records
15605                    .iter()
15606                    .filter(|r| {
15607                        r.standard_id.starts_with("SOX")
15608                            || r.standard_id.starts_with("PCAOB-AS-2201")
15609                    })
15610                    .map(|r| r.standard_id.clone())
15611                    .collect();
15612                // Get control IDs from config (C001-C060 standard controls)
15613                let control_ids = [
15614                    ("C001", "Cash Controls"),
15615                    ("C002", "Large Transaction Approval"),
15616                    ("C010", "PO Approval"),
15617                    ("C011", "Three-Way Match"),
15618                    ("C020", "Revenue Recognition"),
15619                    ("C021", "Credit Check"),
15620                    ("C030", "Manual JE Approval"),
15621                    ("C031", "Period Close Review"),
15622                    ("C032", "Account Reconciliation"),
15623                    ("C040", "Payroll Processing"),
15624                    ("C050", "Fixed Asset Capitalization"),
15625                    ("C060", "Intercompany Elimination"),
15626                ];
15627                for sox_id in &sox_like_ids {
15628                    for (ctrl_id, ctrl_name) in &control_ids {
15629                        control_links.push(datasynth_graph::ControlLinkInput {
15630                            standard_id: sox_id.clone(),
15631                            control_id: ctrl_id.to_string(),
15632                            control_name: ctrl_name.to_string(),
15633                        });
15634                    }
15635                }
15636                builder.add_control_links(&control_links);
15637            }
15638
15639            // Cross-domain: filing nodes with company links
15640            if cr_config.graph.include_company_links {
15641                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15642                    .iter()
15643                    .enumerate()
15644                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
15645                        filing_id: format!("F{:04}", i + 1),
15646                        filing_type: f.filing_type.to_string(),
15647                        company_code: f.company_code.clone(),
15648                        jurisdiction: f.jurisdiction.clone(),
15649                        status: format!("{:?}", f.status),
15650                    })
15651                    .collect();
15652                builder.add_filings(&filing_inputs);
15653            }
15654
15655            let graph = builder.build();
15656            info!(
15657                "  Compliance graph: {} nodes, {} edges",
15658                graph.nodes.len(),
15659                graph.edges.len()
15660            );
15661            Some(graph)
15662        } else {
15663            None
15664        };
15665
15666        self.check_resources_with_log("post-compliance-regulations")?;
15667
15668        Ok(ComplianceRegulationsSnapshot {
15669            standard_records,
15670            cross_reference_records,
15671            jurisdiction_records,
15672            audit_procedures,
15673            findings,
15674            filings,
15675            compliance_graph,
15676        })
15677    }
15678
15679    /// Build a lineage graph describing config → phase → output relationships.
15680    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15681        use super::lineage::LineageGraphBuilder;
15682
15683        let mut builder = LineageGraphBuilder::new();
15684
15685        // Config sections
15686        builder.add_config_section("config:global", "Global Config");
15687        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15688        builder.add_config_section("config:transactions", "Transaction Config");
15689
15690        // Generator phases
15691        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15692        builder.add_generator_phase("phase:je", "Journal Entry Generation");
15693
15694        // Config → phase edges
15695        builder.configured_by("phase:coa", "config:chart_of_accounts");
15696        builder.configured_by("phase:je", "config:transactions");
15697
15698        // Output files
15699        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15700        builder.produced_by("output:je", "phase:je");
15701
15702        // Optional phases based on config
15703        if self.phase_config.generate_master_data {
15704            builder.add_config_section("config:master_data", "Master Data Config");
15705            builder.add_generator_phase("phase:master_data", "Master Data Generation");
15706            builder.configured_by("phase:master_data", "config:master_data");
15707            builder.input_to("phase:master_data", "phase:je");
15708        }
15709
15710        if self.phase_config.generate_document_flows {
15711            builder.add_config_section("config:document_flows", "Document Flow Config");
15712            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15713            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15714            builder.configured_by("phase:p2p", "config:document_flows");
15715            builder.configured_by("phase:o2c", "config:document_flows");
15716
15717            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15718            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15719            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15720            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15721            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15722
15723            builder.produced_by("output:po", "phase:p2p");
15724            builder.produced_by("output:gr", "phase:p2p");
15725            builder.produced_by("output:vi", "phase:p2p");
15726            builder.produced_by("output:so", "phase:o2c");
15727            builder.produced_by("output:ci", "phase:o2c");
15728        }
15729
15730        if self.phase_config.inject_anomalies {
15731            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15732            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15733            builder.configured_by("phase:anomaly", "config:fraud");
15734            builder.add_output_file(
15735                "output:labels",
15736                "Anomaly Labels",
15737                "labels/anomaly_labels.csv",
15738            );
15739            builder.produced_by("output:labels", "phase:anomaly");
15740        }
15741
15742        if self.phase_config.generate_audit {
15743            builder.add_config_section("config:audit", "Audit Config");
15744            builder.add_generator_phase("phase:audit", "Audit Data Generation");
15745            builder.configured_by("phase:audit", "config:audit");
15746        }
15747
15748        if self.phase_config.generate_banking {
15749            builder.add_config_section("config:banking", "Banking Config");
15750            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15751            builder.configured_by("phase:banking", "config:banking");
15752        }
15753
15754        if self.config.llm.enabled {
15755            builder.add_config_section("config:llm", "LLM Enrichment Config");
15756            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15757            builder.configured_by("phase:llm_enrichment", "config:llm");
15758        }
15759
15760        if self.config.diffusion.enabled {
15761            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15762            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15763            builder.configured_by("phase:diffusion", "config:diffusion");
15764        }
15765
15766        if self.config.causal.enabled {
15767            builder.add_config_section("config:causal", "Causal Generation Config");
15768            builder.add_generator_phase("phase:causal", "Causal Overlay");
15769            builder.configured_by("phase:causal", "config:causal");
15770        }
15771
15772        builder.build()
15773    }
15774
15775    // -----------------------------------------------------------------------
15776    // Trial-balance helpers used to replace hardcoded proxy values
15777    // -----------------------------------------------------------------------
15778
15779    /// Compute total revenue for a company from its journal entries.
15780    ///
15781    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
15782    /// net credits on all revenue-account lines filtered to `company_code`.
15783    fn compute_company_revenue(
15784        entries: &[JournalEntry],
15785        company_code: &str,
15786    ) -> rust_decimal::Decimal {
15787        use rust_decimal::Decimal;
15788        let mut revenue = Decimal::ZERO;
15789        for je in entries {
15790            if je.header.company_code != company_code {
15791                continue;
15792            }
15793            for line in &je.lines {
15794                if line.gl_account.starts_with('4') {
15795                    // Revenue is credit-normal
15796                    revenue += line.credit_amount - line.debit_amount;
15797                }
15798            }
15799        }
15800        revenue.max(Decimal::ZERO)
15801    }
15802
15803    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
15804    ///
15805    /// Asset accounts start with "1"; liability accounts start with "2".
15806    fn compute_entity_net_assets(
15807        entries: &[JournalEntry],
15808        entity_code: &str,
15809    ) -> rust_decimal::Decimal {
15810        use rust_decimal::Decimal;
15811        let mut asset_net = Decimal::ZERO;
15812        let mut liability_net = Decimal::ZERO;
15813        for je in entries {
15814            if je.header.company_code != entity_code {
15815                continue;
15816            }
15817            for line in &je.lines {
15818                if line.gl_account.starts_with('1') {
15819                    asset_net += line.debit_amount - line.credit_amount;
15820                } else if line.gl_account.starts_with('2') {
15821                    liability_net += line.credit_amount - line.debit_amount;
15822                }
15823            }
15824        }
15825        asset_net - liability_net
15826    }
15827
15828    /// v3.5.1+: Run the statistical validation suite configured in
15829    /// `distributions.validation.tests` over the final amount
15830    /// distribution.  Collects every non-zero line-level amount (debit +
15831    /// credit) and hands it to the runners in
15832    /// `datasynth_core::distributions::validation`.
15833    ///
15834    /// Returns `Ok(None)` when validation is disabled (the default).
15835    /// When `reporting.fail_on_error = true` and any test fails, returns
15836    /// `Err` with a concise message; otherwise attaches the report to
15837    /// the result and lets callers inspect it.
15838    fn phase_statistical_validation(
15839        &self,
15840        entries: &[JournalEntry],
15841    ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15842        use datasynth_config::schema::StatisticalTestConfig;
15843        use datasynth_core::distributions::{
15844            run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15845            run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15846        };
15847        use rust_decimal::prelude::ToPrimitive;
15848
15849        let cfg = &self.config.distributions.validation;
15850        if !cfg.enabled {
15851            return Ok(None);
15852        }
15853
15854        // Collect per-line positive amounts (debit + credit is zero on the
15855        // non-posting side, so this naturally picks the magnitude).
15856        let amounts: Vec<rust_decimal::Decimal> = entries
15857            .iter()
15858            .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15859            .filter(|a| *a > rust_decimal::Decimal::ZERO)
15860            .collect();
15861
15862        // v4.1.0+ paired (amount, line_count) per entry for correlation
15863        // checks. Amount per entry is the debit-side total (= credit-side
15864        // total for a balanced entry).
15865        let paired_amount_linecount: Vec<(f64, f64)> = entries
15866            .iter()
15867            .filter_map(|je| {
15868                let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
15869                if amt > rust_decimal::Decimal::ZERO {
15870                    amt.to_f64().map(|a| (a, je.lines.len() as f64))
15871                } else {
15872                    None
15873                }
15874            })
15875            .collect();
15876
15877        let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
15878        for test_cfg in &cfg.tests {
15879            match test_cfg {
15880                StatisticalTestConfig::BenfordFirstDigit {
15881                    threshold_mad,
15882                    warning_mad,
15883                } => {
15884                    results.push(run_benford_first_digit(
15885                        &amounts,
15886                        *threshold_mad,
15887                        *warning_mad,
15888                    ));
15889                }
15890                StatisticalTestConfig::ChiSquared { bins, significance } => {
15891                    results.push(run_chi_squared(&amounts, *bins, *significance));
15892                }
15893                StatisticalTestConfig::DistributionFit {
15894                    target: _,
15895                    ks_significance,
15896                    method: _,
15897                } => {
15898                    // v3.5.1+: log-uniformity KS check. Target-specific
15899                    // fits against Normal / Exponential land in v4.1.1+.
15900                    results.push(run_ks_uniform_log(&amounts, *ks_significance));
15901                }
15902                StatisticalTestConfig::AndersonDarling {
15903                    target: _,
15904                    significance,
15905                } => {
15906                    // v4.1.0+: A*² statistic against log-normal on the
15907                    // log-scale. Other targets follow the same pattern.
15908                    results.push(run_anderson_darling(&amounts, *significance));
15909                }
15910                StatisticalTestConfig::CorrelationCheck {
15911                    expected_correlations,
15912                } => {
15913                    // v4.1.0+: (amount, line_count) is tracked today.
15914                    // Other pairs resolve to Skipped pending richer
15915                    // per-entry attribute collection.
15916                    if expected_correlations.is_empty() {
15917                        results.push(StatisticalTestResult {
15918                            name: "correlation_check".to_string(),
15919                            outcome: TestOutcome::Skipped,
15920                            statistic: 0.0,
15921                            threshold: 0.0,
15922                            message: "no expected correlations declared".to_string(),
15923                        });
15924                    } else {
15925                        for ec in expected_correlations {
15926                            let pair_key = format!("{}_{}", ec.field1, ec.field2);
15927                            let is_amount_linecount = (ec.field1 == "amount"
15928                                && ec.field2 == "line_count")
15929                                || (ec.field1 == "line_count" && ec.field2 == "amount");
15930                            if is_amount_linecount {
15931                                let xs: Vec<f64> =
15932                                    paired_amount_linecount.iter().map(|(a, _)| *a).collect();
15933                                let ys: Vec<f64> =
15934                                    paired_amount_linecount.iter().map(|(_, l)| *l).collect();
15935                                results.push(run_correlation_check(
15936                                    &pair_key,
15937                                    &xs,
15938                                    &ys,
15939                                    ec.expected_r,
15940                                    ec.tolerance,
15941                                ));
15942                            } else {
15943                                results.push(StatisticalTestResult {
15944                                    name: format!("correlation_check_{pair_key}"),
15945                                    outcome: TestOutcome::Skipped,
15946                                    statistic: 0.0,
15947                                    threshold: ec.tolerance,
15948                                    message: format!(
15949                                        "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
15950                                        ec.field1, ec.field2
15951                                    ),
15952                                });
15953                            }
15954                        }
15955                    }
15956                }
15957            }
15958        }
15959
15960        let report = StatisticalValidationReport {
15961            sample_count: amounts.len(),
15962            results,
15963        };
15964
15965        if cfg.reporting.fail_on_error && !report.all_passed() {
15966            let failed = report.failed_names().join(", ");
15967            return Err(SynthError::validation(format!(
15968                "statistical validation failed: {failed}"
15969            )));
15970        }
15971
15972        Ok(Some(report))
15973    }
15974
15975    /// v3.3.0: analytics-metadata phase.
15976    ///
15977    /// Runs AFTER all JE-adding phases (including Phase 20b's
15978    /// fraud-bias sweep). Four sub-generators fire in sequence, each
15979    /// gated by an individual `analytics_metadata.<flag>` toggle:
15980    ///
15981    /// 1. `PriorYearGenerator` — prior-year comparatives derived from
15982    ///    current-period account balances.
15983    /// 2. `IndustryBenchmarkGenerator` — industry benchmarks for the
15984    ///    configured `global.industry`.
15985    /// 3. `ManagementReportGenerator` — management-report artefacts.
15986    /// 4. `DriftEventGenerator` — post-generation drift-event labels.
15987    fn phase_analytics_metadata(
15988        &mut self,
15989        entries: &[JournalEntry],
15990    ) -> SynthResult<AnalyticsMetadataSnapshot> {
15991        use datasynth_generators::drift_event_generator::DriftEventGenerator;
15992        use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15993        use datasynth_generators::management_report_generator::ManagementReportGenerator;
15994        use datasynth_generators::prior_year_generator::PriorYearGenerator;
15995        use std::collections::BTreeMap;
15996
15997        let mut snap = AnalyticsMetadataSnapshot::default();
15998
15999        if !self.phase_config.generate_analytics_metadata {
16000            return Ok(snap);
16001        }
16002
16003        let cfg = &self.config.analytics_metadata;
16004        let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
16005            .map(|d| d.year())
16006            .unwrap_or(2025);
16007
16008        // ---- 1. Prior-year comparatives ----
16009        if cfg.prior_year {
16010            let mut gen = PriorYearGenerator::new(self.seed + 9100);
16011            for company in &self.config.companies {
16012                // Aggregate current-period balances per account code +
16013                // account name from the entries slice.
16014                let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
16015                    BTreeMap::new();
16016                for je in entries {
16017                    if je.header.company_code != company.code {
16018                        continue;
16019                    }
16020                    for line in &je.lines {
16021                        let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
16022                            (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
16023                        });
16024                        entry.1 += line.debit_amount - line.credit_amount;
16025                    }
16026                }
16027                let current: Vec<(String, String, rust_decimal::Decimal)> = balances
16028                    .into_iter()
16029                    .filter(|(_, (_, bal))| !bal.is_zero())
16030                    .map(|(code, (name, bal))| (code, name, bal))
16031                    .collect();
16032                if !current.is_empty() {
16033                    let comparatives =
16034                        gen.generate_comparatives(&company.code, fiscal_year, &current);
16035                    snap.prior_year_comparatives.extend(comparatives);
16036                }
16037            }
16038            info!(
16039                "v3.3.0 analytics: {} prior-year comparatives across {} companies",
16040                snap.prior_year_comparatives.len(),
16041                self.config.companies.len()
16042            );
16043        }
16044
16045        // ---- 2. Industry benchmarks ----
16046        if cfg.industry_benchmark {
16047            use datasynth_core::models::IndustrySector;
16048            let industry = match self.config.global.industry {
16049                IndustrySector::Manufacturing => "manufacturing",
16050                IndustrySector::Retail => "retail",
16051                IndustrySector::FinancialServices => "financial_services",
16052                IndustrySector::Technology => "technology",
16053                IndustrySector::Healthcare => "healthcare",
16054                _ => "other",
16055            };
16056            let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
16057            let benchmarks = gen.generate(industry, fiscal_year);
16058            info!(
16059                "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
16060                benchmarks.len()
16061            );
16062            snap.industry_benchmarks = benchmarks;
16063        }
16064
16065        // ---- 3. Management reports ----
16066        if cfg.management_reports {
16067            let mut gen = ManagementReportGenerator::new(self.seed + 9300);
16068            let period_months = self.config.global.period_months;
16069            for company in &self.config.companies {
16070                let reports =
16071                    gen.generate_reports(&company.code, fiscal_year as u32, period_months);
16072                snap.management_reports.extend(reports);
16073            }
16074            info!(
16075                "v3.3.0 analytics: {} management reports across {} companies",
16076                snap.management_reports.len(),
16077                self.config.companies.len()
16078            );
16079        }
16080
16081        // ---- 4. Drift-event labels ----
16082        if cfg.drift_events {
16083            let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
16084                .expect("hardcoded NaiveDate 2025-01-01 is valid");
16085            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
16086                .unwrap_or(fallback_start);
16087            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
16088            let mut gen = DriftEventGenerator::new(self.seed + 9400);
16089            let drifts = gen.generate_standalone_drifts(start_date, end_date);
16090            info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
16091            snap.drift_events = drifts;
16092        }
16093        // `entries` parameter reserved for future JE-aware drift detection
16094        let _ = entries;
16095
16096        Ok(snap)
16097    }
16098}
16099
16100/// Get the directory name for a graph export format.
16101fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
16102    match format {
16103        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
16104        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
16105        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
16106        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
16107        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
16108    }
16109}
16110
16111/// Aggregate journal entry lines into per-account trial balance rows.
16112///
16113/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
16114/// debit/credit totals and a net balance (debit minus credit).
16115fn compute_trial_balance_entries(
16116    entries: &[JournalEntry],
16117    entity_code: &str,
16118    fiscal_year: i32,
16119    coa: Option<&ChartOfAccounts>,
16120) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
16121    use std::collections::BTreeMap;
16122
16123    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
16124        BTreeMap::new();
16125
16126    for je in entries {
16127        for line in &je.lines {
16128            let entry = balances.entry(line.account_code.clone()).or_default();
16129            entry.0 += line.debit_amount;
16130            entry.1 += line.credit_amount;
16131        }
16132    }
16133
16134    balances
16135        .into_iter()
16136        .map(
16137            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
16138                account_description: coa
16139                    .and_then(|c| c.get_account(&account_code))
16140                    .map(|a| a.description().to_string())
16141                    .unwrap_or_else(|| account_code.clone()),
16142                account_code,
16143                debit_balance: debit,
16144                credit_balance: credit,
16145                net_balance: debit - credit,
16146                entity_code: entity_code.to_string(),
16147                period: format!("FY{}", fiscal_year),
16148            },
16149        )
16150        .collect()
16151}
16152
16153#[cfg(test)]
16154mod tests {
16155    use super::*;
16156    use datasynth_config::schema::*;
16157
16158    fn create_test_config() -> GeneratorConfig {
16159        GeneratorConfig {
16160            global: GlobalConfig {
16161                industry: IndustrySector::Manufacturing,
16162                start_date: "2024-01-01".to_string(),
16163                period_months: 1,
16164                seed: Some(42),
16165                parallel: false,
16166                group_currency: "USD".to_string(),
16167                presentation_currency: None,
16168                worker_threads: 0,
16169                memory_limit_mb: 0,
16170                fiscal_year_months: None,
16171            },
16172            companies: vec![CompanyConfig {
16173                code: "1000".to_string(),
16174                name: "Test Company".to_string(),
16175                currency: "USD".to_string(),
16176                functional_currency: None,
16177                country: "US".to_string(),
16178                annual_transaction_volume: TransactionVolume::TenK,
16179                volume_weight: 1.0,
16180                fiscal_year_variant: "K4".to_string(),
16181            }],
16182            chart_of_accounts: ChartOfAccountsConfig {
16183                complexity: CoAComplexity::Small,
16184                industry_specific: true,
16185                custom_accounts: None,
16186                min_hierarchy_depth: 2,
16187                max_hierarchy_depth: 4,
16188                expand_industry_subaccounts: false,
16189            },
16190            transactions: TransactionConfig::default(),
16191            output: OutputConfig::default(),
16192            fraud: FraudConfig::default(),
16193            internal_controls: InternalControlsConfig::default(),
16194            business_processes: BusinessProcessConfig::default(),
16195            user_personas: UserPersonaConfig::default(),
16196            templates: TemplateConfig::default(),
16197            approval: ApprovalConfig::default(),
16198            departments: DepartmentConfig::default(),
16199            master_data: MasterDataConfig::default(),
16200            document_flows: DocumentFlowConfig::default(),
16201            intercompany: IntercompanyConfig::default(),
16202            balance: BalanceConfig::default(),
16203            ocpm: OcpmConfig::default(),
16204            audit: AuditGenerationConfig::default(),
16205            banking: datasynth_banking::BankingConfig::default(),
16206            data_quality: DataQualitySchemaConfig::default(),
16207            scenario: ScenarioConfig::default(),
16208            temporal: TemporalDriftConfig::default(),
16209            graph_export: GraphExportConfig::default(),
16210            streaming: StreamingSchemaConfig::default(),
16211            rate_limit: RateLimitSchemaConfig::default(),
16212            temporal_attributes: TemporalAttributeSchemaConfig::default(),
16213            relationships: RelationshipSchemaConfig::default(),
16214            accounting_standards: AccountingStandardsConfig::default(),
16215            audit_standards: AuditStandardsConfig::default(),
16216            distributions: Default::default(),
16217            temporal_patterns: Default::default(),
16218            vendor_network: VendorNetworkSchemaConfig::default(),
16219            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
16220            relationship_strength: RelationshipStrengthSchemaConfig::default(),
16221            cross_process_links: CrossProcessLinksSchemaConfig::default(),
16222            organizational_events: OrganizationalEventsSchemaConfig::default(),
16223            behavioral_drift: BehavioralDriftSchemaConfig::default(),
16224            market_drift: MarketDriftSchemaConfig::default(),
16225            drift_labeling: DriftLabelingSchemaConfig::default(),
16226            anomaly_injection: Default::default(),
16227            industry_specific: Default::default(),
16228            fingerprint_privacy: Default::default(),
16229            quality_gates: Default::default(),
16230            compliance: Default::default(),
16231            webhooks: Default::default(),
16232            llm: Default::default(),
16233            diffusion: Default::default(),
16234            causal: Default::default(),
16235            source_to_pay: Default::default(),
16236            financial_reporting: Default::default(),
16237            hr: Default::default(),
16238            manufacturing: Default::default(),
16239            sales_quotes: Default::default(),
16240            tax: Default::default(),
16241            treasury: Default::default(),
16242            project_accounting: Default::default(),
16243            esg: Default::default(),
16244            country_packs: None,
16245            scenarios: Default::default(),
16246            session: Default::default(),
16247            compliance_regulations: Default::default(),
16248            analytics_metadata: Default::default(),
16249            concentration: Default::default(),
16250        }
16251    }
16252
16253    #[test]
16254    fn test_enhanced_orchestrator_creation() {
16255        let config = create_test_config();
16256        let orchestrator = EnhancedOrchestrator::with_defaults(config);
16257        assert!(orchestrator.is_ok());
16258    }
16259
16260    #[test]
16261    fn test_minimal_generation() {
16262        let config = create_test_config();
16263        let phase_config = PhaseConfig {
16264            generate_master_data: false,
16265            generate_document_flows: false,
16266            generate_journal_entries: true,
16267            inject_anomalies: false,
16268            show_progress: false,
16269            ..Default::default()
16270        };
16271
16272        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16273        let result = orchestrator.generate();
16274
16275        assert!(result.is_ok());
16276        let result = result.unwrap();
16277        assert!(!result.journal_entries.is_empty());
16278    }
16279
16280    #[test]
16281    fn test_master_data_generation() {
16282        let config = create_test_config();
16283        let phase_config = PhaseConfig {
16284            generate_master_data: true,
16285            generate_document_flows: false,
16286            generate_journal_entries: false,
16287            inject_anomalies: false,
16288            show_progress: false,
16289            vendors_per_company: 5,
16290            customers_per_company: 5,
16291            materials_per_company: 10,
16292            assets_per_company: 5,
16293            employees_per_company: 10,
16294            ..Default::default()
16295        };
16296
16297        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16298        let result = orchestrator.generate().unwrap();
16299
16300        assert!(!result.master_data.vendors.is_empty());
16301        assert!(!result.master_data.customers.is_empty());
16302        assert!(!result.master_data.materials.is_empty());
16303    }
16304
16305    #[test]
16306    fn test_document_flow_generation() {
16307        let config = create_test_config();
16308        let phase_config = PhaseConfig {
16309            generate_master_data: true,
16310            generate_document_flows: true,
16311            generate_journal_entries: false,
16312            inject_anomalies: false,
16313            inject_data_quality: false,
16314            validate_balances: false,
16315            validate_coa_coverage_strict: false,
16316            generate_ocpm_events: false,
16317            show_progress: false,
16318            vendors_per_company: 5,
16319            customers_per_company: 5,
16320            materials_per_company: 10,
16321            assets_per_company: 5,
16322            employees_per_company: 10,
16323            p2p_chains: 5,
16324            o2c_chains: 5,
16325            ..Default::default()
16326        };
16327
16328        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16329        let result = orchestrator.generate().unwrap();
16330
16331        // Should have generated P2P and O2C chains
16332        assert!(!result.document_flows.p2p_chains.is_empty());
16333        assert!(!result.document_flows.o2c_chains.is_empty());
16334
16335        // Flattened documents should be populated
16336        assert!(!result.document_flows.purchase_orders.is_empty());
16337        assert!(!result.document_flows.sales_orders.is_empty());
16338    }
16339
16340    #[test]
16341    fn test_anomaly_injection() {
16342        let config = create_test_config();
16343        let phase_config = PhaseConfig {
16344            generate_master_data: false,
16345            generate_document_flows: false,
16346            generate_journal_entries: true,
16347            inject_anomalies: true,
16348            show_progress: false,
16349            ..Default::default()
16350        };
16351
16352        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16353        let result = orchestrator.generate().unwrap();
16354
16355        // Should have journal entries
16356        assert!(!result.journal_entries.is_empty());
16357
16358        // With ~833 entries and 2% rate, expect some anomalies
16359        // Note: This is probabilistic, so we just verify the structure exists
16360        assert!(result.anomaly_labels.summary.is_some());
16361    }
16362
16363    #[test]
16364    fn test_full_generation_pipeline() {
16365        let config = create_test_config();
16366        let phase_config = PhaseConfig {
16367            generate_master_data: true,
16368            generate_document_flows: true,
16369            generate_journal_entries: true,
16370            inject_anomalies: false,
16371            inject_data_quality: false,
16372            validate_balances: true,
16373            validate_coa_coverage_strict: false,
16374            generate_ocpm_events: false,
16375            show_progress: false,
16376            vendors_per_company: 3,
16377            customers_per_company: 3,
16378            materials_per_company: 5,
16379            assets_per_company: 3,
16380            employees_per_company: 5,
16381            p2p_chains: 3,
16382            o2c_chains: 3,
16383            ..Default::default()
16384        };
16385
16386        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16387        let result = orchestrator.generate().unwrap();
16388
16389        // All phases should have results
16390        assert!(!result.master_data.vendors.is_empty());
16391        assert!(!result.master_data.customers.is_empty());
16392        assert!(!result.document_flows.p2p_chains.is_empty());
16393        assert!(!result.document_flows.o2c_chains.is_empty());
16394        assert!(!result.journal_entries.is_empty());
16395        assert!(result.statistics.accounts_count > 0);
16396
16397        // Subledger linking should have run
16398        assert!(!result.subledger.ap_invoices.is_empty());
16399        assert!(!result.subledger.ar_invoices.is_empty());
16400
16401        // Balance validation should have run
16402        assert!(result.balance_validation.validated);
16403        assert!(result.balance_validation.entries_processed > 0);
16404    }
16405
16406    #[test]
16407    fn test_subledger_linking() {
16408        let config = create_test_config();
16409        let phase_config = PhaseConfig {
16410            generate_master_data: true,
16411            generate_document_flows: true,
16412            generate_journal_entries: false,
16413            inject_anomalies: false,
16414            inject_data_quality: false,
16415            validate_balances: false,
16416            validate_coa_coverage_strict: false,
16417            generate_ocpm_events: false,
16418            show_progress: false,
16419            vendors_per_company: 5,
16420            customers_per_company: 5,
16421            materials_per_company: 10,
16422            assets_per_company: 3,
16423            employees_per_company: 5,
16424            p2p_chains: 5,
16425            o2c_chains: 5,
16426            ..Default::default()
16427        };
16428
16429        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16430        let result = orchestrator.generate().unwrap();
16431
16432        // Should have document flows
16433        assert!(!result.document_flows.vendor_invoices.is_empty());
16434        assert!(!result.document_flows.customer_invoices.is_empty());
16435
16436        // Subledger should be linked from document flows
16437        assert!(!result.subledger.ap_invoices.is_empty());
16438        assert!(!result.subledger.ar_invoices.is_empty());
16439
16440        // AP invoices count should match vendor invoices count
16441        assert_eq!(
16442            result.subledger.ap_invoices.len(),
16443            result.document_flows.vendor_invoices.len()
16444        );
16445
16446        // AR invoices count should match customer invoices count
16447        assert_eq!(
16448            result.subledger.ar_invoices.len(),
16449            result.document_flows.customer_invoices.len()
16450        );
16451
16452        // Statistics should reflect subledger counts
16453        assert_eq!(
16454            result.statistics.ap_invoice_count,
16455            result.subledger.ap_invoices.len()
16456        );
16457        assert_eq!(
16458            result.statistics.ar_invoice_count,
16459            result.subledger.ar_invoices.len()
16460        );
16461    }
16462
16463    #[test]
16464    fn test_balance_validation() {
16465        let config = create_test_config();
16466        let phase_config = PhaseConfig {
16467            generate_master_data: false,
16468            generate_document_flows: false,
16469            generate_journal_entries: true,
16470            inject_anomalies: false,
16471            validate_balances: true,
16472            validate_coa_coverage_strict: false,
16473            show_progress: false,
16474            ..Default::default()
16475        };
16476
16477        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16478        let result = orchestrator.generate().unwrap();
16479
16480        // Balance validation should run
16481        assert!(result.balance_validation.validated);
16482        assert!(result.balance_validation.entries_processed > 0);
16483
16484        // Generated JEs should be balanced (no unbalanced entries)
16485        assert!(!result.balance_validation.has_unbalanced_entries);
16486
16487        // Total debits should equal total credits
16488        assert_eq!(
16489            result.balance_validation.total_debits,
16490            result.balance_validation.total_credits
16491        );
16492    }
16493
16494    #[test]
16495    fn test_statistics_accuracy() {
16496        let config = create_test_config();
16497        let phase_config = PhaseConfig {
16498            generate_master_data: true,
16499            generate_document_flows: false,
16500            generate_journal_entries: true,
16501            inject_anomalies: false,
16502            show_progress: false,
16503            vendors_per_company: 10,
16504            customers_per_company: 20,
16505            materials_per_company: 15,
16506            assets_per_company: 5,
16507            employees_per_company: 8,
16508            ..Default::default()
16509        };
16510
16511        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16512        let result = orchestrator.generate().unwrap();
16513
16514        // Statistics should match actual data
16515        assert_eq!(
16516            result.statistics.vendor_count,
16517            result.master_data.vendors.len()
16518        );
16519        assert_eq!(
16520            result.statistics.customer_count,
16521            result.master_data.customers.len()
16522        );
16523        assert_eq!(
16524            result.statistics.material_count,
16525            result.master_data.materials.len()
16526        );
16527        assert_eq!(
16528            result.statistics.total_entries as usize,
16529            result.journal_entries.len()
16530        );
16531    }
16532
16533    #[test]
16534    fn test_phase_config_defaults() {
16535        let config = PhaseConfig::default();
16536        assert!(config.generate_master_data);
16537        assert!(config.generate_document_flows);
16538        assert!(config.generate_journal_entries);
16539        assert!(!config.inject_anomalies);
16540        assert!(config.validate_balances);
16541        assert!(config.show_progress);
16542        assert!(config.vendors_per_company > 0);
16543        assert!(config.customers_per_company > 0);
16544    }
16545
16546    #[test]
16547    fn test_get_coa_before_generation() {
16548        let config = create_test_config();
16549        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
16550
16551        // Before generation, CoA should be None
16552        assert!(orchestrator.get_coa().is_none());
16553    }
16554
16555    #[test]
16556    fn test_get_coa_after_generation() {
16557        let config = create_test_config();
16558        let phase_config = PhaseConfig {
16559            generate_master_data: false,
16560            generate_document_flows: false,
16561            generate_journal_entries: true,
16562            inject_anomalies: false,
16563            show_progress: false,
16564            ..Default::default()
16565        };
16566
16567        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16568        let _ = orchestrator.generate().unwrap();
16569
16570        // After generation, CoA should be available
16571        assert!(orchestrator.get_coa().is_some());
16572    }
16573
16574    #[test]
16575    fn test_get_master_data() {
16576        let config = create_test_config();
16577        let phase_config = PhaseConfig {
16578            generate_master_data: true,
16579            generate_document_flows: false,
16580            generate_journal_entries: false,
16581            inject_anomalies: false,
16582            show_progress: false,
16583            vendors_per_company: 5,
16584            customers_per_company: 5,
16585            materials_per_company: 5,
16586            assets_per_company: 5,
16587            employees_per_company: 5,
16588            ..Default::default()
16589        };
16590
16591        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16592        let result = orchestrator.generate().unwrap();
16593
16594        // After generate(), master_data is moved into the result
16595        assert!(!result.master_data.vendors.is_empty());
16596    }
16597
16598    #[test]
16599    fn test_with_progress_builder() {
16600        let config = create_test_config();
16601        let orchestrator = EnhancedOrchestrator::with_defaults(config)
16602            .unwrap()
16603            .with_progress(false);
16604
16605        // Should still work without progress
16606        assert!(!orchestrator.phase_config.show_progress);
16607    }
16608
16609    #[test]
16610    fn test_multi_company_generation() {
16611        let mut config = create_test_config();
16612        config.companies.push(CompanyConfig {
16613            code: "2000".to_string(),
16614            name: "Subsidiary".to_string(),
16615            currency: "EUR".to_string(),
16616            functional_currency: None,
16617            country: "DE".to_string(),
16618            annual_transaction_volume: TransactionVolume::TenK,
16619            volume_weight: 0.5,
16620            fiscal_year_variant: "K4".to_string(),
16621        });
16622
16623        let phase_config = PhaseConfig {
16624            generate_master_data: true,
16625            generate_document_flows: false,
16626            generate_journal_entries: true,
16627            inject_anomalies: false,
16628            show_progress: false,
16629            vendors_per_company: 5,
16630            customers_per_company: 5,
16631            materials_per_company: 5,
16632            assets_per_company: 5,
16633            employees_per_company: 5,
16634            ..Default::default()
16635        };
16636
16637        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16638        let result = orchestrator.generate().unwrap();
16639
16640        // Should have master data for both companies
16641        assert!(result.statistics.vendor_count >= 10); // 5 per company
16642        assert!(result.statistics.customer_count >= 10);
16643        assert!(result.statistics.companies_count == 2);
16644    }
16645
16646    #[test]
16647    fn test_empty_master_data_skips_document_flows() {
16648        let config = create_test_config();
16649        let phase_config = PhaseConfig {
16650            generate_master_data: false,   // Skip master data
16651            generate_document_flows: true, // Try to generate flows
16652            generate_journal_entries: false,
16653            inject_anomalies: false,
16654            show_progress: false,
16655            ..Default::default()
16656        };
16657
16658        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16659        let result = orchestrator.generate().unwrap();
16660
16661        // Without master data, document flows should be empty
16662        assert!(result.document_flows.p2p_chains.is_empty());
16663        assert!(result.document_flows.o2c_chains.is_empty());
16664    }
16665
16666    #[test]
16667    fn test_journal_entry_line_item_count() {
16668        let config = create_test_config();
16669        let phase_config = PhaseConfig {
16670            generate_master_data: false,
16671            generate_document_flows: false,
16672            generate_journal_entries: true,
16673            inject_anomalies: false,
16674            show_progress: false,
16675            ..Default::default()
16676        };
16677
16678        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16679        let result = orchestrator.generate().unwrap();
16680
16681        // Total line items should match sum of all entry line counts
16682        let calculated_line_items: u64 = result
16683            .journal_entries
16684            .iter()
16685            .map(|e| e.line_count() as u64)
16686            .sum();
16687        assert_eq!(result.statistics.total_line_items, calculated_line_items);
16688    }
16689
16690    #[test]
16691    fn test_audit_generation() {
16692        let config = create_test_config();
16693        let phase_config = PhaseConfig {
16694            generate_master_data: false,
16695            generate_document_flows: false,
16696            generate_journal_entries: true,
16697            inject_anomalies: false,
16698            show_progress: false,
16699            generate_audit: true,
16700            audit_engagements: 2,
16701            workpapers_per_engagement: 5,
16702            evidence_per_workpaper: 2,
16703            risks_per_engagement: 3,
16704            findings_per_engagement: 2,
16705            judgments_per_engagement: 2,
16706            ..Default::default()
16707        };
16708
16709        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16710        let result = orchestrator.generate().unwrap();
16711
16712        // Should have generated audit data
16713        assert_eq!(result.audit.engagements.len(), 2);
16714        assert!(!result.audit.workpapers.is_empty());
16715        assert!(!result.audit.evidence.is_empty());
16716        assert!(!result.audit.risk_assessments.is_empty());
16717        assert!(!result.audit.findings.is_empty());
16718        assert!(!result.audit.judgments.is_empty());
16719
16720        // New ISA entity collections should also be populated
16721        assert!(
16722            !result.audit.confirmations.is_empty(),
16723            "ISA 505 confirmations should be generated"
16724        );
16725        assert!(
16726            !result.audit.confirmation_responses.is_empty(),
16727            "ISA 505 confirmation responses should be generated"
16728        );
16729        assert!(
16730            !result.audit.procedure_steps.is_empty(),
16731            "ISA 330 procedure steps should be generated"
16732        );
16733        // Samples may or may not be generated depending on workpaper sampling methods
16734        assert!(
16735            !result.audit.analytical_results.is_empty(),
16736            "ISA 520 analytical procedures should be generated"
16737        );
16738        assert!(
16739            !result.audit.ia_functions.is_empty(),
16740            "ISA 610 IA functions should be generated (one per engagement)"
16741        );
16742        assert!(
16743            !result.audit.related_parties.is_empty(),
16744            "ISA 550 related parties should be generated"
16745        );
16746
16747        // Statistics should match
16748        assert_eq!(
16749            result.statistics.audit_engagement_count,
16750            result.audit.engagements.len()
16751        );
16752        assert_eq!(
16753            result.statistics.audit_workpaper_count,
16754            result.audit.workpapers.len()
16755        );
16756        assert_eq!(
16757            result.statistics.audit_evidence_count,
16758            result.audit.evidence.len()
16759        );
16760        assert_eq!(
16761            result.statistics.audit_risk_count,
16762            result.audit.risk_assessments.len()
16763        );
16764        assert_eq!(
16765            result.statistics.audit_finding_count,
16766            result.audit.findings.len()
16767        );
16768        assert_eq!(
16769            result.statistics.audit_judgment_count,
16770            result.audit.judgments.len()
16771        );
16772        assert_eq!(
16773            result.statistics.audit_confirmation_count,
16774            result.audit.confirmations.len()
16775        );
16776        assert_eq!(
16777            result.statistics.audit_confirmation_response_count,
16778            result.audit.confirmation_responses.len()
16779        );
16780        assert_eq!(
16781            result.statistics.audit_procedure_step_count,
16782            result.audit.procedure_steps.len()
16783        );
16784        assert_eq!(
16785            result.statistics.audit_sample_count,
16786            result.audit.samples.len()
16787        );
16788        assert_eq!(
16789            result.statistics.audit_analytical_result_count,
16790            result.audit.analytical_results.len()
16791        );
16792        assert_eq!(
16793            result.statistics.audit_ia_function_count,
16794            result.audit.ia_functions.len()
16795        );
16796        assert_eq!(
16797            result.statistics.audit_ia_report_count,
16798            result.audit.ia_reports.len()
16799        );
16800        assert_eq!(
16801            result.statistics.audit_related_party_count,
16802            result.audit.related_parties.len()
16803        );
16804        assert_eq!(
16805            result.statistics.audit_related_party_transaction_count,
16806            result.audit.related_party_transactions.len()
16807        );
16808    }
16809
16810    #[test]
16811    fn test_new_phases_disabled_by_default() {
16812        let config = create_test_config();
16813        // Verify new config fields default to disabled
16814        assert!(!config.llm.enabled);
16815        assert!(!config.diffusion.enabled);
16816        assert!(!config.causal.enabled);
16817
16818        let phase_config = PhaseConfig {
16819            generate_master_data: false,
16820            generate_document_flows: false,
16821            generate_journal_entries: true,
16822            inject_anomalies: false,
16823            show_progress: false,
16824            ..Default::default()
16825        };
16826
16827        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16828        let result = orchestrator.generate().unwrap();
16829
16830        // All new phase statistics should be zero when disabled
16831        assert_eq!(result.statistics.llm_enrichment_ms, 0);
16832        assert_eq!(result.statistics.llm_vendors_enriched, 0);
16833        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
16834        assert_eq!(result.statistics.diffusion_samples_generated, 0);
16835        assert_eq!(result.statistics.causal_generation_ms, 0);
16836        assert_eq!(result.statistics.causal_samples_generated, 0);
16837        assert!(result.statistics.causal_validation_passed.is_none());
16838        assert_eq!(result.statistics.counterfactual_pair_count, 0);
16839        assert!(result.counterfactual_pairs.is_empty());
16840    }
16841
16842    #[test]
16843    fn test_counterfactual_generation_enabled() {
16844        let config = create_test_config();
16845        let phase_config = PhaseConfig {
16846            generate_master_data: false,
16847            generate_document_flows: false,
16848            generate_journal_entries: true,
16849            inject_anomalies: false,
16850            show_progress: false,
16851            generate_counterfactuals: true,
16852            generate_period_close: false, // Disable so entry count matches counterfactual pairs
16853            ..Default::default()
16854        };
16855
16856        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16857        let result = orchestrator.generate().unwrap();
16858
16859        // With JE generation enabled, counterfactual pairs should be generated
16860        if !result.journal_entries.is_empty() {
16861            assert_eq!(
16862                result.counterfactual_pairs.len(),
16863                result.journal_entries.len()
16864            );
16865            assert_eq!(
16866                result.statistics.counterfactual_pair_count,
16867                result.journal_entries.len()
16868            );
16869            // Each pair should have a distinct pair_id
16870            let ids: std::collections::HashSet<_> = result
16871                .counterfactual_pairs
16872                .iter()
16873                .map(|p| p.pair_id.clone())
16874                .collect();
16875            assert_eq!(ids.len(), result.counterfactual_pairs.len());
16876        }
16877    }
16878
16879    #[test]
16880    fn test_llm_enrichment_enabled() {
16881        let mut config = create_test_config();
16882        config.llm.enabled = true;
16883        config.llm.max_vendor_enrichments = 3;
16884
16885        let phase_config = PhaseConfig {
16886            generate_master_data: true,
16887            generate_document_flows: false,
16888            generate_journal_entries: false,
16889            inject_anomalies: false,
16890            show_progress: false,
16891            vendors_per_company: 5,
16892            customers_per_company: 3,
16893            materials_per_company: 3,
16894            assets_per_company: 3,
16895            employees_per_company: 3,
16896            ..Default::default()
16897        };
16898
16899        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16900        let result = orchestrator.generate().unwrap();
16901
16902        // LLM enrichment should have run
16903        assert!(result.statistics.llm_vendors_enriched > 0);
16904        assert!(result.statistics.llm_vendors_enriched <= 3);
16905    }
16906
16907    #[test]
16908    fn test_diffusion_enhancement_enabled() {
16909        let mut config = create_test_config();
16910        config.diffusion.enabled = true;
16911        config.diffusion.n_steps = 50;
16912        config.diffusion.sample_size = 20;
16913
16914        let phase_config = PhaseConfig {
16915            generate_master_data: false,
16916            generate_document_flows: false,
16917            generate_journal_entries: true,
16918            inject_anomalies: false,
16919            show_progress: false,
16920            ..Default::default()
16921        };
16922
16923        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16924        let result = orchestrator.generate().unwrap();
16925
16926        // Diffusion phase should have generated samples
16927        assert_eq!(result.statistics.diffusion_samples_generated, 20);
16928    }
16929
16930    #[test]
16931    fn test_causal_overlay_enabled() {
16932        let mut config = create_test_config();
16933        config.causal.enabled = true;
16934        config.causal.template = "fraud_detection".to_string();
16935        config.causal.sample_size = 100;
16936        config.causal.validate = true;
16937
16938        let phase_config = PhaseConfig {
16939            generate_master_data: false,
16940            generate_document_flows: false,
16941            generate_journal_entries: true,
16942            inject_anomalies: false,
16943            show_progress: false,
16944            ..Default::default()
16945        };
16946
16947        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16948        let result = orchestrator.generate().unwrap();
16949
16950        // Causal phase should have generated samples
16951        assert_eq!(result.statistics.causal_samples_generated, 100);
16952        // Validation should have run
16953        assert!(result.statistics.causal_validation_passed.is_some());
16954    }
16955
16956    #[test]
16957    fn test_causal_overlay_revenue_cycle_template() {
16958        let mut config = create_test_config();
16959        config.causal.enabled = true;
16960        config.causal.template = "revenue_cycle".to_string();
16961        config.causal.sample_size = 50;
16962        config.causal.validate = false;
16963
16964        let phase_config = PhaseConfig {
16965            generate_master_data: false,
16966            generate_document_flows: false,
16967            generate_journal_entries: true,
16968            inject_anomalies: false,
16969            show_progress: false,
16970            ..Default::default()
16971        };
16972
16973        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16974        let result = orchestrator.generate().unwrap();
16975
16976        // Causal phase should have generated samples
16977        assert_eq!(result.statistics.causal_samples_generated, 50);
16978        // Validation was disabled
16979        assert!(result.statistics.causal_validation_passed.is_none());
16980    }
16981
16982    #[test]
16983    fn test_all_new_phases_enabled_together() {
16984        let mut config = create_test_config();
16985        config.llm.enabled = true;
16986        config.llm.max_vendor_enrichments = 2;
16987        config.diffusion.enabled = true;
16988        config.diffusion.n_steps = 20;
16989        config.diffusion.sample_size = 10;
16990        config.causal.enabled = true;
16991        config.causal.sample_size = 50;
16992        config.causal.validate = true;
16993
16994        let phase_config = PhaseConfig {
16995            generate_master_data: true,
16996            generate_document_flows: false,
16997            generate_journal_entries: true,
16998            inject_anomalies: false,
16999            show_progress: false,
17000            vendors_per_company: 5,
17001            customers_per_company: 3,
17002            materials_per_company: 3,
17003            assets_per_company: 3,
17004            employees_per_company: 3,
17005            ..Default::default()
17006        };
17007
17008        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
17009        let result = orchestrator.generate().unwrap();
17010
17011        // All three phases should have run
17012        assert!(result.statistics.llm_vendors_enriched > 0);
17013        assert_eq!(result.statistics.diffusion_samples_generated, 10);
17014        assert_eq!(result.statistics.causal_samples_generated, 50);
17015        assert!(result.statistics.causal_validation_passed.is_some());
17016    }
17017
17018    #[test]
17019    fn test_statistics_serialization_with_new_fields() {
17020        let stats = EnhancedGenerationStatistics {
17021            total_entries: 100,
17022            total_line_items: 500,
17023            llm_enrichment_ms: 42,
17024            llm_vendors_enriched: 10,
17025            diffusion_enhancement_ms: 100,
17026            diffusion_samples_generated: 50,
17027            causal_generation_ms: 200,
17028            causal_samples_generated: 100,
17029            causal_validation_passed: Some(true),
17030            ..Default::default()
17031        };
17032
17033        let json = serde_json::to_string(&stats).unwrap();
17034        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
17035
17036        assert_eq!(deserialized.llm_enrichment_ms, 42);
17037        assert_eq!(deserialized.llm_vendors_enriched, 10);
17038        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
17039        assert_eq!(deserialized.diffusion_samples_generated, 50);
17040        assert_eq!(deserialized.causal_generation_ms, 200);
17041        assert_eq!(deserialized.causal_samples_generated, 100);
17042        assert_eq!(deserialized.causal_validation_passed, Some(true));
17043    }
17044
17045    #[test]
17046    fn test_statistics_backward_compat_deserialization() {
17047        // Old JSON without the new fields should still deserialize
17048        let old_json = r#"{
17049            "total_entries": 100,
17050            "total_line_items": 500,
17051            "accounts_count": 50,
17052            "companies_count": 1,
17053            "period_months": 12,
17054            "vendor_count": 10,
17055            "customer_count": 20,
17056            "material_count": 15,
17057            "asset_count": 5,
17058            "employee_count": 8,
17059            "p2p_chain_count": 5,
17060            "o2c_chain_count": 5,
17061            "ap_invoice_count": 5,
17062            "ar_invoice_count": 5,
17063            "ocpm_event_count": 0,
17064            "ocpm_object_count": 0,
17065            "ocpm_case_count": 0,
17066            "audit_engagement_count": 0,
17067            "audit_workpaper_count": 0,
17068            "audit_evidence_count": 0,
17069            "audit_risk_count": 0,
17070            "audit_finding_count": 0,
17071            "audit_judgment_count": 0,
17072            "anomalies_injected": 0,
17073            "data_quality_issues": 0,
17074            "banking_customer_count": 0,
17075            "banking_account_count": 0,
17076            "banking_transaction_count": 0,
17077            "banking_suspicious_count": 0,
17078            "graph_export_count": 0,
17079            "graph_node_count": 0,
17080            "graph_edge_count": 0
17081        }"#;
17082
17083        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
17084
17085        // New fields should default to 0 / None
17086        assert_eq!(stats.llm_enrichment_ms, 0);
17087        assert_eq!(stats.llm_vendors_enriched, 0);
17088        assert_eq!(stats.diffusion_enhancement_ms, 0);
17089        assert_eq!(stats.diffusion_samples_generated, 0);
17090        assert_eq!(stats.causal_generation_ms, 0);
17091        assert_eq!(stats.causal_samples_generated, 0);
17092        assert!(stats.causal_validation_passed.is_none());
17093    }
17094
17095    // ── v5.33 #162 — framework-aware TB classification ──────────────────────
17096
17097    #[test]
17098    fn category_from_account_code_us_gaap_unchanged() {
17099        // US-style numbering — same answers as the pre-v5.33 hard-coded table.
17100        assert_eq!(
17101            EnhancedOrchestrator::category_from_account_code("1000", "us_gaap"),
17102            "Cash"
17103        );
17104        assert_eq!(
17105            EnhancedOrchestrator::category_from_account_code("1500", "us_gaap"),
17106            "FixedAssets"
17107        );
17108        assert_eq!(
17109            EnhancedOrchestrator::category_from_account_code("4000", "us_gaap"),
17110            "Revenue"
17111        );
17112        assert_eq!(
17113            EnhancedOrchestrator::category_from_account_code("6000", "us_gaap"),
17114            "OperatingExpenses"
17115        );
17116    }
17117
17118    #[test]
17119    fn category_from_account_code_skr04_german() {
17120        // SKR04 (German GAAP): 0xxx = fixed assets, 4xxx = revenue,
17121        // 8xxx = tax/extraordinary expense — pre-v5.33 the US-only table
17122        // mis-classified 0xxx as OperatingExpenses (default arm), 4xxx as
17123        // Revenue (accidentally correct), and 8xxx as OtherExpenses.
17124        // Framework-aware version routes them correctly.
17125        assert_eq!(
17126            EnhancedOrchestrator::category_from_account_code("0010", "german_gaap"),
17127            "FixedAssets",
17128            "SKR 0xxx must be classified as fixed assets, not P&L"
17129        );
17130        assert_eq!(
17131            EnhancedOrchestrator::category_from_account_code("1000", "german_gaap"),
17132            "Cash"
17133        );
17134        assert_eq!(
17135            EnhancedOrchestrator::category_from_account_code("1300", "german_gaap"),
17136            "Receivables"
17137        );
17138        assert_eq!(
17139            EnhancedOrchestrator::category_from_account_code("2000", "german_gaap"),
17140            "Equity"
17141        );
17142        assert_eq!(
17143            EnhancedOrchestrator::category_from_account_code("3000", "german_gaap"),
17144            "Payables"
17145        );
17146        assert_eq!(
17147            EnhancedOrchestrator::category_from_account_code("4000", "german_gaap"),
17148            "Revenue"
17149        );
17150        assert_eq!(
17151            EnhancedOrchestrator::category_from_account_code("5000", "german_gaap"),
17152            "CostOfSales"
17153        );
17154        assert_eq!(
17155            EnhancedOrchestrator::category_from_account_code("8000", "german_gaap"),
17156            "OtherExpenses"
17157        );
17158    }
17159
17160    #[test]
17161    fn category_from_account_code_pcg_french() {
17162        // PCG (French GAAP): 2 = fixed assets, 5 = cash, 6 = expenses,
17163        // 7 = revenue. Pre-v5.33 these all hit the wrong US-prefix arms.
17164        assert_eq!(
17165            EnhancedOrchestrator::category_from_account_code("210000", "french_gaap"),
17166            "FixedAssets"
17167        );
17168        assert_eq!(
17169            EnhancedOrchestrator::category_from_account_code("411000", "french_gaap"),
17170            "Receivables"
17171        );
17172        assert_eq!(
17173            EnhancedOrchestrator::category_from_account_code("401000", "french_gaap"),
17174            "Payables"
17175        );
17176        assert_eq!(
17177            EnhancedOrchestrator::category_from_account_code("512000", "french_gaap"),
17178            "Cash"
17179        );
17180        assert_eq!(
17181            EnhancedOrchestrator::category_from_account_code("603000", "french_gaap"),
17182            "OperatingExpenses"
17183        );
17184        assert_eq!(
17185            EnhancedOrchestrator::category_from_account_code("707000", "french_gaap"),
17186            "Revenue"
17187        );
17188        assert_eq!(
17189            EnhancedOrchestrator::category_from_account_code("101000", "french_gaap"),
17190            "Equity"
17191        );
17192    }
17193
17194    #[test]
17195    fn is_balance_sheet_account_routes_skr_correctly() {
17196        // SKR04: 0xxx fixed assets, 1xxx current assets, 2xxx equity,
17197        // 3xxx liabilities → all BS.  4xxx revenue, 5-6 expenses → P&L.
17198        assert!(EnhancedOrchestrator::is_balance_sheet_account(
17199            "0010",
17200            "german_gaap"
17201        ));
17202        assert!(EnhancedOrchestrator::is_balance_sheet_account(
17203            "1200",
17204            "german_gaap"
17205        ));
17206        assert!(EnhancedOrchestrator::is_balance_sheet_account(
17207            "2000",
17208            "german_gaap"
17209        ));
17210        assert!(EnhancedOrchestrator::is_balance_sheet_account(
17211            "3000",
17212            "german_gaap"
17213        ));
17214        assert!(!EnhancedOrchestrator::is_balance_sheet_account(
17215            "4000",
17216            "german_gaap"
17217        ));
17218        assert!(!EnhancedOrchestrator::is_balance_sheet_account(
17219            "6000",
17220            "german_gaap"
17221        ));
17222    }
17223
17224    #[test]
17225    fn period_trial_balance_into_canonical_account_type_is_framework_aware() {
17226        // Defect C regression test — every TB line was hard-coded
17227        // `account_type: Asset` regardless of the underlying code. With
17228        // the framework-aware classifier wired in, the same SKR codes
17229        // resolve to their proper sides.
17230        use datasynth_generators::TrialBalanceEntry;
17231        let entries = vec![
17232            TrialBalanceEntry {
17233                account_code: "0010".to_string(), // SKR fixed asset
17234                account_name: "Land".to_string(),
17235                category: "FixedAssets".to_string(),
17236                debit_balance: rust_decimal::Decimal::new(1_000_000, 0),
17237                credit_balance: rust_decimal::Decimal::ZERO,
17238            },
17239            TrialBalanceEntry {
17240                account_code: "3000".to_string(), // SKR liability
17241                account_name: "Trade payables".to_string(),
17242                category: "Payables".to_string(),
17243                debit_balance: rust_decimal::Decimal::ZERO,
17244                credit_balance: rust_decimal::Decimal::new(500_000, 0),
17245            },
17246            TrialBalanceEntry {
17247                account_code: "4000".to_string(), // SKR revenue
17248                account_name: "Sales".to_string(),
17249                category: "Revenue".to_string(),
17250                debit_balance: rust_decimal::Decimal::ZERO,
17251                credit_balance: rust_decimal::Decimal::new(2_000_000, 0),
17252            },
17253            TrialBalanceEntry {
17254                account_code: "6000".to_string(), // SKR expense
17255                account_name: "Personnel cost".to_string(),
17256                category: "OperatingExpenses".to_string(),
17257                debit_balance: rust_decimal::Decimal::new(800_000, 0),
17258                credit_balance: rust_decimal::Decimal::ZERO,
17259            },
17260        ];
17261        let ptb = PeriodTrialBalance {
17262            fiscal_year: 2024,
17263            fiscal_period: 12,
17264            period_start: chrono::NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
17265            period_end: chrono::NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
17266            entries,
17267            framework: "german_gaap".to_string(),
17268        };
17269        let tb = ptb.into_canonical("ACME_EU", "EUR");
17270        // Line account_types are no longer all-Asset.
17271        let types: Vec<AccountType> = tb.lines.iter().map(|l| l.account_type).collect();
17272        assert_eq!(types[0], AccountType::Asset, "0010 → Asset");
17273        assert_eq!(types[1], AccountType::Liability, "3000 → Liability");
17274        assert_eq!(types[2], AccountType::Revenue, "4000 → Revenue");
17275        assert_eq!(types[3], AccountType::Expense, "6000 → Expense");
17276        // is_balanced is now an unconditional truth claim — the
17277        // underlying JE-balance invariant is the only one we guarantee.
17278        assert!(tb.is_balanced);
17279        assert!(tb.is_equation_valid);
17280        assert_eq!(tb.out_of_balance, rust_decimal::Decimal::ZERO);
17281        assert_eq!(tb.equation_difference, rust_decimal::Decimal::ZERO);
17282    }
17283
17284    #[test]
17285    fn period_trial_balance_deserialises_legacy_snapshot_without_framework_field() {
17286        // Old in-memory snapshots (pre-v5.33) didn't carry the framework
17287        // field. Serde `#[serde(default)]` must let them round-trip with
17288        // a `"us_gaap"` fallback so older saved sessions keep working.
17289        let legacy_json = r#"{
17290            "fiscal_year": 2024,
17291            "fiscal_period": 12,
17292            "period_start": "2024-01-01",
17293            "period_end": "2024-12-31",
17294            "entries": []
17295        }"#;
17296        let ptb: PeriodTrialBalance = serde_json::from_str(legacy_json).unwrap();
17297        assert_eq!(ptb.framework, "us_gaap");
17298    }
17299}