Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{
164    AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
165    TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
166};
167use datasynth_core::models::documents::PaymentMethod;
168use datasynth_core::models::IndustrySector;
169use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
170use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
171use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
172use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
173use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
174use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
175use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
176use datasynth_generators::audit::sample_generator::SampleGenerator;
177use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
178use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
179use datasynth_generators::coa_generator::CoAFramework;
180use rayon::prelude::*;
181use rust_decimal::Decimal;
182
183// ============================================================================
184// Configuration Conversion Functions
185// ============================================================================
186
187/// Convert P2P flow config from schema to generator config.
188/// v4.4.1 — build a `DataQualityStats` with only `total_records`
189/// populated to `n_entries`. Used when the data-quality phase is
190/// skipped (by config or resource pressure) so downstream consumers
191/// can still see the denominator. Before v4.4.1 the writer emitted
192/// `total_records: 0` in those cases, which the SDK team flagged as
193/// indistinguishable from "ran but processed nothing".
194fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
195    #[allow(clippy::field_reassign_with_default)]
196    {
197        let mut s = DataQualityStats::default();
198        s.total_records = n_entries;
199        s.missing_values.total_records = n_entries;
200        s.format_variations.total_processed = n_entries;
201        s.duplicates.total_processed = n_entries;
202        s
203    }
204}
205
206fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
207    let payment_behavior = &schema_config.payment_behavior;
208    let late_dist = &payment_behavior.late_payment_days_distribution;
209
210    P2PGeneratorConfig {
211        three_way_match_rate: schema_config.three_way_match_rate,
212        partial_delivery_rate: schema_config.partial_delivery_rate,
213        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
214        price_variance_rate: schema_config.price_variance_rate,
215        max_price_variance_percent: schema_config.max_price_variance_percent,
216        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
217        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
218        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
219        payment_method_distribution: vec![
220            (PaymentMethod::BankTransfer, 0.60),
221            (PaymentMethod::Check, 0.25),
222            (PaymentMethod::Wire, 0.10),
223            (PaymentMethod::CreditCard, 0.05),
224        ],
225        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
226        payment_behavior: P2PPaymentBehavior {
227            late_payment_rate: payment_behavior.late_payment_rate,
228            late_payment_distribution: LatePaymentDistribution {
229                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
230                late_8_to_14: late_dist.late_8_to_14,
231                very_late_15_to_30: late_dist.very_late_15_to_30,
232                severely_late_31_to_60: late_dist.severely_late_31_to_60,
233                extremely_late_over_60: late_dist.extremely_late_over_60,
234            },
235            partial_payment_rate: payment_behavior.partial_payment_rate,
236            payment_correction_rate: payment_behavior.payment_correction_rate,
237            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
238        },
239    }
240}
241
242/// Convert O2C flow config from schema to generator config.
243fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
244    let payment_behavior = &schema_config.payment_behavior;
245
246    O2CGeneratorConfig {
247        credit_check_failure_rate: schema_config.credit_check_failure_rate,
248        partial_shipment_rate: schema_config.partial_shipment_rate,
249        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
250        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
251        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
252        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
253        bad_debt_rate: schema_config.bad_debt_rate,
254        returns_rate: schema_config.return_rate,
255        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
256        payment_method_distribution: vec![
257            (PaymentMethod::BankTransfer, 0.50),
258            (PaymentMethod::Check, 0.30),
259            (PaymentMethod::Wire, 0.15),
260            (PaymentMethod::CreditCard, 0.05),
261        ],
262        payment_behavior: O2CPaymentBehavior {
263            partial_payment_rate: payment_behavior.partial_payments.rate,
264            short_payment_rate: payment_behavior.short_payments.rate,
265            max_short_percent: payment_behavior.short_payments.max_short_percent,
266            on_account_rate: payment_behavior.on_account_payments.rate,
267            payment_correction_rate: payment_behavior.payment_corrections.rate,
268            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
269        },
270    }
271}
272
/// Switches and volume settings that decide which generation phases run.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Run master data generation (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Run document flow generation (P2P, O2C).
    pub generate_document_flows: bool,
    /// Derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Run journal entry generation.
    pub generate_journal_entries: bool,
    /// Run anomaly injection.
    pub inject_anomalies: bool,
    /// Run data quality injection (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Check the balance sheet equation once generation has finished.
    pub validate_balances: bool,
    /// Require every `gl_account` referenced by a generated JE to exist in
    /// the chart of accounts. When false (the default) a soft warning is
    /// emitted instead; when true any orphan account fails the run.
    pub validate_coa_coverage_strict: bool,
    /// Display progress bars.
    pub show_progress: bool,
    /// Vendor count per company.
    pub vendors_per_company: usize,
    /// Customer count per company.
    pub customers_per_company: usize,
    /// Material count per company.
    pub materials_per_company: usize,
    /// Asset count per company.
    pub assets_per_company: usize,
    /// Employee count per company.
    pub employees_per_company: usize,
    /// How many P2P chains to generate.
    pub p2p_chains: usize,
    /// How many O2C chains to generate.
    pub o2c_chains: usize,
    /// Run audit data generation (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// How many audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpaper count per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence item count per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessment count per engagement.
    pub risks_per_engagement: usize,
    /// Finding count per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgment count per engagement.
    pub judgments_per_engagement: usize,
    /// Run banking KYC/AML generation (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Run S2C sourcing generation (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Build bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Build financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Run accounting standards generation (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Run manufacturing generation (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Run generation of sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Run generation of tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Run ESG generation (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Run generation of intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Run generation of process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Run compliance regulations generation (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Produce period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Run HR generation (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Run treasury generation (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Run project accounting generation (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
    /// v3.3.0: produce legal documents per engagement (engagement letters,
    /// management rep letters, legal opinions, regulatory filings,
    /// board resolutions). Gated by `compliance_regulations.legal_documents.enabled`.
    pub generate_legal_documents: bool,
    /// v3.3.0: produce IT general controls (access logs, change
    /// management records) per audit engagement. Gated by
    /// `audit.it_controls.enabled`.
    pub generate_it_controls: bool,
    /// v3.3.0: run the analytics-metadata phase once all JE-adding phases
    /// are done. Wires PriorYearGenerator / IndustryBenchmarkGenerator /
    /// ManagementReportGenerator / DriftEventGenerator. Gated by the
    /// top-level `analytics_metadata.enabled` config flag.
    pub generate_analytics_metadata: bool,
}

impl Default for PhaseConfig {
    /// Baseline configuration: core phases and validation enabled, every
    /// opt-in feature disabled, and fixed per-company / per-engagement volumes.
    fn default() -> Self {
        Self {
            // ---- Enabled out of the box ----
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,
            generate_evolution_events: true,
            generate_period_close: true,

            // ---- Disabled out of the box ----
            generate_ocpm_events: false,
            inject_anomalies: false,
            // Kept off so test data stays clean unless explicitly requested.
            inject_data_quality: false,
            validate_coa_coverage_strict: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            // Opt-in for ML workloads.
            generate_counterfactuals: false,
            generate_compliance_regulations: false,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,
            // v3.3.0 additions.
            generate_legal_documents: false,
            generate_it_controls: false,
            generate_analytics_metadata: false,

            // ---- Master data and document flow volumes ----
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,

            // ---- Audit volumes ----
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
425
426impl PhaseConfig {
427    /// Derive phase flags from [`GeneratorConfig`].
428    ///
429    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
430    /// CLI flags can override individual fields after calling this method.
431    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
432        Self {
433            // Always-on phases
434            generate_master_data: true,
435            generate_document_flows: true,
436            generate_journal_entries: true,
437            validate_balances: true,
438            validate_coa_coverage_strict: false,
439            generate_period_close: true,
440            generate_evolution_events: true,
441            show_progress: true,
442
443            // Feature-gated phases — derived from config sections
444            generate_audit: cfg.audit.enabled,
445            generate_banking: cfg.banking.enabled,
446            generate_graph_export: cfg.graph_export.enabled,
447            generate_sourcing: cfg.source_to_pay.enabled,
448            generate_intercompany: cfg.intercompany.enabled,
449            generate_financial_statements: cfg.financial_reporting.enabled,
450            generate_bank_reconciliation: cfg.financial_reporting.enabled,
451            generate_accounting_standards: cfg.accounting_standards.enabled,
452            generate_manufacturing: cfg.manufacturing.enabled,
453            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
454            generate_tax: cfg.tax.enabled,
455            generate_esg: cfg.esg.enabled,
456            generate_ocpm_events: cfg.ocpm.enabled,
457            generate_compliance_regulations: cfg.compliance_regulations.enabled,
458            generate_hr: cfg.hr.enabled,
459            generate_treasury: cfg.treasury.enabled,
460            generate_project_accounting: cfg.project_accounting.enabled,
461
462            // v3.3.0: L1 generator wiring
463            // Legal documents emitted when compliance_regulations is enabled
464            // and the nested legal_documents.enabled flag is set.
465            generate_legal_documents: cfg.compliance_regulations.enabled
466                && cfg.compliance_regulations.legal_documents.enabled,
467            // IT general controls emitted when audit is enabled and the
468            // nested it_controls.enabled flag is set.
469            generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
470            // Analytics metadata phase (prior-year, industry benchmarks,
471            // management reports, drift events).
472            generate_analytics_metadata: cfg.analytics_metadata.enabled,
473
474            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
475            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
476
477            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
478            inject_data_quality: cfg.data_quality.enabled,
479
480            // Count defaults (CLI can override after calling this method)
481            vendors_per_company: 50,
482            customers_per_company: 100,
483            materials_per_company: 200,
484            assets_per_company: 50,
485            employees_per_company: 100,
486            p2p_chains: 100,
487            o2c_chains: 100,
488            audit_engagements: 5,
489            workpapers_per_engagement: 20,
490            evidence_per_workpaper: 5,
491            risks_per_engagement: 15,
492            findings_per_engagement: 8,
493            judgments_per_engagement: 10,
494        }
495    }
496}
497
498/// Master data snapshot containing all generated entities.
499#[derive(Debug, Clone, Default)]
500pub struct MasterDataSnapshot {
501    /// Generated vendors.
502    pub vendors: Vec<Vendor>,
503    /// Generated customers.
504    pub customers: Vec<Customer>,
505    /// Generated materials.
506    pub materials: Vec<Material>,
507    /// Generated fixed assets.
508    pub assets: Vec<FixedAsset>,
509    /// Generated employees.
510    pub employees: Vec<Employee>,
511    /// Generated cost center hierarchy (two-level: departments + sub-departments).
512    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
513    /// v5.1: Generated profit centre hierarchy (two-level: top-level
514    /// segment / region / product-group nodes + sub-units).  Emits to
515    /// SAP CEPC alongside `cost_centers` → CSKS.
516    pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
517    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
518    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
519    /// v3.3.0+: organizational profiles (one per company) with
520    /// industry / geography / structure / complexity metadata. Emitted
521    /// alongside master data when `generate_master_data = true`.
522    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
523}
524
/// Summary of a hypergraph export that has completed.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Path of the output directory.
    pub output_path: PathBuf,
}
537
538/// Document flow snapshot containing all generated document chains.
539#[derive(Debug, Clone, Default)]
540pub struct DocumentFlowSnapshot {
541    /// P2P document chains.
542    pub p2p_chains: Vec<P2PDocumentChain>,
543    /// O2C document chains.
544    pub o2c_chains: Vec<O2CDocumentChain>,
545    /// All purchase orders (flattened).
546    pub purchase_orders: Vec<documents::PurchaseOrder>,
547    /// All goods receipts (flattened).
548    pub goods_receipts: Vec<documents::GoodsReceipt>,
549    /// All vendor invoices (flattened).
550    pub vendor_invoices: Vec<documents::VendorInvoice>,
551    /// All sales orders (flattened).
552    pub sales_orders: Vec<documents::SalesOrder>,
553    /// All deliveries (flattened).
554    pub deliveries: Vec<documents::Delivery>,
555    /// All customer invoices (flattened).
556    pub customer_invoices: Vec<documents::CustomerInvoice>,
557    /// All payments (flattened).
558    pub payments: Vec<documents::Payment>,
559    /// Cross-document references collected from all document headers
560    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
561    pub document_references: Vec<documents::DocumentReference>,
562}
563
564/// Subledger snapshot containing generated subledger records.
565#[derive(Debug, Clone, Default)]
566pub struct SubledgerSnapshot {
567    /// AP invoices linked from document flow vendor invoices.
568    pub ap_invoices: Vec<APInvoice>,
569    /// AR invoices linked from document flow customer invoices.
570    pub ar_invoices: Vec<ARInvoice>,
571    /// FA subledger records (asset acquisitions from FA generator).
572    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
573    /// Inventory positions from inventory generator.
574    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
575    /// Inventory movements from inventory generator.
576    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
577    /// AR aging reports, one per company, computed after payment settlement.
578    pub ar_aging_reports: Vec<ARAgingReport>,
579    /// AP aging reports, one per company, computed after payment settlement.
580    pub ap_aging_reports: Vec<APAgingReport>,
581    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
582    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
583    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
584    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
585    /// Dunning runs executed after AR aging (one per company per dunning cycle).
586    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
587    /// Dunning letters generated across all dunning runs.
588    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
589}
590
591/// OCPM snapshot containing generated OCPM event log data.
592#[derive(Debug, Clone, Default)]
593pub struct OcpmSnapshot {
594    /// OCPM event log (if generated)
595    pub event_log: Option<OcpmEventLog>,
596    /// Number of events generated
597    pub event_count: usize,
598    /// Number of objects generated
599    pub object_count: usize,
600    /// Number of cases generated
601    pub case_count: usize,
602}
603
604/// Audit data snapshot containing all generated audit-related entities.
605#[derive(Debug, Clone, Default)]
606pub struct AuditSnapshot {
607    /// Audit engagements per ISA 210/220.
608    pub engagements: Vec<AuditEngagement>,
609    /// Workpapers per ISA 230.
610    pub workpapers: Vec<Workpaper>,
611    /// Audit evidence per ISA 500.
612    pub evidence: Vec<AuditEvidence>,
613    /// Risk assessments per ISA 315/330.
614    pub risk_assessments: Vec<RiskAssessment>,
615    /// Audit findings per ISA 265.
616    pub findings: Vec<AuditFinding>,
617    /// Professional judgments per ISA 200.
618    pub judgments: Vec<ProfessionalJudgment>,
619    /// External confirmations per ISA 505.
620    pub confirmations: Vec<ExternalConfirmation>,
621    /// Confirmation responses per ISA 505.
622    pub confirmation_responses: Vec<ConfirmationResponse>,
623    /// Audit procedure steps per ISA 330/530.
624    pub procedure_steps: Vec<AuditProcedureStep>,
625    /// Audit samples per ISA 530.
626    pub samples: Vec<AuditSample>,
627    /// Analytical procedure results per ISA 520.
628    pub analytical_results: Vec<AnalyticalProcedureResult>,
629    /// Internal audit functions per ISA 610.
630    pub ia_functions: Vec<InternalAuditFunction>,
631    /// Internal audit reports per ISA 610.
632    pub ia_reports: Vec<InternalAuditReport>,
633    /// Related parties per ISA 550.
634    pub related_parties: Vec<RelatedParty>,
635    /// Related party transactions per ISA 550.
636    pub related_party_transactions: Vec<RelatedPartyTransaction>,
637    // ---- ISA 600: Group Audits ----
638    /// Component auditors assigned by jurisdiction (ISA 600).
639    pub component_auditors: Vec<ComponentAuditor>,
640    /// Group audit plan with materiality allocations (ISA 600).
641    pub group_audit_plan: Option<GroupAuditPlan>,
642    /// Component instructions issued to component auditors (ISA 600).
643    pub component_instructions: Vec<ComponentInstruction>,
644    /// Reports received from component auditors (ISA 600).
645    pub component_reports: Vec<ComponentAuditorReport>,
646    // ---- ISA 210: Engagement Letters ----
647    /// Engagement letters per ISA 210.
648    pub engagement_letters: Vec<EngagementLetter>,
649    // ---- ISA 560 / IAS 10: Subsequent Events ----
650    /// Subsequent events per ISA 560 / IAS 10.
651    pub subsequent_events: Vec<SubsequentEvent>,
652    // ---- ISA 402: Service Organization Controls ----
653    /// Service organizations identified per ISA 402.
654    pub service_organizations: Vec<ServiceOrganization>,
655    /// SOC reports obtained per ISA 402.
656    pub soc_reports: Vec<SocReport>,
657    /// User entity controls documented per ISA 402.
658    pub user_entity_controls: Vec<UserEntityControl>,
659    // ---- ISA 570: Going Concern ----
660    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
661    pub going_concern_assessments:
662        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
663    // ---- ISA 540: Accounting Estimates ----
664    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
665    pub accounting_estimates:
666        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
667    // ---- ISA 700/701/705/706: Audit Opinions ----
668    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
669    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
670    /// Key Audit Matters per ISA 701 (flattened across all opinions).
671    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
672    // ---- SOX 302 / 404 ----
673    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
674    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
675    /// SOX Section 404 ICFR assessments (one per entity per year).
676    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
677    // ---- ISA 320: Materiality ----
678    /// Materiality calculations per entity per period (ISA 320).
679    pub materiality_calculations:
680        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
681    // ---- ISA 315: Combined Risk Assessments ----
682    /// Combined Risk Assessments per account area / assertion (ISA 315).
683    pub combined_risk_assessments:
684        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
685    // ---- ISA 530: Sampling Plans ----
686    /// Sampling plans per CRA at Moderate or higher (ISA 530).
687    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
688    /// Individual sampled items (key items + representative items) per ISA 530.
689    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
690    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
691    /// Significant classes of transactions per ISA 315 (one set per entity).
692    pub significant_transaction_classes:
693        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
694    // ---- ISA 520: Unusual Item Markers ----
695    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
696    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
697    // ---- ISA 520: Analytical Relationships ----
698    /// Analytical relationships (ratios, trends, correlations) per entity.
699    pub analytical_relationships:
700        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
701    // ---- PCAOB-ISA Cross-Reference ----
702    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
703    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
704    // ---- ISA Standard Reference ----
705    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
706    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
707    // ---- ISA 220 / ISA 300: Audit Scopes ----
708    /// Audit scope records (one per engagement) describing the audit boundary.
709    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
710    // ---- FSM Event Trail ----
711    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
712    /// Contains the ordered sequence of state-transition and procedure-step events
713    /// generated by the audit FSM engine.
714    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
715    // ---- v3.3.0: L1 generator wiring ----
716    /// Legal documents (engagement letters, management reps, legal
717    /// opinions, regulatory filings, board resolutions) per entity.
718    /// Emitted by `LegalDocumentGenerator` when
719    /// `compliance_regulations.legal_documents.enabled = true`.
720    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
721    /// IT general controls — access logs (login/privileged action
722    /// audit trail). Emitted by `ItControlsGenerator` when
723    /// `audit.it_controls.enabled = true`.
724    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
725    /// IT general controls — change management records (code deploys,
726    /// config changes, patches). Emitted by `ItControlsGenerator`.
727    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
728}
729
730/// Banking KYC/AML data snapshot containing all generated banking entities.
731#[derive(Debug, Clone, Default)]
732pub struct BankingSnapshot {
733    /// Banking customers (retail, business, trust).
734    pub customers: Vec<BankingCustomer>,
735    /// Bank accounts.
736    pub accounts: Vec<BankAccount>,
737    /// Bank transactions with AML labels.
738    pub transactions: Vec<BankTransaction>,
739    /// Transaction-level AML labels with features.
740    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
741    /// Customer-level AML labels.
742    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
743    /// Account-level AML labels.
744    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
745    /// Relationship-level AML labels.
746    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
747    /// Case narratives for AML scenarios.
748    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
749    /// Number of suspicious transactions.
750    pub suspicious_count: usize,
751    /// Number of AML scenarios generated.
752    pub scenario_count: usize,
753}
754
755/// Graph export snapshot containing exported graph metadata.
756#[derive(Debug, Clone, Default, Serialize)]
757pub struct GraphExportSnapshot {
758    /// Whether graph export was performed.
759    pub exported: bool,
760    /// Number of graphs exported.
761    pub graph_count: usize,
762    /// Exported graph metadata (by format name).
763    pub exports: HashMap<String, GraphExportInfo>,
764}
765
766/// Information about an exported graph.
767#[derive(Debug, Clone, Serialize)]
768pub struct GraphExportInfo {
769    /// Graph name.
770    pub name: String,
771    /// Export format (pytorch_geometric, neo4j, dgl).
772    pub format: String,
773    /// Output directory path.
774    pub output_path: PathBuf,
775    /// Number of nodes.
776    pub node_count: usize,
777    /// Number of edges.
778    pub edge_count: usize,
779}
780
781/// S2C sourcing data snapshot.
782#[derive(Debug, Clone, Default)]
783pub struct SourcingSnapshot {
784    /// Spend analyses.
785    pub spend_analyses: Vec<SpendAnalysis>,
786    /// Sourcing projects.
787    pub sourcing_projects: Vec<SourcingProject>,
788    /// Supplier qualifications.
789    pub qualifications: Vec<SupplierQualification>,
790    /// RFx events (RFI, RFP, RFQ).
791    pub rfx_events: Vec<RfxEvent>,
792    /// Supplier bids.
793    pub bids: Vec<SupplierBid>,
794    /// Bid evaluations.
795    pub bid_evaluations: Vec<BidEvaluation>,
796    /// Procurement contracts.
797    pub contracts: Vec<ProcurementContract>,
798    /// Catalog items.
799    pub catalog_items: Vec<CatalogItem>,
800    /// Supplier scorecards.
801    pub scorecards: Vec<SupplierScorecard>,
802}
803
804/// A single period's trial balance with metadata.
805///
806/// Used as the orchestrator's in-memory representation while it
807/// builds per-period FS / CF artefacts.  At write time the runtime
808/// converts each `PeriodTrialBalance` to the canonical
809/// [`datasynth_core::models::balance::TrialBalance`] shape via
810/// [`PeriodTrialBalance::into_canonical`] so the on-disk
811/// `period_close/trial_balances.json` matches what the group
812/// aggregate phase loads — see
813/// `crate::output_writer::write_outputs`.
814#[derive(Debug, Clone, Serialize, Deserialize)]
815pub struct PeriodTrialBalance {
816    /// Fiscal year.
817    pub fiscal_year: u16,
818    /// Fiscal period (1-12).
819    pub fiscal_period: u8,
820    /// Period start date.
821    pub period_start: NaiveDate,
822    /// Period end date.
823    pub period_end: NaiveDate,
824    /// Trial balance entries for this period.
825    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
826}
827
828impl PeriodTrialBalance {
829    /// Convert this in-memory period TB into the canonical
830    /// [`datasynth_core::models::balance::TrialBalance`] shape used
831    /// for the on-disk artefact.
832    ///
833    /// v5.1: the on-disk shape is now canonical end-to-end.  Group
834    /// aggregate's `tb_loader` consumes the canonical type directly,
835    /// dropping the v5.0 dual-shape detection that converted from
836    /// `PeriodTrialBalance` JSON on the fly.
837    pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
838        let mut total_debits = Decimal::ZERO;
839        let mut total_credits = Decimal::ZERO;
840        let lines: Vec<TrialBalanceLine> = self
841            .entries
842            .into_iter()
843            .map(|e| {
844                total_debits += e.debit_balance;
845                total_credits += e.credit_balance;
846                let category = AccountCategory::from_account_code(&e.account_code);
847                TrialBalanceLine {
848                    account_code: e.account_code,
849                    account_description: e.account_name,
850                    category,
851                    account_type: AccountType::Asset,
852                    opening_balance: Decimal::ZERO,
853                    period_debits: e.debit_balance,
854                    period_credits: e.credit_balance,
855                    closing_balance: e.debit_balance - e.credit_balance,
856                    debit_balance: e.debit_balance,
857                    credit_balance: e.credit_balance,
858                    cost_center: None,
859                    profit_center: None,
860                }
861            })
862            .collect();
863        let imbalance = total_debits - total_credits;
864        let is_balanced = imbalance.abs() < Decimal::new(1, 2);
865        TrialBalance {
866            trial_balance_id: format!(
867                "{company_code}-{:04}{:02}",
868                self.fiscal_year, self.fiscal_period
869            ),
870            company_code: company_code.to_string(),
871            company_name: None,
872            as_of_date: self.period_end,
873            fiscal_year: self.fiscal_year as i32,
874            fiscal_period: self.fiscal_period as u32,
875            currency: currency.to_string(),
876            balance_type: TrialBalanceType::Adjusted,
877            lines,
878            total_debits,
879            total_credits,
880            is_balanced,
881            out_of_balance: imbalance,
882            is_equation_valid: is_balanced,
883            equation_difference: imbalance,
884            category_summary: std::collections::HashMap::new(),
885            created_at: self
886                .period_start
887                .and_hms_opt(0, 0, 0)
888                .expect("midnight is a valid time"),
889            created_by: "ORCHESTRATOR".to_string(),
890            approved_by: None,
891            approved_at: None,
892            status: TrialBalanceStatus::Final,
893        }
894    }
895}
896
897/// Financial reporting snapshot (financial statements + bank reconciliations).
898#[derive(Debug, Clone, Default)]
899pub struct FinancialReportingSnapshot {
900    /// Financial statements (balance sheet, income statement, cash flow).
901    /// For multi-entity configs this includes all standalone statements.
902    pub financial_statements: Vec<FinancialStatement>,
903    /// Standalone financial statements keyed by entity code.
904    /// Each entity has its own slice of statements.
905    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
906    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
907    pub consolidated_statements: Vec<FinancialStatement>,
908    /// Consolidation schedules (one per period) showing pre/post elimination detail.
909    pub consolidation_schedules: Vec<ConsolidationSchedule>,
910    /// Bank reconciliations.
911    pub bank_reconciliations: Vec<BankReconciliation>,
912    /// Period-close trial balances (one per period).
913    pub trial_balances: Vec<PeriodTrialBalance>,
914    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
915    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
916    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
917    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
918    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
919    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
920}
921
922/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
923#[derive(Debug, Clone, Default)]
924pub struct HrSnapshot {
925    /// Payroll runs (actual data).
926    pub payroll_runs: Vec<PayrollRun>,
927    /// Payroll line items (actual data).
928    pub payroll_line_items: Vec<PayrollLineItem>,
929    /// Time entries (actual data).
930    pub time_entries: Vec<TimeEntry>,
931    /// Expense reports (actual data).
932    pub expense_reports: Vec<ExpenseReport>,
933    /// Benefit enrollments (actual data).
934    pub benefit_enrollments: Vec<BenefitEnrollment>,
935    /// Defined benefit pension plans (IAS 19 / ASC 715).
936    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
937    /// Pension obligation (DBO) roll-forwards.
938    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
939    /// Plan asset roll-forwards.
940    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
941    /// Pension disclosures.
942    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
943    /// Journal entries generated from pension expense and OCI remeasurements.
944    pub pension_journal_entries: Vec<JournalEntry>,
945    /// Stock grants (ASC 718 / IFRS 2).
946    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
947    /// Stock-based compensation period expense records.
948    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
949    /// Journal entries generated from stock-based compensation expense.
950    pub stock_comp_journal_entries: Vec<JournalEntry>,
951    /// Payroll runs.
952    pub payroll_run_count: usize,
953    /// Payroll line item count.
954    pub payroll_line_item_count: usize,
955    /// Time entry count.
956    pub time_entry_count: usize,
957    /// Expense report count.
958    pub expense_report_count: usize,
959    /// Benefit enrollment count.
960    pub benefit_enrollment_count: usize,
961    /// Pension plan count.
962    pub pension_plan_count: usize,
963    /// Stock grant count.
964    pub stock_grant_count: usize,
965}
966
967/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
968#[derive(Debug, Clone, Default)]
969pub struct AccountingStandardsSnapshot {
970    /// Revenue recognition contracts (actual data).
971    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
972    /// Impairment tests (actual data).
973    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
974    /// Business combinations (IFRS 3 / ASC 805).
975    pub business_combinations:
976        Vec<datasynth_core::models::business_combination::BusinessCombination>,
977    /// Journal entries generated from business combinations (Day 1 + amortization).
978    pub business_combination_journal_entries: Vec<JournalEntry>,
979    /// ECL models (IFRS 9 / ASC 326).
980    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
981    /// ECL provision movements.
982    pub ecl_provision_movements:
983        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
984    /// Journal entries from ECL provision.
985    pub ecl_journal_entries: Vec<JournalEntry>,
986    /// Provisions (IAS 37 / ASC 450).
987    pub provisions: Vec<datasynth_core::models::provision::Provision>,
988    /// Provision movement roll-forwards (IAS 37 / ASC 450).
989    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
990    /// Contingent liabilities (IAS 37 / ASC 450).
991    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
992    /// Journal entries from provisions.
993    pub provision_journal_entries: Vec<JournalEntry>,
994    /// IAS 21 functional currency translation results (one per entity per period).
995    pub currency_translation_results:
996        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
997    /// Revenue recognition contract count.
998    pub revenue_contract_count: usize,
999    /// Impairment test count.
1000    pub impairment_test_count: usize,
1001    /// Business combination count.
1002    pub business_combination_count: usize,
1003    /// ECL model count.
1004    pub ecl_model_count: usize,
1005    /// Provision count.
1006    pub provision_count: usize,
1007    /// Currency translation result count (IAS 21).
1008    pub currency_translation_count: usize,
1009    // ---- v3.3.1: Lease / FairValue / FrameworkReconciliation ----
1010    /// Lease contracts (IFRS 16 / ASC 842). Each entry carries its own
1011    /// ROU asset + lease liability details.
1012    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
1013    /// Fair value measurements (IFRS 13 / ASC 820) across Level 1/2/3.
1014    pub fair_value_measurements:
1015        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
1016    /// Framework difference records (dual-reporting only).
1017    pub framework_differences:
1018        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
1019    /// Per-entity framework reconciliation (dual-reporting only).
1020    pub framework_reconciliations:
1021        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
1022    /// Counts for stats logging.
1023    pub lease_count: usize,
1024    pub fair_value_measurement_count: usize,
1025    pub framework_difference_count: usize,
1026}
1027
1028/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
1029#[derive(Debug, Clone, Default)]
1030pub struct ComplianceRegulationsSnapshot {
1031    /// Flattened standard records for output.
1032    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
1033    /// Cross-reference records.
1034    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
1035    /// Jurisdiction profile records.
1036    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
1037    /// Generated audit procedures.
1038    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
1039    /// Generated compliance findings.
1040    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
1041    /// Generated regulatory filings.
1042    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
1043    /// Compliance graph (if graph integration enabled).
1044    pub compliance_graph: Option<datasynth_graph::Graph>,
1045}
1046
1047/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
1048#[derive(Debug, Clone, Default)]
1049pub struct ManufacturingSnapshot {
1050    /// Production orders (actual data).
1051    pub production_orders: Vec<ProductionOrder>,
1052    /// Quality inspections (actual data).
1053    pub quality_inspections: Vec<QualityInspection>,
1054    /// Cycle counts (actual data).
1055    pub cycle_counts: Vec<CycleCount>,
1056    /// BOM components (actual data).
1057    pub bom_components: Vec<BomComponent>,
1058    /// Inventory movements (actual data).
1059    pub inventory_movements: Vec<InventoryMovement>,
1060    /// Production order count.
1061    pub production_order_count: usize,
1062    /// Quality inspection count.
1063    pub quality_inspection_count: usize,
1064    /// Cycle count count.
1065    pub cycle_count_count: usize,
1066    /// BOM component count.
1067    pub bom_component_count: usize,
1068    /// Inventory movement count.
1069    pub inventory_movement_count: usize,
1070}
1071
1072/// Sales, KPI, and budget data snapshot.
1073#[derive(Debug, Clone, Default)]
1074pub struct SalesKpiBudgetsSnapshot {
1075    /// Sales quotes (actual data).
1076    pub sales_quotes: Vec<SalesQuote>,
1077    /// Management KPIs (actual data).
1078    pub kpis: Vec<ManagementKpi>,
1079    /// Budgets (actual data).
1080    pub budgets: Vec<Budget>,
1081    /// Sales quote count.
1082    pub sales_quote_count: usize,
1083    /// Management KPI count.
1084    pub kpi_count: usize,
1085    /// Budget line count.
1086    pub budget_line_count: usize,
1087}
1088
1089/// Anomaly labels generated during injection.
1090#[derive(Debug, Clone, Default)]
1091pub struct AnomalyLabels {
1092    /// All anomaly labels.
1093    pub labels: Vec<LabeledAnomaly>,
1094    /// Summary statistics.
1095    pub summary: Option<AnomalySummary>,
1096    /// Count by anomaly type.
1097    pub by_type: HashMap<String, usize>,
1098}
1099
1100/// Balance validation results from running balance tracker.
1101#[derive(Debug, Clone, Default)]
1102pub struct BalanceValidationResult {
1103    /// Whether validation was performed.
1104    pub validated: bool,
1105    /// Whether balance sheet equation is satisfied.
1106    pub is_balanced: bool,
1107    /// Number of entries processed.
1108    pub entries_processed: u64,
1109    /// Total debits across all entries.
1110    pub total_debits: rust_decimal::Decimal,
1111    /// Total credits across all entries.
1112    pub total_credits: rust_decimal::Decimal,
1113    /// Number of accounts tracked.
1114    pub accounts_tracked: usize,
1115    /// Number of companies tracked.
1116    pub companies_tracked: usize,
1117    /// Validation errors encountered.
1118    pub validation_errors: Vec<ValidationError>,
1119    /// Whether any unbalanced entries were found.
1120    pub has_unbalanced_entries: bool,
1121}
1122
1123/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
1124#[derive(Debug, Clone, Default)]
1125pub struct TaxSnapshot {
1126    /// Tax jurisdictions.
1127    pub jurisdictions: Vec<TaxJurisdiction>,
1128    /// Tax codes.
1129    pub codes: Vec<TaxCode>,
1130    /// Tax lines computed on documents.
1131    pub tax_lines: Vec<TaxLine>,
1132    /// Tax returns filed per period.
1133    pub tax_returns: Vec<TaxReturn>,
1134    /// Tax provisions.
1135    pub tax_provisions: Vec<TaxProvision>,
1136    /// Withholding tax records.
1137    pub withholding_records: Vec<WithholdingTaxRecord>,
1138    /// Tax anomaly labels.
1139    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1140    /// Jurisdiction count.
1141    pub jurisdiction_count: usize,
1142    /// Code count.
1143    pub code_count: usize,
1144    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
1145    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1146    /// Journal entries posting tax payable/receivable from computed tax lines.
1147    pub tax_posting_journal_entries: Vec<JournalEntry>,
1148}
1149
1150/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1151#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1152pub struct IntercompanySnapshot {
1153    /// Group ownership structure (parent/subsidiary/associate relationships).
1154    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1155    /// IC matched pairs (transaction pairs between related entities).
1156    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1157    /// IC journal entries generated from matched pairs (seller side).
1158    pub seller_journal_entries: Vec<JournalEntry>,
1159    /// IC journal entries generated from matched pairs (buyer side).
1160    pub buyer_journal_entries: Vec<JournalEntry>,
1161    /// Elimination entries for consolidation.
1162    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1163    /// NCI measurements derived from group structure ownership percentages.
1164    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1165    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
1166    #[serde(skip)]
1167    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1168    /// IC matched pair count.
1169    pub matched_pair_count: usize,
1170    /// IC elimination entry count.
1171    pub elimination_entry_count: usize,
1172    /// IC matching rate (0.0 to 1.0).
1173    pub match_rate: f64,
1174}
1175
1176/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
1177#[derive(Debug, Clone, Default)]
1178pub struct EsgSnapshot {
1179    /// Emission records (scope 1, 2, 3).
1180    pub emissions: Vec<EmissionRecord>,
1181    /// Energy consumption records.
1182    pub energy: Vec<EnergyConsumption>,
1183    /// Water usage records.
1184    pub water: Vec<WaterUsage>,
1185    /// Waste records.
1186    pub waste: Vec<WasteRecord>,
1187    /// Workforce diversity metrics.
1188    pub diversity: Vec<WorkforceDiversityMetric>,
1189    /// Pay equity metrics.
1190    pub pay_equity: Vec<PayEquityMetric>,
1191    /// Safety incidents.
1192    pub safety_incidents: Vec<SafetyIncident>,
1193    /// Safety metrics.
1194    pub safety_metrics: Vec<SafetyMetric>,
1195    /// Governance metrics.
1196    pub governance: Vec<GovernanceMetric>,
1197    /// Supplier ESG assessments.
1198    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1199    /// Materiality assessments.
1200    pub materiality: Vec<MaterialityAssessment>,
1201    /// ESG disclosures.
1202    pub disclosures: Vec<EsgDisclosure>,
1203    /// Climate scenarios.
1204    pub climate_scenarios: Vec<ClimateScenario>,
1205    /// ESG anomaly labels.
1206    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1207    /// Total emission record count.
1208    pub emission_count: usize,
1209    /// Total disclosure count.
1210    pub disclosure_count: usize,
1211}
1212
1213/// Treasury data snapshot (cash management, hedging, debt, pooling).
1214#[derive(Debug, Clone, Default)]
1215pub struct TreasurySnapshot {
1216    /// Cash positions (daily balances per account).
1217    pub cash_positions: Vec<CashPosition>,
1218    /// Cash forecasts.
1219    pub cash_forecasts: Vec<CashForecast>,
1220    /// Cash pools.
1221    pub cash_pools: Vec<CashPool>,
1222    /// Cash pool sweep transactions.
1223    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1224    /// Hedging instruments.
1225    pub hedging_instruments: Vec<HedgingInstrument>,
1226    /// Hedge relationships (ASC 815/IFRS 9 designations).
1227    pub hedge_relationships: Vec<HedgeRelationship>,
1228    /// Debt instruments.
1229    pub debt_instruments: Vec<DebtInstrument>,
1230    /// Bank guarantees and letters of credit.
1231    pub bank_guarantees: Vec<BankGuarantee>,
1232    /// Intercompany netting runs.
1233    pub netting_runs: Vec<NettingRun>,
1234    /// Treasury anomaly labels.
1235    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1236    /// Journal entries generated from treasury instruments (debt interest accruals,
1237    /// hedge MTM, cash pool sweeps).
1238    pub journal_entries: Vec<JournalEntry>,
1239}
1240
1241/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
1242#[derive(Debug, Clone, Default)]
1243pub struct ProjectAccountingSnapshot {
1244    /// Projects with WBS hierarchies.
1245    pub projects: Vec<Project>,
1246    /// Project cost lines (linked from source documents).
1247    pub cost_lines: Vec<ProjectCostLine>,
1248    /// Revenue recognition records.
1249    pub revenue_records: Vec<ProjectRevenue>,
1250    /// Earned value metrics.
1251    pub earned_value_metrics: Vec<EarnedValueMetric>,
1252    /// Change orders.
1253    pub change_orders: Vec<ChangeOrder>,
1254    /// Project milestones.
1255    pub milestones: Vec<ProjectMilestone>,
1256}
1257
1258/// Complete result of enhanced generation run.
1259#[derive(Debug, Default)]
1260pub struct EnhancedGenerationResult {
1261    /// Generated chart of accounts.
1262    pub chart_of_accounts: ChartOfAccounts,
1263    /// Master data snapshot.
1264    pub master_data: MasterDataSnapshot,
1265    /// Document flow snapshot.
1266    pub document_flows: DocumentFlowSnapshot,
1267    /// Subledger snapshot (linked from document flows).
1268    pub subledger: SubledgerSnapshot,
1269    /// OCPM event log snapshot (if OCPM generation enabled).
1270    pub ocpm: OcpmSnapshot,
1271    /// Audit data snapshot (if audit generation enabled).
1272    pub audit: AuditSnapshot,
1273    /// Banking KYC/AML data snapshot (if banking generation enabled).
1274    pub banking: BankingSnapshot,
1275    /// Graph export snapshot (if graph export enabled).
1276    pub graph_export: GraphExportSnapshot,
1277    /// S2C sourcing data snapshot (if sourcing generation enabled).
1278    pub sourcing: SourcingSnapshot,
1279    /// Financial reporting snapshot (financial statements + bank reconciliations).
1280    pub financial_reporting: FinancialReportingSnapshot,
1281    /// HR data snapshot (payroll, time entries, expenses).
1282    pub hr: HrSnapshot,
1283    /// Accounting standards snapshot (revenue recognition, impairment).
1284    pub accounting_standards: AccountingStandardsSnapshot,
1285    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1286    pub manufacturing: ManufacturingSnapshot,
1287    /// Sales, KPI, and budget snapshot.
1288    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1289    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1290    pub tax: TaxSnapshot,
1291    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1292    pub esg: EsgSnapshot,
1293    /// Treasury data snapshot (cash management, hedging, debt).
1294    pub treasury: TreasurySnapshot,
1295    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1296    pub project_accounting: ProjectAccountingSnapshot,
1297    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1298    pub process_evolution: Vec<ProcessEvolutionEvent>,
1299    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1300    pub organizational_events: Vec<OrganizationalEvent>,
1301    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1302    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1303    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1304    pub intercompany: IntercompanySnapshot,
1305    /// Generated journal entries.
1306    pub journal_entries: Vec<JournalEntry>,
1307    /// Anomaly labels (if injection enabled).
1308    pub anomaly_labels: AnomalyLabels,
1309    /// Balance validation results (if validation enabled).
1310    pub balance_validation: BalanceValidationResult,
1311    /// Data quality statistics (if injection enabled).
1312    pub data_quality_stats: DataQualityStats,
1313    /// Data quality issue records (if injection enabled).
1314    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1315    /// Generation statistics.
1316    pub statistics: EnhancedGenerationStatistics,
1317    /// Data lineage graph (if tracking enabled).
1318    pub lineage: Option<super::lineage::LineageGraph>,
1319    /// Quality gate evaluation result.
1320    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1321    /// Internal controls (if controls generation enabled).
1322    pub internal_controls: Vec<InternalControl>,
1323    /// SoD (Segregation of Duties) violations identified during control application.
1324    ///
1325    /// Each record corresponds to a journal entry where `sod_violation == true`.
1326    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1327    /// Opening balances (if opening balance generation enabled).
1328    pub opening_balances: Vec<GeneratedOpeningBalance>,
1329    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1330    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1331    /// Counterfactual (original, mutated) JE pairs for ML training.
1332    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1333    /// Fraud red-flag indicators on P2P/O2C documents.
1334    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1335    /// Collusion rings (coordinated fraud networks).
1336    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1337    /// Bi-temporal version chains for vendor entities.
1338    pub temporal_vendor_chains:
1339        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1340    /// Entity relationship graph (nodes + edges with strength scores).
1341    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1342    /// Cross-process links (P2P ↔ O2C via inventory movements).
1343    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1344    /// Industry-specific GL accounts and metadata.
1345    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1346    /// SP5.2 — CoA semantic prior snapshot. When `Some`, `write_journal_entries_csv`
1347    /// builds a secondary lookup from the prior's 3,123 corpus accounts and uses
1348    /// it as a fallback when the synthetic CoA index misses a line's `gl_account`
1349    /// (common when SP3.7's per-source attribute conditional emits corpus account
1350    /// numbers that differ from the synthetic CoA master table's number set).
1351    pub coa_semantic_prior:
1352        Option<datasynth_core::distributions::behavioral_priors::CoaSemanticPrior>,
1353    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1354    pub compliance_regulations: ComplianceRegulationsSnapshot,
1355    /// v3.3.0: analytics-metadata snapshot (prior-year comparatives,
1356    /// industry benchmarks, management reports, drift events). Empty
1357    /// when `analytics_metadata.enabled = false`.
1358    pub analytics_metadata: AnalyticsMetadataSnapshot,
1359    /// v3.5.1+: statistical validation report (Benford, chi-squared,
1360    /// KS) over the generated amount distribution.  `None` when
1361    /// `distributions.validation.enabled = false`.
1362    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1363    /// v4.1.3+: interconnectivity snapshot — vendor tier assignments,
1364    /// customer value-segment labels, and industry-specific metadata
1365    /// populated from the previously-inert `vendor_network`,
1366    /// `customer_segmentation`, and `industry_specific` schema
1367    /// sections. Empty when those sections are disabled.
1368    pub interconnectivity: InterconnectivitySnapshot,
1369}
1370
/// Interconnectivity labels attached to generated entities (v4.1.3+).
///
/// Filled in when any of `vendor_network.enabled`,
/// `customer_segmentation.enabled`, or `industry_specific.enabled` is set.
/// Keeping the tier / segment / industry labels here lets downstream
/// tooling (graph export, risk models) read them directly instead of
/// re-deriving them.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// Vendor tier assignments as `(vendor_id, tier)`.
    ///
    /// Tier 1 is strategic / primary, tier 2 is a sub-tier supplier to
    /// tier 1, and tier 3 is a sub-sub-tier supplier.
    pub vendor_tiers: Vec<(String, u8)>,
    /// Vendor cluster assignments as `(vendor_id, cluster_label)`.
    ///
    /// The label is one of `"reliable_strategic"`,
    /// `"standard_operational"`, `"transactional"`, or `"problematic"`.
    pub vendor_clusters: Vec<(String, String)>,
    /// Customer value segments as `(customer_id, value_segment)`.
    ///
    /// The segment is one of `"enterprise"`, `"mid_market"`, `"smb"`, or
    /// `"consumer"`.
    pub customer_value_segments: Vec<(String, String)>,
    /// Customer lifecycle stages as `(customer_id, lifecycle_stage)`.
    ///
    /// The stage is one of `"prospect"`, `"new"`, `"growth"`, `"mature"`,
    /// `"at_risk"`, `"churned"`, or `"won_back"`.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Summary of the industry-specific knobs that were applied, if any
    /// (for example `"manufacturing.bom_depth=3"`).
    pub industry_metadata: Vec<String>,
}
1396
1397/// v3.3.0: snapshot for the analytics-metadata phase.
1398#[derive(Debug, Clone, Default)]
1399pub struct AnalyticsMetadataSnapshot {
1400    /// Prior-year comparative balances per account, per entity.
1401    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
1402    /// Industry benchmarks for the configured industry.
1403    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
1404    /// Management-report artefacts (dashboards, MDA sections).
1405    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
1406    /// Drift-event labels emitted from the post-generation sweep.
1407    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
1408}
1409
1410/// Enhanced statistics about a generation run.
1411#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1412pub struct EnhancedGenerationStatistics {
1413    /// Total journal entries generated.
1414    pub total_entries: u64,
1415    /// Total line items generated.
1416    pub total_line_items: u64,
1417    /// Number of accounts in CoA.
1418    pub accounts_count: usize,
1419    /// Number of companies.
1420    pub companies_count: usize,
1421    /// Period in months.
1422    pub period_months: u32,
1423    /// Master data counts.
1424    pub vendor_count: usize,
1425    pub customer_count: usize,
1426    pub material_count: usize,
1427    pub asset_count: usize,
1428    pub employee_count: usize,
1429    /// Document flow counts.
1430    pub p2p_chain_count: usize,
1431    pub o2c_chain_count: usize,
1432    /// Subledger counts.
1433    pub ap_invoice_count: usize,
1434    pub ar_invoice_count: usize,
1435    /// OCPM counts.
1436    pub ocpm_event_count: usize,
1437    pub ocpm_object_count: usize,
1438    pub ocpm_case_count: usize,
1439    /// Audit counts.
1440    pub audit_engagement_count: usize,
1441    pub audit_workpaper_count: usize,
1442    pub audit_evidence_count: usize,
1443    pub audit_risk_count: usize,
1444    pub audit_finding_count: usize,
1445    pub audit_judgment_count: usize,
1446    /// ISA 505 confirmation counts.
1447    #[serde(default)]
1448    pub audit_confirmation_count: usize,
1449    #[serde(default)]
1450    pub audit_confirmation_response_count: usize,
1451    /// ISA 330/530 procedure step and sample counts.
1452    #[serde(default)]
1453    pub audit_procedure_step_count: usize,
1454    #[serde(default)]
1455    pub audit_sample_count: usize,
1456    /// ISA 520 analytical procedure counts.
1457    #[serde(default)]
1458    pub audit_analytical_result_count: usize,
1459    /// ISA 610 internal audit counts.
1460    #[serde(default)]
1461    pub audit_ia_function_count: usize,
1462    #[serde(default)]
1463    pub audit_ia_report_count: usize,
1464    /// ISA 550 related party counts.
1465    #[serde(default)]
1466    pub audit_related_party_count: usize,
1467    #[serde(default)]
1468    pub audit_related_party_transaction_count: usize,
1469    /// Anomaly counts.
1470    pub anomalies_injected: usize,
1471    /// Data quality issue counts.
1472    pub data_quality_issues: usize,
1473    /// Banking counts.
1474    pub banking_customer_count: usize,
1475    pub banking_account_count: usize,
1476    pub banking_transaction_count: usize,
1477    pub banking_suspicious_count: usize,
1478    /// Graph export counts.
1479    pub graph_export_count: usize,
1480    pub graph_node_count: usize,
1481    pub graph_edge_count: usize,
1482    /// LLM enrichment timing (milliseconds).
1483    #[serde(default)]
1484    pub llm_enrichment_ms: u64,
1485    /// Number of vendor names enriched by LLM.
1486    #[serde(default)]
1487    pub llm_vendors_enriched: usize,
1488    /// v4.1.1+: number of customer names enriched by LLM.
1489    #[serde(default)]
1490    pub llm_customers_enriched: usize,
1491    /// v4.1.1+: number of material descriptions enriched by LLM.
1492    #[serde(default)]
1493    pub llm_materials_enriched: usize,
1494    /// v4.1.1+: number of audit finding titles enriched by LLM.
1495    #[serde(default)]
1496    pub llm_findings_enriched: usize,
1497    /// Diffusion enhancement timing (milliseconds).
1498    #[serde(default)]
1499    pub diffusion_enhancement_ms: u64,
1500    /// Number of diffusion samples generated.
1501    #[serde(default)]
1502    pub diffusion_samples_generated: usize,
1503    /// Hybrid-diffusion blend weight actually applied (after clamp to \[0,1\]).
1504    /// `None` when the neural/hybrid backend is not active.
1505    #[serde(default, skip_serializing_if = "Option::is_none")]
1506    pub neural_hybrid_weight: Option<f64>,
1507    /// Hybrid-diffusion strategy applied (weighted_average / column_select / threshold).
1508    #[serde(default, skip_serializing_if = "Option::is_none")]
1509    pub neural_hybrid_strategy: Option<String>,
1510    /// How many columns were routed through the neural backend.
1511    #[serde(default, skip_serializing_if = "Option::is_none")]
1512    pub neural_routed_column_count: Option<usize>,
1513    /// Causal generation timing (milliseconds).
1514    #[serde(default)]
1515    pub causal_generation_ms: u64,
1516    /// Number of causal samples generated.
1517    #[serde(default)]
1518    pub causal_samples_generated: usize,
1519    /// Whether causal validation passed.
1520    #[serde(default)]
1521    pub causal_validation_passed: Option<bool>,
1522    /// S2C sourcing counts.
1523    #[serde(default)]
1524    pub sourcing_project_count: usize,
1525    #[serde(default)]
1526    pub rfx_event_count: usize,
1527    #[serde(default)]
1528    pub bid_count: usize,
1529    #[serde(default)]
1530    pub contract_count: usize,
1531    #[serde(default)]
1532    pub catalog_item_count: usize,
1533    #[serde(default)]
1534    pub scorecard_count: usize,
1535    /// Financial reporting counts.
1536    #[serde(default)]
1537    pub financial_statement_count: usize,
1538    #[serde(default)]
1539    pub bank_reconciliation_count: usize,
1540    /// HR counts.
1541    #[serde(default)]
1542    pub payroll_run_count: usize,
1543    #[serde(default)]
1544    pub time_entry_count: usize,
1545    #[serde(default)]
1546    pub expense_report_count: usize,
1547    #[serde(default)]
1548    pub benefit_enrollment_count: usize,
1549    #[serde(default)]
1550    pub pension_plan_count: usize,
1551    #[serde(default)]
1552    pub stock_grant_count: usize,
1553    /// Accounting standards counts.
1554    #[serde(default)]
1555    pub revenue_contract_count: usize,
1556    #[serde(default)]
1557    pub impairment_test_count: usize,
1558    #[serde(default)]
1559    pub business_combination_count: usize,
1560    #[serde(default)]
1561    pub ecl_model_count: usize,
1562    #[serde(default)]
1563    pub provision_count: usize,
1564    /// Manufacturing counts.
1565    #[serde(default)]
1566    pub production_order_count: usize,
1567    #[serde(default)]
1568    pub quality_inspection_count: usize,
1569    #[serde(default)]
1570    pub cycle_count_count: usize,
1571    #[serde(default)]
1572    pub bom_component_count: usize,
1573    #[serde(default)]
1574    pub inventory_movement_count: usize,
1575    /// Sales & reporting counts.
1576    #[serde(default)]
1577    pub sales_quote_count: usize,
1578    #[serde(default)]
1579    pub kpi_count: usize,
1580    #[serde(default)]
1581    pub budget_line_count: usize,
1582    /// Tax counts.
1583    #[serde(default)]
1584    pub tax_jurisdiction_count: usize,
1585    #[serde(default)]
1586    pub tax_code_count: usize,
1587    /// ESG counts.
1588    #[serde(default)]
1589    pub esg_emission_count: usize,
1590    #[serde(default)]
1591    pub esg_disclosure_count: usize,
1592    /// Intercompany counts.
1593    #[serde(default)]
1594    pub ic_matched_pair_count: usize,
1595    #[serde(default)]
1596    pub ic_elimination_count: usize,
1597    /// Number of intercompany journal entries (seller + buyer side).
1598    #[serde(default)]
1599    pub ic_transaction_count: usize,
1600    /// Number of fixed asset subledger records.
1601    #[serde(default)]
1602    pub fa_subledger_count: usize,
1603    /// Number of inventory subledger records.
1604    #[serde(default)]
1605    pub inventory_subledger_count: usize,
1606    /// Treasury debt instrument count.
1607    #[serde(default)]
1608    pub treasury_debt_instrument_count: usize,
1609    /// Treasury hedging instrument count.
1610    #[serde(default)]
1611    pub treasury_hedging_instrument_count: usize,
1612    /// Project accounting project count.
1613    #[serde(default)]
1614    pub project_count: usize,
1615    /// Project accounting change order count.
1616    #[serde(default)]
1617    pub project_change_order_count: usize,
1618    /// Tax provision count.
1619    #[serde(default)]
1620    pub tax_provision_count: usize,
1621    /// Opening balance count.
1622    #[serde(default)]
1623    pub opening_balance_count: usize,
1624    /// Subledger reconciliation count.
1625    #[serde(default)]
1626    pub subledger_reconciliation_count: usize,
1627    /// Tax line count.
1628    #[serde(default)]
1629    pub tax_line_count: usize,
1630    /// Project cost line count.
1631    #[serde(default)]
1632    pub project_cost_line_count: usize,
1633    /// Cash position count.
1634    #[serde(default)]
1635    pub cash_position_count: usize,
1636    /// Cash forecast count.
1637    #[serde(default)]
1638    pub cash_forecast_count: usize,
1639    /// Cash pool count.
1640    #[serde(default)]
1641    pub cash_pool_count: usize,
1642    /// Process evolution event count.
1643    #[serde(default)]
1644    pub process_evolution_event_count: usize,
1645    /// Organizational event count.
1646    #[serde(default)]
1647    pub organizational_event_count: usize,
1648    /// Counterfactual pair count.
1649    #[serde(default)]
1650    pub counterfactual_pair_count: usize,
1651    /// Number of fraud red-flag indicators generated.
1652    #[serde(default)]
1653    pub red_flag_count: usize,
1654    /// Number of collusion rings generated.
1655    #[serde(default)]
1656    pub collusion_ring_count: usize,
1657    /// Number of bi-temporal vendor version chains generated.
1658    #[serde(default)]
1659    pub temporal_version_chain_count: usize,
1660    /// Number of nodes in the entity relationship graph.
1661    #[serde(default)]
1662    pub entity_relationship_node_count: usize,
1663    /// Number of edges in the entity relationship graph.
1664    #[serde(default)]
1665    pub entity_relationship_edge_count: usize,
1666    /// Number of cross-process links generated.
1667    #[serde(default)]
1668    pub cross_process_link_count: usize,
1669    /// Number of disruption events generated.
1670    #[serde(default)]
1671    pub disruption_event_count: usize,
1672    /// Number of industry-specific GL accounts generated.
1673    #[serde(default)]
1674    pub industry_gl_account_count: usize,
1675    /// Number of period-close journal entries generated (tax provision + closing entries).
1676    #[serde(default)]
1677    pub period_close_je_count: usize,
1678}
1679
1680/// Enhanced orchestrator with full feature integration.
1681pub struct EnhancedOrchestrator {
1682    config: GeneratorConfig,
1683    phase_config: PhaseConfig,
1684    coa: Option<Arc<ChartOfAccounts>>,
1685    master_data: MasterDataSnapshot,
1686    seed: u64,
1687    multi_progress: Option<MultiProgress>,
1688    /// Resource guard for memory, disk, and CPU monitoring
1689    resource_guard: ResourceGuard,
1690    /// Output path for disk space monitoring
1691    output_path: Option<PathBuf>,
1692    /// Copula generators for preserving correlations (from fingerprint)
1693    copula_generators: Vec<CopulaGeneratorSpec>,
1694    /// Country pack registry for localized data generation
1695    country_pack_registry: datasynth_core::CountryPackRegistry,
1696    /// Optional streaming sink for phase-by-phase output
1697    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1698    /// Shared template provider for user-supplied template packs.
1699    ///
1700    /// Constructed from `config.templates.path` at orchestrator creation
1701    /// time. When the path is `None`, this is still populated with an
1702    /// embedded-only provider so generators can always call trait methods
1703    /// without an `Option<…>` guard. v3.2.0+.
1704    template_provider: datasynth_core::templates::SharedTemplateProvider,
1705    /// v3.4.1+ temporal context for business-day / holiday awareness.
1706    ///
1707    /// Populated only when `temporal_patterns.business_days.enabled`. When
1708    /// `None`, document-flow / HR / treasury / period-close generators keep
1709    /// their legacy raw-RNG date-offset behaviour (byte-identical to v3.4.0
1710    /// for the same seed).
1711    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
1712    /// Optional shard-mode context (set by group-engine shard runners).
1713    /// `None` preserves byte-for-byte pre-v5.0 single-entity behavior.
1714    shard_context: Option<crate::shard_context::ShardContext>,
1715    /// SP3.12 — cached priors, shared between `generate_journal_entries` (which
1716    /// loads them) and `generate_jes_from_document_flows` (which applies padding).
1717    /// Set once after the SP3 opt-in block in `generate_journal_entries`.
1718    cached_priors: Option<std::sync::Arc<datasynth_generators::priors_loader::LoadedPriors>>,
1719}
1720
1721impl EnhancedOrchestrator {
1722    /// Create a new enhanced orchestrator.
1723    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1724        datasynth_config::validate_config(&config)?;
1725
1726        let seed = config.global.seed.unwrap_or_else(rand::random);
1727
1728        // Build resource guard from config
1729        let resource_guard = Self::build_resource_guard(&config, None);
1730
1731        // Build country pack registry from config
1732        let country_pack_registry = match &config.country_packs {
1733            Some(cp) => {
1734                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1735                    .map_err(|e| SynthError::config(e.to_string()))?
1736            }
1737            None => datasynth_core::CountryPackRegistry::builtin_only()
1738                .map_err(|e| SynthError::config(e.to_string()))?,
1739        };
1740
1741        // Build the shared template provider from config.templates.path.
1742        // `None` → embedded-only provider (byte-identical pre-v3.2.0 output).
1743        // `Some(path)` → load file/dir and honour `merge_strategy`.
1744        let template_provider = Self::build_template_provider(&config)?;
1745
1746        // v3.4.1: build a shared temporal context when
1747        // `temporal_patterns.business_days.enabled`. `None` preserves the
1748        // raw-RNG date-offset behaviour per-generator.
1749        let temporal_context = Self::build_temporal_context(&config)?;
1750
1751        Ok(Self {
1752            config,
1753            phase_config,
1754            coa: None,
1755            master_data: MasterDataSnapshot::default(),
1756            seed,
1757            multi_progress: None,
1758            resource_guard,
1759            output_path: None,
1760            copula_generators: Vec::new(),
1761            country_pack_registry,
1762            phase_sink: None,
1763            template_provider,
1764            temporal_context,
1765            shard_context: None,
1766            cached_priors: None,
1767        })
1768    }
1769
1770    /// Install shard-mode context.  Called by the group shard runner
1771    /// before [`EnhancedOrchestrator::generate`] (or the equivalent
1772    /// entry point).  Has no effect on single-entity runs.
1773    ///
1774    /// See [`crate::shard_context::ShardContext`] for rationale.
1775    pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1776        self.shard_context = Some(ctx);
1777    }
1778
1779    /// Build the shared [`TemporalContext`] from `config.temporal_patterns`.
1780    ///
1781    /// Returns `Ok(None)` when temporal-pattern features are disabled — the
1782    /// caller keeps its legacy raw-RNG path. Returns `Ok(Some(arc))` when
1783    /// enabled. Returns `Err` only for unrecoverable config errors.
1784    fn build_temporal_context(
1785        config: &GeneratorConfig,
1786    ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1787        use datasynth_core::distributions::{parse_region_code, TemporalContext};
1788
1789        let tp = &config.temporal_patterns;
1790        if !tp.enabled || !tp.business_days.enabled {
1791            return Ok(None);
1792        }
1793
1794        let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1795            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1796        let end_date = start_date + chrono::Months::new(config.global.period_months);
1797
1798        let region_code = tp
1799            .calendars
1800            .regions
1801            .first()
1802            .cloned()
1803            .unwrap_or_else(|| "US".to_string());
1804        let region = parse_region_code(&region_code);
1805
1806        Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1807    }
1808
1809    /// Build the shared template provider from `config.templates`.
1810    ///
1811    /// Always returns a provider — falls back to embedded-only when
1812    /// `config.templates.path` is `None`. The merge-strategy from config
1813    /// maps onto the loader's [`MergeStrategy`] enum. Load failures at
1814    /// orchestrator-construction time are fatal (preferable to silently
1815    /// using embedded pools when the user supplied a bad path).
1816    fn build_template_provider(
1817        config: &GeneratorConfig,
1818    ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1819        use datasynth_core::templates::{
1820            loader::{MergeStrategy, TemplateLoader},
1821            DefaultTemplateProvider,
1822        };
1823        use std::sync::Arc;
1824
1825        let provider = match &config.templates.path {
1826            None => DefaultTemplateProvider::new(),
1827            Some(path) => {
1828                let data = if path.is_dir() {
1829                    TemplateLoader::load_from_directory(path)
1830                } else {
1831                    TemplateLoader::load_from_file(path)
1832                }
1833                .map_err(|e| {
1834                    SynthError::config(format!(
1835                        "Failed to load templates from {}: {e}",
1836                        path.display()
1837                    ))
1838                })?;
1839                let strategy = match config.templates.merge_strategy {
1840                    datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1841                    datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1842                    datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1843                        MergeStrategy::MergePreferFile
1844                    }
1845                };
1846                DefaultTemplateProvider::with_templates(data, strategy)
1847            }
1848        };
1849        Ok(Arc::new(provider))
1850    }
1851
1852    /// Create with default phase config.
1853    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1854        Self::new(config, PhaseConfig::default())
1855    }
1856
1857    /// Set a streaming phase sink for real-time output (builder pattern).
1858    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1859        self.phase_sink = Some(sink);
1860        self
1861    }
1862
1863    /// Set a streaming phase sink on an existing orchestrator.
1864    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1865        self.phase_sink = Some(sink);
1866    }
1867
1868    /// Emit a batch of items to the phase sink (if configured).
1869    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1870        if let Some(ref sink) = self.phase_sink {
1871            for item in items {
1872                if let Ok(value) = serde_json::to_value(item) {
1873                    if let Err(e) = sink.emit(phase, type_name, &value) {
1874                        warn!(
1875                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1876                        );
1877                    }
1878                }
1879            }
1880            if let Err(e) = sink.phase_complete(phase) {
1881                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1882            }
1883        }
1884    }
1885
1886    /// Enable/disable progress bars.
1887    pub fn with_progress(mut self, show: bool) -> Self {
1888        self.phase_config.show_progress = show;
1889        if show {
1890            self.multi_progress = Some(MultiProgress::new());
1891        }
1892        self
1893    }
1894
1895    /// Set the output path for disk space monitoring.
1896    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1897        let path = path.into();
1898        self.output_path = Some(path.clone());
1899        // Rebuild resource guard with the output path
1900        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1901        self
1902    }
1903
1904    /// Access the country pack registry.
1905    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1906        &self.country_pack_registry
1907    }
1908
1909    /// Look up a country pack by country code string.
1910    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1911        self.country_pack_registry.get_by_str(country)
1912    }
1913
1914    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1915    /// company, defaulting to `"US"` if no companies are configured.
1916    fn primary_country_code(&self) -> &str {
1917        self.config
1918            .companies
1919            .first()
1920            .map(|c| c.country.as_str())
1921            .unwrap_or("US")
1922    }
1923
1924    /// Resolve the country pack for the primary (first) company.
1925    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1926        self.country_pack_for(self.primary_country_code())
1927    }
1928
1929    /// Resolve the CoA framework from config/country-pack.
1930    fn resolve_coa_framework(&self) -> CoAFramework {
1931        if self.config.accounting_standards.enabled {
1932            match self.config.accounting_standards.framework {
1933                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1934                    return CoAFramework::FrenchPcg;
1935                }
1936                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1937                    return CoAFramework::GermanSkr04;
1938                }
1939                _ => {}
1940            }
1941        }
1942        // Fallback: derive from country pack
1943        let pack = self.primary_pack();
1944        match pack.accounting.framework.as_str() {
1945            "french_gaap" => CoAFramework::FrenchPcg,
1946            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1947            _ => CoAFramework::UsGaap,
1948        }
1949    }
1950
1951    /// Check if copula generators are available.
1952    ///
1953    /// Returns true if the orchestrator has copula generators for preserving
1954    /// correlations (typically from fingerprint-based generation).
1955    pub fn has_copulas(&self) -> bool {
1956        !self.copula_generators.is_empty()
1957    }
1958
1959    /// Get the copula generators.
1960    ///
1961    /// Returns a reference to the copula generators for use during generation.
1962    /// These can be used to generate correlated samples that preserve the
1963    /// statistical relationships from the source data.
1964    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1965        &self.copula_generators
1966    }
1967
1968    /// Get a mutable reference to the copula generators.
1969    ///
1970    /// Allows generators to sample from copulas during data generation.
1971    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1972        &mut self.copula_generators
1973    }
1974
1975    /// Sample correlated values from a named copula.
1976    ///
1977    /// Returns None if the copula doesn't exist.
1978    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1979        self.copula_generators
1980            .iter_mut()
1981            .find(|c| c.name == copula_name)
1982            .map(|c| c.generator.sample())
1983    }
1984
1985    /// Create an orchestrator from a fingerprint file.
1986    ///
1987    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1988    /// and creates an orchestrator configured to generate data matching
1989    /// the statistical properties of the original data.
1990    ///
1991    /// # Arguments
1992    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1993    /// * `phase_config` - Phase configuration for generation
1994    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1995    ///
1996    /// # Example
1997    /// ```no_run
1998    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1999    /// use std::path::Path;
2000    ///
2001    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
2002    ///     Path::new("fingerprint.dsf"),
2003    ///     PhaseConfig::default(),
2004    ///     1.0,
2005    /// ).unwrap();
2006    /// ```
2007    pub fn from_fingerprint(
2008        fingerprint_path: &std::path::Path,
2009        phase_config: PhaseConfig,
2010        scale: f64,
2011    ) -> SynthResult<Self> {
2012        info!("Loading fingerprint from: {}", fingerprint_path.display());
2013
2014        // Read the fingerprint
2015        let reader = FingerprintReader::new();
2016        let fingerprint = reader
2017            .read_from_file(fingerprint_path)
2018            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2019
2020        Self::from_fingerprint_data(fingerprint, phase_config, scale)
2021    }
2022
2023    /// Create an orchestrator from a loaded fingerprint.
2024    ///
2025    /// # Arguments
2026    /// * `fingerprint` - The loaded fingerprint
2027    /// * `phase_config` - Phase configuration for generation
2028    /// * `scale` - Scale factor for row counts (1.0 = same as original)
2029    pub fn from_fingerprint_data(
2030        fingerprint: Fingerprint,
2031        phase_config: PhaseConfig,
2032        scale: f64,
2033    ) -> SynthResult<Self> {
2034        info!(
2035            "Synthesizing config from fingerprint (version: {}, tables: {})",
2036            fingerprint.manifest.version,
2037            fingerprint.schema.tables.len()
2038        );
2039
2040        // Generate a seed for the synthesis
2041        let seed: u64 = rand::random();
2042        info!("Fingerprint synthesis seed: {}", seed);
2043
2044        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
2045        let options = SynthesisOptions {
2046            scale,
2047            seed: Some(seed),
2048            preserve_correlations: true,
2049            inject_anomalies: true,
2050        };
2051        let synthesizer = ConfigSynthesizer::with_options(options);
2052
2053        // Synthesize full result including copula generators
2054        let synthesis_result = synthesizer
2055            .synthesize_full(&fingerprint, seed)
2056            .map_err(|e| {
2057                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2058            })?;
2059
2060        // Start with a base config from the fingerprint's industry if available
2061        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2062            Self::base_config_for_industry(industry)
2063        } else {
2064            Self::base_config_for_industry("manufacturing")
2065        };
2066
2067        // Apply the synthesized patches
2068        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2069
2070        // Log synthesis results
2071        info!(
2072            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2073            fingerprint.schema.tables.len(),
2074            scale,
2075            synthesis_result.copula_generators.len()
2076        );
2077
2078        if !synthesis_result.copula_generators.is_empty() {
2079            for spec in &synthesis_result.copula_generators {
2080                info!(
2081                    "  Copula '{}' for table '{}': {} columns",
2082                    spec.name,
2083                    spec.table,
2084                    spec.columns.len()
2085                );
2086            }
2087        }
2088
2089        // Create the orchestrator with the synthesized config
2090        let mut orchestrator = Self::new(config, phase_config)?;
2091
2092        // Store copula generators for use during generation
2093        orchestrator.copula_generators = synthesis_result.copula_generators;
2094
2095        Ok(orchestrator)
2096    }
2097
2098    /// Create a base config for a given industry.
2099    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2100        use datasynth_config::presets::create_preset;
2101        use datasynth_config::TransactionVolume;
2102        use datasynth_core::models::{CoAComplexity, IndustrySector};
2103
2104        let sector = match industry.to_lowercase().as_str() {
2105            "manufacturing" => IndustrySector::Manufacturing,
2106            "retail" => IndustrySector::Retail,
2107            "financial" | "financial_services" => IndustrySector::FinancialServices,
2108            "healthcare" => IndustrySector::Healthcare,
2109            "technology" | "tech" => IndustrySector::Technology,
2110            _ => IndustrySector::Manufacturing,
2111        };
2112
2113        // Create a preset with reasonable defaults
2114        create_preset(
2115            sector,
2116            1,  // company count
2117            12, // period months
2118            CoAComplexity::Medium,
2119            TransactionVolume::TenK,
2120        )
2121    }
2122
2123    /// Apply a config patch to a GeneratorConfig.
2124    fn apply_config_patch(
2125        mut config: GeneratorConfig,
2126        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2127    ) -> GeneratorConfig {
2128        use datasynth_fingerprint::synthesis::ConfigValue;
2129
2130        for (key, value) in patch.values() {
2131            match (key.as_str(), value) {
2132                // Transaction count is handled via TransactionVolume enum on companies
2133                // Log it but cannot directly set it (would need to modify company volumes)
2134                ("transactions.count", ConfigValue::Integer(n)) => {
2135                    info!(
2136                        "Fingerprint suggests {} transactions (apply via company volumes)",
2137                        n
2138                    );
2139                }
2140                ("global.period_months", ConfigValue::Integer(n)) => {
2141                    config.global.period_months = (*n).clamp(1, 120) as u32;
2142                }
2143                ("global.start_date", ConfigValue::String(s)) => {
2144                    config.global.start_date = s.clone();
2145                }
2146                ("global.seed", ConfigValue::Integer(n)) => {
2147                    config.global.seed = Some(*n as u64);
2148                }
2149                ("fraud.enabled", ConfigValue::Bool(b)) => {
2150                    config.fraud.enabled = *b;
2151                }
2152                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2153                    config.fraud.fraud_rate = *f;
2154                }
2155                ("data_quality.enabled", ConfigValue::Bool(b)) => {
2156                    config.data_quality.enabled = *b;
2157                }
2158                // Handle anomaly injection paths (mapped to fraud config)
2159                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2160                    config.fraud.enabled = *b;
2161                }
2162                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2163                    config.fraud.fraud_rate = *f;
2164                }
2165                _ => {
2166                    debug!("Ignoring unknown config patch key: {}", key);
2167                }
2168            }
2169        }
2170
2171        config
2172    }
2173
2174    /// Build a resource guard from the configuration.
2175    fn build_resource_guard(
2176        config: &GeneratorConfig,
2177        output_path: Option<PathBuf>,
2178    ) -> ResourceGuard {
2179        let mut builder = ResourceGuardBuilder::new();
2180
2181        // Configure memory limit if set
2182        if config.global.memory_limit_mb > 0 {
2183            builder = builder.memory_limit(config.global.memory_limit_mb);
2184        }
2185
2186        // Configure disk monitoring for output path
2187        if let Some(path) = output_path {
2188            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
2189        }
2190
2191        // Use conservative degradation settings for production safety
2192        builder = builder.conservative();
2193
2194        builder.build()
2195    }
2196
2197    /// Check resources (memory, disk, CPU) and return degradation level.
2198    ///
2199    /// Returns an error if hard limits are exceeded.
2200    /// Returns Ok(DegradationLevel) indicating current resource state.
2201    fn check_resources(&self) -> SynthResult<DegradationLevel> {
2202        self.resource_guard.check()
2203    }
2204
2205    /// Check resources with logging.
2206    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2207        let level = self.resource_guard.check()?;
2208
2209        if level != DegradationLevel::Normal {
2210            warn!(
2211                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2212                phase,
2213                level,
2214                self.resource_guard.current_memory_mb(),
2215                self.resource_guard.available_disk_mb()
2216            );
2217        }
2218
2219        Ok(level)
2220    }
2221
2222    /// Get current degradation actions based on resource state.
2223    fn get_degradation_actions(&self) -> DegradationActions {
2224        self.resource_guard.get_actions()
2225    }
2226
2227    /// Legacy method for backwards compatibility - now uses ResourceGuard.
2228    fn check_memory_limit(&self) -> SynthResult<()> {
2229        self.check_resources()?;
2230        Ok(())
2231    }
2232
2233    /// Run the complete generation workflow.
2234    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2235        info!("Starting enhanced generation workflow");
2236        info!(
2237            "Config: industry={:?}, period_months={}, companies={}",
2238            self.config.global.industry,
2239            self.config.global.period_months,
2240            self.config.companies.len()
2241        );
2242
2243        // Set decimal serialization mode (thread-local, affects JSON output).
2244        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
2245        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2246        datasynth_core::serde_decimal::set_numeric_native(is_native);
2247        struct NumericModeGuard;
2248        impl Drop for NumericModeGuard {
2249            fn drop(&mut self) {
2250                datasynth_core::serde_decimal::set_numeric_native(false);
2251            }
2252        }
2253        let _numeric_guard = if is_native {
2254            Some(NumericModeGuard)
2255        } else {
2256            None
2257        };
2258
2259        // Initial resource check before starting
2260        let initial_level = self.check_resources_with_log("initial")?;
2261        if initial_level == DegradationLevel::Emergency {
2262            return Err(SynthError::resource(
2263                "Insufficient resources to start generation",
2264            ));
2265        }
2266
2267        let mut stats = EnhancedGenerationStatistics {
2268            companies_count: self.config.companies.len(),
2269            period_months: self.config.global.period_months,
2270            ..Default::default()
2271        };
2272
2273        // Phase 1: Chart of Accounts
2274        let coa = self.phase_chart_of_accounts(&mut stats)?;
2275
2276        // Phase 2: Master Data
2277        self.phase_master_data(&mut stats)?;
2278
2279        // Emit master data to stream sink
2280        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2281        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2282        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2283
2284        // Phase 3: Document Flows + Subledger Linking
2285        let (mut document_flows, mut subledger, fa_journal_entries) =
2286            self.phase_document_flows(&mut stats)?;
2287
2288        // Emit document flows to stream sink
2289        self.emit_phase_items(
2290            "document_flows",
2291            "PurchaseOrder",
2292            &document_flows.purchase_orders,
2293        );
2294        self.emit_phase_items(
2295            "document_flows",
2296            "GoodsReceipt",
2297            &document_flows.goods_receipts,
2298        );
2299        self.emit_phase_items(
2300            "document_flows",
2301            "VendorInvoice",
2302            &document_flows.vendor_invoices,
2303        );
2304        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2305        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2306
2307        // Phase 3b: Opening Balances (before JE generation)
2308        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2309
2310        // Phase 3c: Convert opening balances to journal entries and prepend them.
2311        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
2312        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
2313        // balance map type.
2314        let opening_balance_jes: Vec<JournalEntry> = opening_balances
2315            .iter()
2316            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2317            .collect();
2318        if !opening_balance_jes.is_empty() {
2319            debug!(
2320                "Prepending {} opening balance JEs to entries",
2321                opening_balance_jes.len()
2322            );
2323        }
2324
2325        // Phase 4: Journal Entries
2326        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2327
2328        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
2329        // starts from the correct initial state.
2330        if !opening_balance_jes.is_empty() {
2331            let mut combined = opening_balance_jes;
2332            combined.extend(entries);
2333            entries = combined;
2334        }
2335
2336        // Phase 4c: Append FA acquisition journal entries to main entries
2337        if !fa_journal_entries.is_empty() {
2338            debug!(
2339                "Appending {} FA acquisition JEs to main entries",
2340                fa_journal_entries.len()
2341            );
2342            entries.extend(fa_journal_entries);
2343        }
2344
2345        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
2346        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2347
2348        // Get current degradation actions for optional phases
2349        let actions = self.get_degradation_actions();
2350
2351        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
2352        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2353
2354        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
2355        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
2356        if !sourcing.contracts.is_empty() {
2357            let mut linked_count = 0usize;
2358            // Collect (vendor_id, po_id) pairs from P2P chains
2359            let po_vendor_pairs: Vec<(String, String)> = document_flows
2360                .p2p_chains
2361                .iter()
2362                .map(|chain| {
2363                    (
2364                        chain.purchase_order.vendor_id.clone(),
2365                        chain.purchase_order.header.document_id.clone(),
2366                    )
2367                })
2368                .collect();
2369
2370            for chain in &mut document_flows.p2p_chains {
2371                if chain.purchase_order.contract_id.is_none() {
2372                    if let Some(contract) = sourcing
2373                        .contracts
2374                        .iter()
2375                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2376                    {
2377                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2378                        linked_count += 1;
2379                    }
2380                }
2381            }
2382
2383            // Populate reverse FK: purchase_order_ids on each contract
2384            for contract in &mut sourcing.contracts {
2385                let po_ids: Vec<String> = po_vendor_pairs
2386                    .iter()
2387                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2388                    .map(|(_, po_id)| po_id.clone())
2389                    .collect();
2390                if !po_ids.is_empty() {
2391                    contract.purchase_order_ids = po_ids;
2392                }
2393            }
2394
2395            if linked_count > 0 {
2396                debug!(
2397                    "Linked {} purchase orders to S2C contracts by vendor match",
2398                    linked_count
2399                );
2400            }
2401        }
2402
2403        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2404        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2405
2406        // Phase 5c: Append IC journal entries to main entries
2407        if !intercompany.seller_journal_entries.is_empty()
2408            || !intercompany.buyer_journal_entries.is_empty()
2409        {
2410            let ic_je_count = intercompany.seller_journal_entries.len()
2411                + intercompany.buyer_journal_entries.len();
2412            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2413            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2414            debug!(
2415                "Appended {} IC journal entries to main entries",
2416                ic_je_count
2417            );
2418        }
2419
2420        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2421        if !intercompany.elimination_entries.is_empty() {
2422            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2423                &intercompany.elimination_entries,
2424            );
2425            if !elim_jes.is_empty() {
2426                debug!(
2427                    "Appended {} elimination journal entries to main entries",
2428                    elim_jes.len()
2429                );
2430                // IC elimination net-zero assertion (v2.5 hardening)
2431                let elim_debit: rust_decimal::Decimal =
2432                    elim_jes.iter().map(|je| je.total_debit()).sum();
2433                let elim_credit: rust_decimal::Decimal =
2434                    elim_jes.iter().map(|je| je.total_credit()).sum();
2435                let elim_diff = (elim_debit - elim_credit).abs();
2436                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2437                if elim_diff > tolerance {
2438                    return Err(datasynth_core::error::SynthError::generation(format!(
2439                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2440                        elim_debit, elim_credit, elim_diff, tolerance
2441                    )));
2442                }
2443                debug!(
2444                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2445                    elim_debit, elim_credit, elim_diff
2446                );
2447                entries.extend(elim_jes);
2448            }
2449        }
2450
2451        // Phase 5e: Wire IC source documents into document flow snapshot
2452        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2453            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2454                document_flows
2455                    .customer_invoices
2456                    .extend(ic_docs.seller_invoices.iter().cloned());
2457                document_flows
2458                    .purchase_orders
2459                    .extend(ic_docs.buyer_orders.iter().cloned());
2460                document_flows
2461                    .goods_receipts
2462                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2463                document_flows
2464                    .vendor_invoices
2465                    .extend(ic_docs.buyer_invoices.iter().cloned());
2466                debug!(
2467                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2468                    ic_docs.seller_invoices.len(),
2469                    ic_docs.buyer_orders.len(),
2470                    ic_docs.buyer_goods_receipts.len(),
2471                    ic_docs.buyer_invoices.len(),
2472                );
2473            }
2474        }
2475
2476        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2477        let hr = self.phase_hr_data(&mut stats)?;
2478
2479        // Phase 6b: Generate JEs from payroll runs
2480        if !hr.payroll_runs.is_empty() {
2481            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2482            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2483            entries.extend(payroll_jes);
2484        }
2485
2486        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2487        if !hr.pension_journal_entries.is_empty() {
2488            debug!(
2489                "Generated {} JEs from pension plans",
2490                hr.pension_journal_entries.len()
2491            );
2492            entries.extend(hr.pension_journal_entries.iter().cloned());
2493        }
2494
2495        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2496        if !hr.stock_comp_journal_entries.is_empty() {
2497            debug!(
2498                "Generated {} JEs from stock-based compensation",
2499                hr.stock_comp_journal_entries.len()
2500            );
2501            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2502        }
2503
2504        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2505        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2506
2507        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2508        if !manufacturing_snap.production_orders.is_empty() {
2509            let currency = self
2510                .config
2511                .companies
2512                .first()
2513                .map(|c| c.currency.as_str())
2514                .unwrap_or("USD");
2515            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2516                &manufacturing_snap.production_orders,
2517                &manufacturing_snap.quality_inspections,
2518                currency,
2519            );
2520            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2521            entries.extend(mfg_jes);
2522        }
2523
2524        // Phase 7a-warranty: Generate warranty provisions per company
2525        if !manufacturing_snap.quality_inspections.is_empty() {
2526            let framework = match self.config.accounting_standards.framework {
2527                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2528                _ => "US_GAAP",
2529            };
2530            for company in &self.config.companies {
2531                let company_orders: Vec<_> = manufacturing_snap
2532                    .production_orders
2533                    .iter()
2534                    .filter(|o| o.company_code == company.code)
2535                    .cloned()
2536                    .collect();
2537                let company_inspections: Vec<_> = manufacturing_snap
2538                    .quality_inspections
2539                    .iter()
2540                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2541                    .cloned()
2542                    .collect();
2543                if company_inspections.is_empty() {
2544                    continue;
2545                }
2546                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2547                let warranty_result = warranty_gen.generate(
2548                    &company.code,
2549                    &company_orders,
2550                    &company_inspections,
2551                    &company.currency,
2552                    framework,
2553                );
2554                if !warranty_result.journal_entries.is_empty() {
2555                    debug!(
2556                        "Generated {} warranty provision JEs for {}",
2557                        warranty_result.journal_entries.len(),
2558                        company.code
2559                    );
2560                    entries.extend(warranty_result.journal_entries);
2561                }
2562            }
2563        }
2564
2565        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2566        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2567        {
2568            let cogs_currency = self
2569                .config
2570                .companies
2571                .first()
2572                .map(|c| c.currency.as_str())
2573                .unwrap_or("USD");
2574            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2575                &document_flows.deliveries,
2576                &manufacturing_snap.production_orders,
2577                cogs_currency,
2578            );
2579            if !cogs_jes.is_empty() {
2580                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2581                entries.extend(cogs_jes);
2582            }
2583        }
2584
2585        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2586        //
2587        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2588        // subledger inventory positions.  Here we reconcile them so that position balances
2589        // reflect the actual stock movements within the generation period.
2590        if !manufacturing_snap.inventory_movements.is_empty()
2591            && !subledger.inventory_positions.is_empty()
2592        {
2593            use datasynth_core::models::MovementType as MfgMovementType;
2594            let mut receipt_count = 0usize;
2595            let mut issue_count = 0usize;
2596            for movement in &manufacturing_snap.inventory_movements {
2597                // Find a matching position by material code and company
2598                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2599                    p.material_id == movement.material_code
2600                        && p.company_code == movement.entity_code
2601                }) {
2602                    match movement.movement_type {
2603                        MfgMovementType::GoodsReceipt => {
2604                            // Increase stock and update weighted-average cost
2605                            pos.add_quantity(
2606                                movement.quantity,
2607                                movement.value,
2608                                movement.movement_date,
2609                            );
2610                            receipt_count += 1;
2611                        }
2612                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2613                            // Decrease stock (best-effort; silently skip if insufficient)
2614                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2615                            issue_count += 1;
2616                        }
2617                        _ => {}
2618                    }
2619                }
2620            }
2621            debug!(
2622                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2623                manufacturing_snap.inventory_movements.len(),
2624                receipt_count,
2625                issue_count,
2626            );
2627        }
2628
2629        // Update final entry/line-item stats after all JE-generating phases
2630        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2631        if !entries.is_empty() {
2632            stats.total_entries = entries.len() as u64;
2633            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2634            debug!(
2635                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2636                stats.total_entries, stats.total_line_items
2637            );
2638        }
2639
2640        // Phase 7b: Apply internal controls to journal entries
2641        if self.config.internal_controls.enabled && !entries.is_empty() {
2642            info!("Phase 7b: Applying internal controls to journal entries");
2643            let control_config = ControlGeneratorConfig {
2644                exception_rate: self.config.internal_controls.exception_rate,
2645                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2646                enable_sox_marking: true,
2647                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2648                    self.config.internal_controls.sox_materiality_threshold,
2649                )
2650                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2651                ..Default::default()
2652            };
2653            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2654            for entry in &mut entries {
2655                control_gen.apply_controls(entry, &coa);
2656            }
2657            let with_controls = entries
2658                .iter()
2659                .filter(|e| !e.header.control_ids.is_empty())
2660                .count();
2661            info!(
2662                "Applied controls to {} entries ({} with control IDs assigned)",
2663                entries.len(),
2664                with_controls
2665            );
2666        }
2667
2668        // Phase 7c: Extract SoD violations from annotated journal entries.
2669        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2670        // Here we materialise those flags into standalone SodViolation records.
2671        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2672            .iter()
2673            .filter(|e| e.header.sod_violation)
2674            .filter_map(|e| {
2675                e.header.sod_conflict_type.map(|ct| {
2676                    use datasynth_core::models::{RiskLevel, SodViolation};
2677                    let severity = match ct {
2678                        datasynth_core::models::SodConflictType::PaymentReleaser
2679                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2680                            RiskLevel::Critical
2681                        }
2682                        datasynth_core::models::SodConflictType::PreparerApprover
2683                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2684                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2685                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2686                            RiskLevel::High
2687                        }
2688                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2689                            RiskLevel::Medium
2690                        }
2691                    };
2692                    let action = format!(
2693                        "SoD conflict {:?} on entry {} ({})",
2694                        ct, e.header.document_id, e.header.company_code
2695                    );
2696                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2697                })
2698            })
2699            .collect();
2700        if !sod_violations.is_empty() {
2701            info!(
2702                "Phase 7c: Extracted {} SoD violations from {} entries",
2703                sod_violations.len(),
2704                entries.len()
2705            );
2706        }
2707
2708        // Emit the journal entries generated so far to the stream sink (JEs merged into `entries` by later phases are not part of this call)
2709        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2710
2711        // Phase 7d: Document-level fraud injection + propagation to derived JEs.
2712        //
2713        // This runs BEFORE line-level anomaly injection so that JEs tagged by
2714        // document-level fraud are exempt from subsequent line-level flag
2715        // overwrites, and so downstream consumers see a coherent picture.
2716        //
2717        // Gated by `fraud.enabled` and `fraud.document_fraud_rate` — fraud disabled, `None`, or a rate <= `0.0` is a no-op.
2718        {
2719            let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2720            if self.config.fraud.enabled && doc_rate > 0.0 {
2721                use datasynth_core::fraud_propagation::{
2722                    inject_document_fraud, propagate_documents_to_entries,
2723                };
2724                use datasynth_core::utils::weighted_select;
2725                use datasynth_core::FraudType;
2726                use rand_chacha::rand_core::SeedableRng;
2727
2728                let dist = &self.config.fraud.fraud_type_distribution;
2729                let fraud_type_weights: [(FraudType, f64); 8] = [
2730                    (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2731                    (FraudType::FictitiousEntry, dist.fictitious_transaction),
2732                    (FraudType::RevenueManipulation, dist.revenue_manipulation),
2733                    (
2734                        FraudType::ImproperCapitalization,
2735                        dist.expense_capitalization,
2736                    ),
2737                    (FraudType::SplitTransaction, dist.split_transaction),
2738                    (FraudType::TimingAnomaly, dist.timing_anomaly),
2739                    (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2740                    (FraudType::DuplicatePayment, dist.duplicate_payment),
2741                ];
2742                let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2743                let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2744                    if weights_sum <= 0.0 {
2745                        FraudType::FictitiousEntry
2746                    } else {
2747                        *weighted_select(rng, &fraud_type_weights)
2748                    }
2749                };
2750
2751                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2752                let mut doc_tagged = 0usize;
2753                macro_rules! inject_into {
2754                    ($collection:expr) => {{
2755                        let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2756                            $collection.iter_mut().map(|d| &mut d.header).collect();
2757                        doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2758                    }};
2759                }
2760                inject_into!(document_flows.purchase_orders);
2761                inject_into!(document_flows.goods_receipts);
2762                inject_into!(document_flows.vendor_invoices);
2763                inject_into!(document_flows.payments);
2764                inject_into!(document_flows.sales_orders);
2765                inject_into!(document_flows.deliveries);
2766                inject_into!(document_flows.customer_invoices);
2767                if doc_tagged > 0 {
2768                    info!(
2769                        "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2770                    );
2771                }
2772
2773                if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2774                    let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2775                        Vec::new();
2776                    headers.extend(
2777                        document_flows
2778                            .purchase_orders
2779                            .iter()
2780                            .map(|d| d.header.clone()),
2781                    );
2782                    headers.extend(
2783                        document_flows
2784                            .goods_receipts
2785                            .iter()
2786                            .map(|d| d.header.clone()),
2787                    );
2788                    headers.extend(
2789                        document_flows
2790                            .vendor_invoices
2791                            .iter()
2792                            .map(|d| d.header.clone()),
2793                    );
2794                    headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2795                    headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2796                    headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2797                    headers.extend(
2798                        document_flows
2799                            .customer_invoices
2800                            .iter()
2801                            .map(|d| d.header.clone()),
2802                    );
2803                    let propagated = propagate_documents_to_entries(&headers, &mut entries);
2804                    if propagated > 0 {
2805                        info!(
2806                            "Propagated document-level fraud to {propagated} derived journal entries"
2807                        );
2808                    }
2809                }
2810            }
2811        }
2812
2813        // Phase 8: Anomaly Injection (after all JE-generating phases)
2814        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2815
2816        // Phase 8b: Apply behavioral biases to fraud entries that did NOT go
2817        // through the anomaly injector.
2818        //
2819        // Three paths set `is_fraud = true` without touching `is_anomaly`:
2820        //   - je_generator::determine_fraud (intrinsic fraud during JE generation)
2821        //   - fraud_propagation::propagate_documents_to_entries (doc-level cascade)
2822        //   - Any external mutation that sets is_fraud after the fact
2823        //
2824        // The anomaly injector already applies the same bias inline when it
2825        // tags an entry as fraud (and sets is_anomaly=true in the same step),
2826        // so gating this sweep on `!is_anomaly` avoids double-application.
2827        //
2828        // Without this sweep, fraud entries from these paths show 0 lift on
2829        // the canonical forensic signals (is_round_1000, is_off_hours,
2830        // is_weekend, is_post_close), which is exactly what the SDK-side
2831        // evaluator caught in v3.1 — fraud features had worse lift than
2832        // baseline. See DS-3.1 post-deploy feedback.
2833        {
2834            use datasynth_core::fraud_bias::{
2835                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2836            };
2837            use rand_chacha::rand_core::SeedableRng;
2838            let cfg = FraudBehavioralBiasConfig::default();
2839            if cfg.enabled {
2840                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2841                let mut swept = 0usize;
2842                for entry in entries.iter_mut() {
2843                    if entry.header.is_fraud && !entry.header.is_anomaly {
2844                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2845                        swept += 1;
2846                    }
2847                }
2848                if swept > 0 {
2849                    info!(
2850                        "Applied behavioral biases to {swept} non-anomaly fraud entries \
2851                         (doc-propagated + je_generator intrinsic fraud)"
2852                    );
2853                }
2854            }
2855        }
2856
2857        // Emit anomaly labels to stream sink
2858        self.emit_phase_items(
2859            "anomaly_injection",
2860            "LabeledAnomaly",
2861            &anomaly_labels.labels,
2862        );
2863
2864        // Propagate fraud labels from journal entries to source documents.
2865        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2866        // instead of tracing through document_references.json.
2867        //
2868        // Gated by `fraud.propagate_to_document` (default true) — disable when
2869        // downstream consumers want document fraud flags to reflect only
2870        // document-level injection, not line-level.
2871        if self.config.fraud.propagate_to_document {
2872            use std::collections::HashMap;
2873            // Build a map from lookup key (JE reference, bare doc ID, JE document_id) -> fraud_type, from fraudulent JEs only.
2874            //
2875            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2876            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2877            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2878            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2879            // we register BOTH the prefixed form (raw reference) AND the bare form
2880            // (post-colon portion) in the map. Also register the JE's document_id
2881            // UUID so documents that set `journal_entry_id` match via that path.
2882            //
2883            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2884            // looked up "foo", silently producing 0 propagations.
2885            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2886            for je in &entries {
2887                if je.header.is_fraud {
2888                    if let Some(ref fraud_type) = je.header.fraud_type {
2889                        if let Some(ref reference) = je.header.reference {
2890                            // Register the full reference ("GR:PO-2024-000001")
2891                            fraud_map.insert(reference.clone(), *fraud_type);
2892                            // Also register the bare document ID ("PO-2024-000001")
2893                            // by stripping the "PREFIX:" if present.
2894                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2895                                if !bare.is_empty() {
2896                                    fraud_map.insert(bare.to_string(), *fraud_type);
2897                                }
2898                            }
2899                        }
2900                        // Also tag via journal_entry_id on document headers
2901                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2902                    }
2903                }
2904            }
2905            if !fraud_map.is_empty() {
2906                let mut propagated = 0usize;
2907                // Use DocumentHeader::propagate_fraud method for each doc type
2908                macro_rules! propagate_to {
2909                    ($collection:expr) => {
2910                        for doc in &mut $collection {
2911                            if doc.header.propagate_fraud(&fraud_map) {
2912                                propagated += 1;
2913                            }
2914                        }
2915                    };
2916                }
2917                propagate_to!(document_flows.purchase_orders);
2918                propagate_to!(document_flows.goods_receipts);
2919                propagate_to!(document_flows.vendor_invoices);
2920                propagate_to!(document_flows.payments);
2921                propagate_to!(document_flows.sales_orders);
2922                propagate_to!(document_flows.deliveries);
2923                propagate_to!(document_flows.customer_invoices);
2924                if propagated > 0 {
2925                    info!(
2926                        "Propagated fraud labels to {} document flow records",
2927                        propagated
2928                    );
2929                }
2930            }
2931        }
2932
2933        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2934        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2935
2936        // Emit red flags to stream sink
2937        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2938
2939        // Phase 26b: Collusion Ring Generation (after red flags)
2940        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2941
2942        // Emit collusion rings to stream sink
2943        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2944
2945        // Phase 8d: W8.1 — TB drift-correction pass.  When a TB anchor prior is
2946        // loaded (industry bundle with real per-account targets), emit balanced
2947        // "SA" adjustment JEs to nudge the synthetic balance sheet toward the
2948        // corpus-median shape before final balance validation runs.
2949        self.phase_tb_drift_correction(&mut entries)?;
2950
2951        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2952        let balance_validation = self.phase_balance_validation(&entries)?;
2953
2954        // Phase 9a: COA coverage — every gl_account in JEs must exist in the
2955        // chart of accounts. Soft warning by default; hard fail when the
2956        // user passes --validate-coa-coverage / sets the strict flag.
2957        self.validate_coa_coverage(&entries, coa.as_ref())?;
2958
2959        // Phase 9b: GL-to-Subledger Reconciliation
2960        let subledger_reconciliation =
2961            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2962
2963        // Phase 10: Data Quality Injection
2964        let (data_quality_stats, quality_issues) =
2965            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2966
2967        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2968        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2969
2970        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
2971        {
2972            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2973
2974            // Assert 1: Every JE not flagged `is_fraud` or `is_anomaly` must individually balance (debits = credits within `tolerance`).
2975            // Flagged JEs are excluded since injected anomalies/fraud may be intentionally unbalanced.
2976            let mut unbalanced_clean = 0usize;
2977            for je in &entries {
2978                if je.header.is_fraud || je.header.is_anomaly {
2979                    continue;
2980                }
2981                let diff = (je.total_debit() - je.total_credit()).abs();
2982                if diff > tolerance {
2983                    unbalanced_clean += 1;
2984                    if unbalanced_clean <= 3 {
2985                        warn!(
2986                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2987                            je.header.document_id,
2988                            je.total_debit(),
2989                            je.total_credit(),
2990                            diff
2991                        );
2992                    }
2993                }
2994            }
2995            if unbalanced_clean > 0 {
2996                return Err(datasynth_core::error::SynthError::generation(format!(
2997                    "{} non-anomaly JEs are unbalanced (debits != credits). \
2998                     First few logged above. Tolerance={}",
2999                    unbalanced_clean, tolerance
3000                )));
3001            }
3002            debug!(
3003                "Phase 10c: All {} non-anomaly JEs individually balanced",
3004                entries
3005                    .iter()
3006                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
3007                    .count()
3008            );
3009
3010            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
3011            let company_codes: Vec<String> = self
3012                .config
3013                .companies
3014                .iter()
3015                .map(|c| c.code.clone())
3016                .collect();
3017            for company_code in &company_codes {
3018                let mut assets = rust_decimal::Decimal::ZERO;
3019                let mut liab_equity = rust_decimal::Decimal::ZERO;
3020
3021                for entry in &entries {
3022                    if entry.header.company_code != *company_code {
3023                        continue;
3024                    }
3025                    for line in &entry.lines {
3026                        let acct = &line.gl_account;
3027                        let net = line.debit_amount - line.credit_amount;
3028                        // Asset accounts (1xxx): normal debit balance
3029                        if acct.starts_with('1') {
3030                            assets += net;
3031                        }
3032                        // Liability (2xxx) + Equity (3xxx): normal credit balance
3033                        else if acct.starts_with('2') || acct.starts_with('3') {
3034                            liab_equity -= net; // credit-normal, so negate debit-net
3035                        }
3036                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
3037                        // so they net to zero after closing entries
3038                    }
3039                }
3040
3041                let bs_diff = (assets - liab_equity).abs();
3042                if bs_diff > tolerance {
3043                    warn!(
3044                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3045                         revenue/expense closing entries may not fully offset",
3046                        company_code, assets, liab_equity, bs_diff
3047                    );
3048                    // Warn rather than error: multi-period datasets may have timing
3049                    // differences from accruals/deferrals that resolve in later periods.
3050                    // The per-JE balance check (Assert 1 above) is the hard gate.
3051                } else {
3052                    debug!(
3053                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3054                        company_code, assets, liab_equity, bs_diff
3055                    );
3056                }
3057            }
3058
3059            info!("Phase 10c: All generation-time accounting assertions passed");
3060        }
3061
3062        // Phase 11: Audit Data
3063        let audit = self.phase_audit_data(&entries, &mut stats)?;
3064
3065        // Phase 12: Banking KYC/AML Data
3066        let mut banking = self.phase_banking_data(&mut stats)?;
3067
3068        // Phase 12.5: Bridge document-flow Payments → BankTransactions
3069        // Creates coherence between the accounting layer (payments, JEs) and the
3070        // banking layer (bank transactions). A vendor invoice payment now appears
3071        // on both sides with cross-references and fraud labels propagated.
3072        if self.phase_config.generate_banking
3073            && !document_flows.payments.is_empty()
3074            && !banking.accounts.is_empty()
3075        {
3076            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3077            if bridge_rate > 0.0 {
3078                let mut bridge =
3079                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3080                        self.seed,
3081                    );
3082                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3083                    &document_flows.payments,
3084                    &banking.customers,
3085                    &banking.accounts,
3086                    bridge_rate,
3087                );
3088                info!(
3089                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3090                    bridge_stats.bridged_count,
3091                    bridge_stats.transactions_emitted,
3092                    bridge_stats.fraud_propagated,
3093                );
3094                let bridged_count = bridged_txns.len();
3095                banking.transactions.extend(bridged_txns);
3096
3097                // Re-run velocity computation so bridged txns also get features
3098                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
3099                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3100                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
3101                        &mut banking.transactions,
3102                    );
3103                }
3104
3105                // Recompute suspicious count after bridging
3106                banking.suspicious_count = banking
3107                    .transactions
3108                    .iter()
3109                    .filter(|t| t.is_suspicious)
3110                    .count();
3111                stats.banking_transaction_count = banking.transactions.len();
3112                stats.banking_suspicious_count = banking.suspicious_count;
3113            }
3114        }
3115
3116        // Phase 13: Graph Export
3117        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3118
3119        // Phase 14: LLM Enrichment
3120        self.phase_llm_enrichment(&mut stats);
3121
3122        // Phase 15: Diffusion Enhancement
3123        self.phase_diffusion_enhancement(&entries, &mut stats);
3124
3125        // Phase 16: Causal Overlay
3126        self.phase_causal_overlay(&mut stats);
3127
3128        // Phase 17: Bank Reconciliation + Financial Statements
3129        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
3130        // provision data (from accounting_standards / tax snapshots) can be wired in.
3131        let mut financial_reporting = self.phase_financial_reporting(
3132            &document_flows,
3133            &entries,
3134            &coa,
3135            &hr,
3136            &audit,
3137            &mut stats,
3138        )?;
3139
3140        // BS coherence check: assets = liabilities + equity
3141        {
3142            use datasynth_core::models::StatementType;
3143            for stmt in &financial_reporting.consolidated_statements {
3144                if stmt.statement_type == StatementType::BalanceSheet {
3145                    let total_assets: rust_decimal::Decimal = stmt
3146                        .line_items
3147                        .iter()
3148                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
3149                        .map(|li| li.amount)
3150                        .sum();
3151                    let total_le: rust_decimal::Decimal = stmt
3152                        .line_items
3153                        .iter()
3154                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3155                        .map(|li| li.amount)
3156                        .sum();
3157                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3158                        warn!(
3159                            "BS equation imbalance: assets={}, L+E={}",
3160                            total_assets, total_le
3161                        );
3162                    }
3163                }
3164            }
3165        }
3166
3167        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
3168        let accounting_standards =
3169            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3170
3171        // Phase 18a: Merge ECL journal entries into main GL
3172        if !accounting_standards.ecl_journal_entries.is_empty() {
3173            debug!(
3174                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3175                accounting_standards.ecl_journal_entries.len()
3176            );
3177            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3178        }
3179
3180        // Phase 18a: Merge provision journal entries into main GL
3181        if !accounting_standards.provision_journal_entries.is_empty() {
3182            debug!(
3183                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3184                accounting_standards.provision_journal_entries.len()
3185            );
3186            entries.extend(
3187                accounting_standards
3188                    .provision_journal_entries
3189                    .iter()
3190                    .cloned(),
3191            );
3192        }
3193
3194        // Phase 18b: OCPM Events (after all process data is available)
3195        let mut ocpm = self.phase_ocpm_events(
3196            &document_flows,
3197            &sourcing,
3198            &hr,
3199            &manufacturing_snap,
3200            &banking,
3201            &audit,
3202            &financial_reporting,
3203            &mut stats,
3204        )?;
3205
3206        // Emit OCPM events to stream sink
3207        if let Some(ref event_log) = ocpm.event_log {
3208            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3209        }
3210
3211        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
3212        if let Some(ref event_log) = ocpm.event_log {
3213            // Build reverse index: document_ref → positions of the matching events in `event_log.events`
3214            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3215                std::collections::HashMap::new();
3216            for (idx, event) in event_log.events.iter().enumerate() {
3217                if let Some(ref doc_ref) = event.document_ref {
3218                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3219                }
3220            }
3221
3222            if !doc_index.is_empty() {
3223                let mut annotated = 0usize;
3224                for entry in &mut entries {
3225                    let doc_id_str = entry.header.document_id.to_string();
3226                    // Collect matching event indices from document_id and reference
3227                    let mut matched_indices: Vec<usize> = Vec::new();
3228                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3229                        matched_indices.extend(indices);
3230                    }
3231                    if let Some(ref reference) = entry.header.reference {
3232                        let bare_ref = reference
3233                            .find(':')
3234                            .map(|i| &reference[i + 1..])
3235                            .unwrap_or(reference.as_str());
3236                        if let Some(indices) = doc_index.get(bare_ref) {
3237                            for &idx in indices {
3238                                if !matched_indices.contains(&idx) {
3239                                    matched_indices.push(idx);
3240                                }
3241                            }
3242                        }
3243                    }
3244                    // Apply matches to JE header
3245                    if !matched_indices.is_empty() {
3246                        for &idx in &matched_indices {
3247                            let event = &event_log.events[idx];
3248                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3249                                entry.header.ocpm_event_ids.push(event.event_id);
3250                            }
3251                            for obj_ref in &event.object_refs {
3252                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3253                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
3254                                }
3255                            }
3256                            if entry.header.ocpm_case_id.is_none() {
3257                                entry.header.ocpm_case_id = event.case_id;
3258                            }
3259                        }
3260                        annotated += 1;
3261                    }
3262                }
3263                debug!(
3264                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3265                    annotated
3266                );
3267            }
3268        }
3269
3270        // Phase 18d: Synthesize OCPM events for orphan JEs (period-close,
3271        // IC eliminations, opening balances, standards-driven entries) so
3272        // every JournalEntry carries at least one `ocpm_event_ids` link.
3273        if let Some(ref mut event_log) = ocpm.event_log {
3274            let synthesized =
3275                datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3276            if synthesized > 0 {
3277                info!(
3278                    "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3279                );
3280            }
3281
3282            // Phase 18e: Mirror JE anomaly / fraud flags onto the linked OCEL
3283            // events and their owning CaseTrace. Without this, every exported
3284            // OCEL event has `is_anomaly = false` even when the underlying JE
3285            // was flagged.
3286            let anomaly_events =
3287                datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3288            if anomaly_events > 0 {
3289                info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3290            }
3291
3292            // Phase 18f: Inject process-variant imperfections (rework, skipped
3293            // steps, out-of-order events) so conformance checkers see
3294            // realistic variant counts and fitness < 1.0. Uses the P2P
3295            // process rates as the single source of truth.
3296            let p2p_cfg = &self.config.ocpm.p2p_process;
3297            let any_imperfection = p2p_cfg.rework_probability > 0.0
3298                || p2p_cfg.skip_step_probability > 0.0
3299                || p2p_cfg.out_of_order_probability > 0.0;
3300            if any_imperfection {
3301                use rand_chacha::rand_core::SeedableRng;
3302                let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3303                    rework_rate: p2p_cfg.rework_probability,
3304                    skip_rate: p2p_cfg.skip_step_probability,
3305                    out_of_order_rate: p2p_cfg.out_of_order_probability,
3306                };
3307                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3308                let stats =
3309                    datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3310                if stats.rework + stats.skipped + stats.out_of_order > 0 {
3311                    info!(
3312                        "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3313                        stats.rework, stats.skipped, stats.out_of_order
3314                    );
3315                }
3316            }
3317        }
3318
3319        // Phase 19: Sales Quotes, Management KPIs, Budgets
3320        let sales_kpi_budgets =
3321            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3322
3323        // Phase 22: Treasury Data Generation
3324        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
3325        // are included in the pre-tax income used by phase_tax_generation.
3326        let treasury =
3327            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3328
3329        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
3330        if !treasury.journal_entries.is_empty() {
3331            debug!(
3332                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3333                treasury.journal_entries.len()
3334            );
3335            entries.extend(treasury.journal_entries.iter().cloned());
3336        }
3337
3338        // Phase 20: Tax Generation
3339        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3340
3341        // Phase 20 JEs: Merge tax posting journal entries into main GL
3342        if !tax.tax_posting_journal_entries.is_empty() {
3343            debug!(
3344                "Merging {} tax posting JEs into GL",
3345                tax.tax_posting_journal_entries.len()
3346            );
3347            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3348        }
3349
3350        // Phase 20b: FINAL fraud behavioral bias sweep.
3351        //
3352        // Many phases AFTER Phase 8b (ECL / provisions / treasury / tax /
3353        // period close) extend `entries` with new journal entries that may
3354        // carry `is_fraud = true` (e.g. tax-provision entries derived from
3355        // already-fraudulent transactions). Those late additions miss the
3356        // Phase 8b sweep and ship without bias applied — which is exactly
3357        // why SDK-team production jobs kept reporting `off_hours 0× lift`
3358        // even after v3.1.1 closed the per-phase gap for early-added JEs.
3359        //
3360        // Running the sweep one more time here guarantees every is_fraud
3361        // entry — regardless of which phase added it — has bias applied.
3362        // `!is_anomaly` gates out anomaly-injector entries (which already
3363        // got biased inline); the sweep is otherwise idempotent-ish:
3364        // weekend / off_hours re-fire to another valid weekend / off-hour,
3365        // post_close is guarded by `!is_post_close`, and round-dollar
3366        // rescaling on an already-round amount is a no-op (ratio = 1).
3367        {
3368            use datasynth_core::fraud_bias::{
3369                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3370            };
3371            use rand_chacha::rand_core::SeedableRng;
3372            let cfg = FraudBehavioralBiasConfig::default();
3373            if cfg.enabled {
3374                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3375                let mut swept = 0usize;
3376                for entry in entries.iter_mut() {
3377                    if entry.header.is_fraud && !entry.header.is_anomaly {
3378                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3379                        swept += 1;
3380                    }
3381                }
3382                if swept > 0 {
3383                    info!(
3384                        "Phase 20b: final behavioral-bias sweep applied to {swept} \
3385                         non-anomaly fraud entries (covers late-added JEs from \
3386                         ECL / provisions / treasury / tax / period-close)"
3387                    );
3388                }
3389            }
3390        }
3391
3392        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
3393        // Build supplementary cash flow items from upstream JE data (depreciation,
3394        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
3395        {
3396            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3397
3398            let framework_str = {
3399                use datasynth_config::schema::AccountingFrameworkConfig;
3400                match self
3401                    .config
3402                    .accounting_standards
3403                    .framework
3404                    .unwrap_or_default()
3405                {
3406                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3407                        "IFRS"
3408                    }
3409                    _ => "US_GAAP",
3410                }
3411            };
3412
3413            // Sum depreciation debits (account 6000) from close JEs
3414            let depreciation_total: rust_decimal::Decimal = entries
3415                .iter()
3416                .filter(|je| je.header.document_type == "CL")
3417                .flat_map(|je| je.lines.iter())
3418                .filter(|l| l.gl_account.starts_with("6000"))
3419                .map(|l| l.debit_amount)
3420                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3421
3422            // Sum interest expense debits (account 7100)
3423            let interest_paid: rust_decimal::Decimal = entries
3424                .iter()
3425                .flat_map(|je| je.lines.iter())
3426                .filter(|l| l.gl_account.starts_with("7100"))
3427                .map(|l| l.debit_amount)
3428                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3429
3430            // Sum tax expense debits (account 8000)
3431            let tax_paid: rust_decimal::Decimal = entries
3432                .iter()
3433                .flat_map(|je| je.lines.iter())
3434                .filter(|l| l.gl_account.starts_with("8000"))
3435                .map(|l| l.debit_amount)
3436                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3437
3438            // Sum capex debits on fixed assets (account 1500)
3439            let capex: rust_decimal::Decimal = entries
3440                .iter()
3441                .flat_map(|je| je.lines.iter())
3442                .filter(|l| l.gl_account.starts_with("1500"))
3443                .map(|l| l.debit_amount)
3444                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3445
3446            // Dividends paid: sum debits on dividends payable (account 2170) from payment JEs
3447            let dividends_paid: rust_decimal::Decimal = entries
3448                .iter()
3449                .flat_map(|je| je.lines.iter())
3450                .filter(|l| l.gl_account == "2170")
3451                .map(|l| l.debit_amount)
3452                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3453
3454            let cf_data = CashFlowSourceData {
3455                depreciation_total,
3456                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
3457                delta_ar: rust_decimal::Decimal::ZERO,
3458                delta_ap: rust_decimal::Decimal::ZERO,
3459                delta_inventory: rust_decimal::Decimal::ZERO,
3460                capex,
3461                debt_issuance: rust_decimal::Decimal::ZERO,
3462                debt_repayment: rust_decimal::Decimal::ZERO,
3463                interest_paid,
3464                tax_paid,
3465                dividends_paid,
3466                framework: framework_str.to_string(),
3467            };
3468
3469            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3470            if !enhanced_cf_items.is_empty() {
3471                // Merge into ALL cash flow statements (standalone + consolidated)
3472                use datasynth_core::models::StatementType;
3473                let merge_count = enhanced_cf_items.len();
3474                for stmt in financial_reporting
3475                    .financial_statements
3476                    .iter_mut()
3477                    .chain(financial_reporting.consolidated_statements.iter_mut())
3478                    .chain(
3479                        financial_reporting
3480                            .standalone_statements
3481                            .values_mut()
3482                            .flat_map(|v| v.iter_mut()),
3483                    )
3484                {
3485                    if stmt.statement_type == StatementType::CashFlowStatement {
3486                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3487                    }
3488                }
3489                info!(
3490                    "Enhanced cash flow: {} supplementary items merged into CF statements",
3491                    merge_count
3492                );
3493            }
3494        }
3495
3496        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
3497        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
3498        self.generate_notes_to_financial_statements(
3499            &mut financial_reporting,
3500            &accounting_standards,
3501            &tax,
3502            &hr,
3503            &audit,
3504            &treasury,
3505        );
3506
3507        // Phase 20b: Supplement segment reports from real JEs (v2.4)
3508        // When we have 2+ companies, derive segment data from actual journal entries
3509        // to complement or replace the FS-generator-based segments.
3510        if self.config.companies.len() >= 2 && !entries.is_empty() {
3511            let companies: Vec<(String, String)> = self
3512                .config
3513                .companies
3514                .iter()
3515                .map(|c| (c.code.clone(), c.name.clone()))
3516                .collect();
3517            let ic_elim: rust_decimal::Decimal =
3518                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3519            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3520                .unwrap_or(NaiveDate::MIN);
3521            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3522            let period_label = format!(
3523                "{}-{:02}",
3524                end_date.year(),
3525                (end_date - chrono::Days::new(1)).month()
3526            );
3527
3528            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3529            let (je_segments, je_recon) =
3530                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3531            if !je_segments.is_empty() {
3532                info!(
3533                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3534                    je_segments.len(),
3535                    ic_elim,
3536                );
3537                // Replace if existing segment_reports were empty; otherwise supplement
3538                if financial_reporting.segment_reports.is_empty() {
3539                    financial_reporting.segment_reports = je_segments;
3540                    financial_reporting.segment_reconciliations = vec![je_recon];
3541                } else {
3542                    financial_reporting.segment_reports.extend(je_segments);
3543                    financial_reporting.segment_reconciliations.push(je_recon);
3544                }
3545            }
3546        }
3547
3548        // Phase 21: ESG Data Generation
3549        let esg_snap =
3550            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3551
3552        // Phase 23: Project Accounting Data Generation
3553        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3554
3555        // Phase 24: Process Evolution + Organizational Events
3556        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3557
3558        // Phase 24b: Disruption Events
3559        let disruption_events = self.phase_disruption_events(&mut stats)?;
3560
3561        // Phase 27: Bi-Temporal Vendor Version Chains
3562        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3563
3564        // Phase 28: Entity Relationship Graph + Cross-Process Links
3565        let (entity_relationship_graph, cross_process_links) =
3566            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3567
3568        // Phase 29: Industry-specific GL accounts
3569        let industry_output = self.phase_industry_data(&mut stats);
3570
3571        // Phase: Compliance regulations (must run before hypergraph so it can be included)
3572        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3573
3574        // Phase: Neural enhancement (config-acknowledged-only in v4.0).
3575        //
3576        // The neural / hybrid diffusion path was a documented L2 stub
3577        // in v3.x; actual neural-network training requires ML
3578        // infrastructure (PyTorch / candle bindings, GPU access,
3579        // training loops) that was never wired through the
3580        // orchestrator. Rather than keep a silently-no-op block that
3581        // misleads users into thinking neural training happens, v4.0
3582        // acknowledges the config — exposing stats so downstream
3583        // tooling can see the request — but emits a clear warning
3584        // when a non-statistical backend is requested. The statistical
3585        // diffusion backend continues to run via
3586        // `phase_diffusion_enhancement`.
3587        //
3588        // Users who need real neural diffusion: track the roadmap item
3589        // in the v4.x backlog and consider contributing the backend
3590        // (the `DiffusionBackend` trait is the integration point).
3591        if self.config.diffusion.enabled
3592            && (self.config.diffusion.backend == "neural"
3593                || self.config.diffusion.backend == "hybrid")
3594        {
3595            let neural = &self.config.diffusion.neural;
3596            let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3597            stats.neural_hybrid_weight = Some(weight);
3598            stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3599            stats.neural_routed_column_count = Some(neural.neural_columns.len());
3600            warn!(
3601                "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3602                 the neural/hybrid training path is not yet shipped. Config \
3603                 is captured in stats (weight={weight:.2}, strategy={}, \
3604                 columns={}) but no neural training runs. Statistical \
3605                 diffusion (backend='statistical') continues to work.",
3606                self.config.diffusion.backend,
3607                neural.hybrid_strategy,
3608                neural.neural_columns.len(),
3609            );
3610        }
3611
3612        // Phase 19b: Hypergraph Export (after all data is available)
3613        self.phase_hypergraph_export(
3614            &coa,
3615            &entries,
3616            &document_flows,
3617            &sourcing,
3618            &hr,
3619            &manufacturing_snap,
3620            &banking,
3621            &audit,
3622            &financial_reporting,
3623            &ocpm,
3624            &compliance_regulations,
3625            &mut stats,
3626        )?;
3627
3628        // Phase 10c: Additional graph builders (approval, entity, banking)
3629        // These run after all data is available since they need banking/IC data.
3630        if self.phase_config.generate_graph_export {
3631            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3632        }
3633
3634        // Log informational messages for config sections not yet fully wired
3635        if self.config.streaming.enabled {
3636            info!("Note: streaming config is enabled but batch mode does not use it");
3637        }
3638        if self.config.vendor_network.enabled {
3639            debug!("Vendor network config available; relationship graph generation is partial");
3640        }
3641        if self.config.customer_segmentation.enabled {
3642            debug!("Customer segmentation config available; segment-aware generation is partial");
3643        }
3644
3645        // Log final resource statistics
3646        let resource_stats = self.resource_guard.stats();
3647        info!(
3648            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3649            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3650            resource_stats.disk.estimated_bytes_written,
3651            resource_stats.degradation_level
3652        );
3653
3654        // Flush any remaining stream sink data
3655        if let Some(ref sink) = self.phase_sink {
3656            if let Err(e) = sink.flush() {
3657                warn!("Stream sink flush failed: {e}");
3658            }
3659        }
3660
3661        // Build data lineage graph
3662        let lineage = self.build_lineage_graph();
3663
3664        // Evaluate quality gates if enabled in config
3665        let gate_result = if self.config.quality_gates.enabled {
3666            let profile_name = &self.config.quality_gates.profile;
3667            match datasynth_eval::gates::get_profile(profile_name) {
3668                Some(profile) => {
3669                    // Build an evaluation populated with actual generation metrics.
3670                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3671
3672                    // Populate balance sheet evaluation from balance validation results
3673                    if balance_validation.validated {
3674                        eval.coherence.balance =
3675                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3676                                equation_balanced: balance_validation.is_balanced,
3677                                max_imbalance: (balance_validation.total_debits
3678                                    - balance_validation.total_credits)
3679                                    .abs(),
3680                                periods_evaluated: 1,
3681                                periods_imbalanced: if balance_validation.is_balanced {
3682                                    0
3683                                } else {
3684                                    1
3685                                },
3686                                period_results: Vec::new(),
3687                                companies_evaluated: self.config.companies.len(),
3688                            });
3689                    }
3690
3691                    // Set coherence passes based on balance validation
3692                    eval.coherence.passes = balance_validation.is_balanced;
3693                    if !balance_validation.is_balanced {
3694                        eval.coherence
3695                            .failures
3696                            .push("Balance sheet equation not satisfied".to_string());
3697                    }
3698
3699                    // Set statistical score based on entry count (basic sanity)
3700                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3701                    eval.statistical.passes = !entries.is_empty();
3702
3703                    // Set quality score from data quality stats
3704                    eval.quality.overall_score = 0.9; // Default high for generated data
3705                    eval.quality.passes = true;
3706
3707                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3708                    info!(
3709                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3710                        profile_name, result.gates_passed, result.gates_total, result.summary
3711                    );
3712                    Some(result)
3713                }
3714                None => {
3715                    warn!(
3716                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3717                        profile_name
3718                    );
3719                    None
3720                }
3721            }
3722        } else {
3723            None
3724        };
3725
3726        // Generate internal controls if enabled
3727        let internal_controls = if self.config.internal_controls.enabled {
3728            InternalControl::standard_controls()
3729        } else {
3730            Vec::new()
3731        };
3732
3733        // v3.3.0: analytics-metadata phase. Runs AFTER all JE-adding
3734        // phases (including fraud-bias sweep at Phase 20b) so derived
3735        // outputs reflect final data.
3736        let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3737
3738        // v3.5.1: statistical validation over the final amount
3739        // distribution. Runs *after* all JE-adding phases so the report
3740        // reflects everything the user will see in the output. Returns
3741        // `None` unless `distributions.validation.enabled = true`.
3742        let statistical_validation = self.phase_statistical_validation(&entries)?;
3743
3744        // v4.1.3+: interconnectivity snapshot — tier assignments,
3745        // value-segment labels, industry-specific metadata. Runs after
3746        // master data is settled so it can index stable IDs.
3747        let interconnectivity = self.phase_interconnectivity();
3748
3749        // SP5.2 — snapshot the CoA semantic prior (if any) into the result so
3750        // output_writer can use it as a fallback index for account_description
3751        // resolution when the synthetic CoA index misses.
3752        let coa_semantic_prior = self
3753            .cached_priors
3754            .as_ref()
3755            .and_then(|p| p.coa_semantic.clone());
3756
3757        Ok(EnhancedGenerationResult {
3758            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3759            master_data: std::mem::take(&mut self.master_data),
3760            document_flows,
3761            subledger,
3762            ocpm,
3763            audit,
3764            banking,
3765            graph_export,
3766            sourcing,
3767            financial_reporting,
3768            hr,
3769            accounting_standards,
3770            manufacturing: manufacturing_snap,
3771            sales_kpi_budgets,
3772            tax,
3773            esg: esg_snap,
3774            treasury,
3775            project_accounting,
3776            process_evolution,
3777            organizational_events,
3778            disruption_events,
3779            intercompany,
3780            journal_entries: entries,
3781            anomaly_labels,
3782            balance_validation,
3783            data_quality_stats,
3784            quality_issues,
3785            statistics: stats,
3786            lineage: Some(lineage),
3787            gate_result,
3788            internal_controls,
3789            sod_violations,
3790            opening_balances,
3791            subledger_reconciliation,
3792            counterfactual_pairs,
3793            red_flags,
3794            collusion_rings,
3795            temporal_vendor_chains,
3796            entity_relationship_graph,
3797            cross_process_links,
3798            industry_output,
3799            coa_semantic_prior,
3800            compliance_regulations,
3801            analytics_metadata,
3802            statistical_validation,
3803            interconnectivity,
3804        })
3805    }
3806
3807    /// v4.1.3+: populate the interconnectivity snapshot from
3808    /// previously-inert schema sections. Empty when all sections are
3809    /// disabled.
3810    fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3811        use rand::{RngExt, SeedableRng};
3812        use rand_chacha::ChaCha8Rng;
3813
3814        let mut snap = InterconnectivitySnapshot::default();
3815        let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3816
3817        // --- Vendor network ---
3818        let vn = &self.config.vendor_network;
3819        if vn.enabled {
3820            let total = self.master_data.vendors.len();
3821            if total > 0 {
3822                let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3823                let remaining_after_t1 = total.saturating_sub(tier1_count);
3824                let depth = vn.depth.clamp(1, 3);
3825                let tier2_count = if depth >= 2 {
3826                    let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3827                    (tier1_count * avg).min(remaining_after_t1)
3828                } else {
3829                    0
3830                };
3831                let tier3_count = total
3832                    .saturating_sub(tier1_count)
3833                    .saturating_sub(tier2_count);
3834
3835                for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3836                    let tier = if idx < tier1_count {
3837                        1
3838                    } else if idx < tier1_count + tier2_count {
3839                        2
3840                    } else {
3841                        3
3842                    };
3843                    snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3844
3845                    // Cluster assignment via configured ratios.
3846                    let cl = &vn.clusters;
3847                    let roll: f64 = rng.random();
3848                    let cluster = if roll < cl.reliable_strategic {
3849                        "reliable_strategic"
3850                    } else if roll < cl.reliable_strategic + cl.standard_operational {
3851                        "standard_operational"
3852                    } else if roll
3853                        < cl.reliable_strategic + cl.standard_operational + cl.transactional
3854                    {
3855                        "transactional"
3856                    } else {
3857                        "problematic"
3858                    };
3859                    snap.vendor_clusters
3860                        .push((vendor.vendor_id.clone(), cluster.to_string()));
3861                }
3862                let _ = tier3_count; // retained for clarity; tier 3 bucket is the remainder
3863            }
3864        }
3865
3866        // --- Customer segmentation ---
3867        let cs = &self.config.customer_segmentation;
3868        if cs.enabled {
3869            let seg = &cs.value_segments;
3870            for customer in &self.master_data.customers {
3871                let roll: f64 = rng.random();
3872                let value_segment = if roll < seg.enterprise.customer_share {
3873                    "enterprise"
3874                } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3875                    "mid_market"
3876                } else if roll
3877                    < seg.enterprise.customer_share
3878                        + seg.mid_market.customer_share
3879                        + seg.smb.customer_share
3880                {
3881                    "smb"
3882                } else {
3883                    "consumer"
3884                };
3885                snap.customer_value_segments
3886                    .push((customer.customer_id.clone(), value_segment.to_string()));
3887
3888                let roll2: f64 = rng.random();
3889                let life = &cs.lifecycle;
3890                let lifecycle = if roll2 < life.prospect_rate {
3891                    "prospect"
3892                } else if roll2 < life.prospect_rate + life.new_rate {
3893                    "new"
3894                } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3895                    "growth"
3896                } else if roll2
3897                    < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3898                {
3899                    "mature"
3900                } else if roll2
3901                    < life.prospect_rate
3902                        + life.new_rate
3903                        + life.growth_rate
3904                        + life.mature_rate
3905                        + life.at_risk_rate
3906                {
3907                    "at_risk"
3908                } else if roll2
3909                    < life.prospect_rate
3910                        + life.new_rate
3911                        + life.growth_rate
3912                        + life.mature_rate
3913                        + life.at_risk_rate
3914                        + life.churned_rate
3915                {
3916                    "churned"
3917                } else {
3918                    "won_back"
3919                };
3920                snap.customer_lifecycle_stages
3921                    .push((customer.customer_id.clone(), lifecycle.to_string()));
3922            }
3923        }
3924
3925        // --- Industry-specific metadata (minimal) ---
3926        let is = &self.config.industry_specific;
3927        if is.enabled {
3928            snap.industry_metadata.push(format!(
3929                "industry_specific.enabled=true (industry={:?})",
3930                self.config.global.industry
3931            ));
3932        }
3933
3934        snap
3935    }
3936
3937    // ========================================================================
3938    // Generation Phase Methods
3939    // ========================================================================
3940
3941    /// Phase 1: Generate Chart of Accounts and update statistics.
3942    fn phase_chart_of_accounts(
3943        &mut self,
3944        stats: &mut EnhancedGenerationStatistics,
3945    ) -> SynthResult<Arc<ChartOfAccounts>> {
3946        info!("Phase 1: Generating Chart of Accounts");
3947        let coa = self.generate_coa()?;
3948        stats.accounts_count = coa.account_count();
3949        info!(
3950            "Chart of Accounts generated: {} accounts",
3951            stats.accounts_count
3952        );
3953        self.check_resources_with_log("post-coa")?;
3954        Ok(coa)
3955    }
3956
3957    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
3958    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3959        if self.phase_config.generate_master_data {
3960            info!("Phase 2: Generating Master Data");
3961            self.generate_master_data()?;
3962            stats.vendor_count = self.master_data.vendors.len();
3963            stats.customer_count = self.master_data.customers.len();
3964            stats.material_count = self.master_data.materials.len();
3965            stats.asset_count = self.master_data.assets.len();
3966            stats.employee_count = self.master_data.employees.len();
3967            info!(
3968                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3969                stats.vendor_count, stats.customer_count, stats.material_count,
3970                stats.asset_count, stats.employee_count
3971            );
3972            self.check_resources_with_log("post-master-data")?;
3973        } else {
3974            debug!("Phase 2: Skipped (master data generation disabled)");
3975        }
3976        Ok(())
3977    }
3978
3979    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
3980    fn phase_document_flows(
3981        &mut self,
3982        stats: &mut EnhancedGenerationStatistics,
3983    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3984        let mut document_flows = DocumentFlowSnapshot::default();
3985        let mut subledger = SubledgerSnapshot::default();
3986        // Dunning JEs (interest + charges) accumulated here and merged into the
3987        // main FA-JE list below so they appear in the GL.
3988        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3989
3990        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3991            info!("Phase 3: Generating Document Flows");
3992            self.generate_document_flows(&mut document_flows)?;
3993            stats.p2p_chain_count = document_flows.p2p_chains.len();
3994            stats.o2c_chain_count = document_flows.o2c_chains.len();
3995            info!(
3996                "Document flows generated: {} P2P chains, {} O2C chains",
3997                stats.p2p_chain_count, stats.o2c_chain_count
3998            );
3999
4000            // Phase 3b: Link document flows to subledgers (for data coherence)
4001            debug!("Phase 3b: Linking document flows to subledgers");
4002            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
4003            stats.ap_invoice_count = subledger.ap_invoices.len();
4004            stats.ar_invoice_count = subledger.ar_invoices.len();
4005            debug!(
4006                "Subledgers linked: {} AP invoices, {} AR invoices",
4007                stats.ap_invoice_count, stats.ar_invoice_count
4008            );
4009
4010            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
4011            // Without this step the subledger is systematically overstated because
4012            // amount_remaining is set at invoice creation and never reduced by
4013            // the payments that were generated in the document-flow phase.
4014            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
4015            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
4016            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
4017            debug!("Payment settlements applied to AP and AR subledgers");
4018
4019            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
4020            // The as-of date is the last day of the configured period.
4021            if let Ok(start_date) =
4022                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4023            {
4024                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4025                    - chrono::Days::new(1);
4026                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4027                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
4028                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
4029                // derived from JE-level aggregation and will typically differ. This is a known
4030                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
4031                // generated independently. A future reconciliation phase should align them by
4032                // using subledger totals as the authoritative source for BS Receivables.
4033                for company in &self.config.companies {
4034                    let ar_report = ARAgingReport::from_invoices(
4035                        company.code.clone(),
4036                        &subledger.ar_invoices,
4037                        as_of_date,
4038                    );
4039                    subledger.ar_aging_reports.push(ar_report);
4040
4041                    let ap_report = APAgingReport::from_invoices(
4042                        company.code.clone(),
4043                        &subledger.ap_invoices,
4044                        as_of_date,
4045                    );
4046                    subledger.ap_aging_reports.push(ap_report);
4047                }
4048                debug!(
4049                    "AR/AP aging reports built: {} AR, {} AP",
4050                    subledger.ar_aging_reports.len(),
4051                    subledger.ap_aging_reports.len()
4052                );
4053
4054                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
4055                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4056                {
4057                    use datasynth_generators::DunningGenerator;
4058                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4059                    for company in &self.config.companies {
4060                        let currency = company.currency.as_str();
4061                        // Collect mutable references to AR invoices for this company
4062                        // (dunning generator updates dunning_info on invoices in-place).
4063                        let mut company_invoices: Vec<
4064                            datasynth_core::models::subledger::ar::ARInvoice,
4065                        > = subledger
4066                            .ar_invoices
4067                            .iter()
4068                            .filter(|inv| inv.company_code == company.code)
4069                            .cloned()
4070                            .collect();
4071
4072                        if company_invoices.is_empty() {
4073                            continue;
4074                        }
4075
4076                        let result = dunning_gen.execute_dunning_run(
4077                            &company.code,
4078                            as_of_date,
4079                            &mut company_invoices,
4080                            currency,
4081                        );
4082
4083                        // Write back updated dunning info to the main AR invoice list
4084                        for updated in &company_invoices {
4085                            if let Some(orig) = subledger
4086                                .ar_invoices
4087                                .iter_mut()
4088                                .find(|i| i.invoice_number == updated.invoice_number)
4089                            {
4090                                orig.dunning_info = updated.dunning_info.clone();
4091                            }
4092                        }
4093
4094                        subledger.dunning_runs.push(result.dunning_run);
4095                        subledger.dunning_letters.extend(result.letters);
4096                        // Dunning JEs (interest + charges) collected into local buffer.
4097                        dunning_journal_entries.extend(result.journal_entries);
4098                    }
4099                    debug!(
4100                        "Dunning runs complete: {} runs, {} letters",
4101                        subledger.dunning_runs.len(),
4102                        subledger.dunning_letters.len()
4103                    );
4104                }
4105            }
4106
4107            self.check_resources_with_log("post-document-flows")?;
4108        } else {
4109            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4110        }
4111
4112        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
4113        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4114        if !self.master_data.assets.is_empty() {
4115            debug!("Generating FA subledger records");
4116            let company_code = self
4117                .config
4118                .companies
4119                .first()
4120                .map(|c| c.code.as_str())
4121                .unwrap_or("1000");
4122            let currency = self
4123                .config
4124                .companies
4125                .first()
4126                .map(|c| c.currency.as_str())
4127                .unwrap_or("USD");
4128
4129            let mut fa_gen = datasynth_generators::FAGenerator::new(
4130                datasynth_generators::FAGeneratorConfig::default(),
4131                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4132            );
4133
4134            for asset in &self.master_data.assets {
4135                let (record, je) = fa_gen.generate_asset_acquisition(
4136                    company_code,
4137                    &format!("{:?}", asset.asset_class),
4138                    &asset.description,
4139                    asset.acquisition_date,
4140                    currency,
4141                    asset.cost_center.as_deref(),
4142                );
4143                subledger.fa_records.push(record);
4144                fa_journal_entries.push(je);
4145            }
4146
4147            stats.fa_subledger_count = subledger.fa_records.len();
4148            debug!(
4149                "FA subledger records generated: {} (with {} acquisition JEs)",
4150                stats.fa_subledger_count,
4151                fa_journal_entries.len()
4152            );
4153        }
4154
4155        // Generate Inventory subledger records from master data materials
4156        if !self.master_data.materials.is_empty() {
4157            debug!("Generating Inventory subledger records");
4158            let first_company = self.config.companies.first();
4159            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4160            let inv_currency = first_company
4161                .map(|c| c.currency.clone())
4162                .unwrap_or_else(|| "USD".to_string());
4163
4164            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4165                datasynth_generators::InventoryGeneratorConfig::default(),
4166                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4167                inv_currency.clone(),
4168            );
4169
4170            for (i, material) in self.master_data.materials.iter().enumerate() {
4171                let plant = format!("PLANT{:02}", (i % 3) + 1);
4172                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4173                let initial_qty = rust_decimal::Decimal::from(
4174                    material
4175                        .safety_stock
4176                        .to_string()
4177                        .parse::<i64>()
4178                        .unwrap_or(100),
4179                );
4180
4181                let position = inv_gen.generate_position(
4182                    company_code,
4183                    &plant,
4184                    &storage_loc,
4185                    &material.material_id,
4186                    &material.description,
4187                    initial_qty,
4188                    Some(material.standard_cost),
4189                    &inv_currency,
4190                );
4191                subledger.inventory_positions.push(position);
4192            }
4193
4194            stats.inventory_subledger_count = subledger.inventory_positions.len();
4195            debug!(
4196                "Inventory subledger records generated: {}",
4197                stats.inventory_subledger_count
4198            );
4199        }
4200
4201        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
4202        if !subledger.fa_records.is_empty() {
4203            if let Ok(start_date) =
4204                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4205            {
4206                let company_code = self
4207                    .config
4208                    .companies
4209                    .first()
4210                    .map(|c| c.code.as_str())
4211                    .unwrap_or("1000");
4212                let fiscal_year = start_date.year();
4213                let start_period = start_date.month();
4214                let end_period =
4215                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4216
4217                let depr_cfg = FaDepreciationScheduleConfig {
4218                    fiscal_year,
4219                    start_period,
4220                    end_period,
4221                    seed_offset: 800,
4222                };
4223                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4224                let runs = depr_gen.generate(company_code, &subledger.fa_records);
4225                let run_count = runs.len();
4226                subledger.depreciation_runs = runs;
4227                debug!(
4228                    "Depreciation runs generated: {} runs for {} periods",
4229                    run_count, self.config.global.period_months
4230                );
4231            }
4232        }
4233
4234        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
4235        if !subledger.inventory_positions.is_empty() {
4236            if let Ok(start_date) =
4237                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4238            {
4239                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4240                    - chrono::Days::new(1);
4241
4242                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4243                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4244
4245                for company in &self.config.companies {
4246                    let result = inv_val_gen.generate(
4247                        &company.code,
4248                        &subledger.inventory_positions,
4249                        as_of_date,
4250                    );
4251                    subledger.inventory_valuations.push(result);
4252                }
4253                debug!(
4254                    "Inventory valuations generated: {} company reports",
4255                    subledger.inventory_valuations.len()
4256                );
4257            }
4258        }
4259
4260        Ok((document_flows, subledger, fa_journal_entries))
4261    }
4262
4263    /// Phase 3c: Generate OCPM events from document flows.
4264    #[allow(clippy::too_many_arguments)]
4265    fn phase_ocpm_events(
4266        &mut self,
4267        document_flows: &DocumentFlowSnapshot,
4268        sourcing: &SourcingSnapshot,
4269        hr: &HrSnapshot,
4270        manufacturing: &ManufacturingSnapshot,
4271        banking: &BankingSnapshot,
4272        audit: &AuditSnapshot,
4273        financial_reporting: &FinancialReportingSnapshot,
4274        stats: &mut EnhancedGenerationStatistics,
4275    ) -> SynthResult<OcpmSnapshot> {
4276        let degradation = self.check_resources()?;
4277        if degradation >= DegradationLevel::Reduced {
4278            debug!(
4279                "Phase skipped due to resource pressure (degradation: {:?})",
4280                degradation
4281            );
4282            return Ok(OcpmSnapshot::default());
4283        }
4284        if self.phase_config.generate_ocpm_events {
4285            info!("Phase 3c: Generating OCPM Events");
4286            let ocpm_snapshot = self.generate_ocpm_events(
4287                document_flows,
4288                sourcing,
4289                hr,
4290                manufacturing,
4291                banking,
4292                audit,
4293                financial_reporting,
4294            )?;
4295            stats.ocpm_event_count = ocpm_snapshot.event_count;
4296            stats.ocpm_object_count = ocpm_snapshot.object_count;
4297            stats.ocpm_case_count = ocpm_snapshot.case_count;
4298            info!(
4299                "OCPM events generated: {} events, {} objects, {} cases",
4300                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4301            );
4302            self.check_resources_with_log("post-ocpm")?;
4303            Ok(ocpm_snapshot)
4304        } else {
4305            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4306            Ok(OcpmSnapshot::default())
4307        }
4308    }
4309
4310    /// Phase 4: Generate journal entries from document flows and standalone generation.
4311    fn phase_journal_entries(
4312        &mut self,
4313        coa: &Arc<ChartOfAccounts>,
4314        document_flows: &DocumentFlowSnapshot,
4315        _stats: &mut EnhancedGenerationStatistics,
4316    ) -> SynthResult<Vec<JournalEntry>> {
4317        let mut entries = Vec::new();
4318
4319        // Phase 4a: Generate JEs from document flows (for data coherence)
4320        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4321            debug!("Phase 4a: Generating JEs from document flows");
4322            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4323            debug!("Generated {} JEs from document flows", flow_entries.len());
4324            entries.extend(flow_entries);
4325        }
4326
4327        // Phase 4b: Generate standalone journal entries
4328        if self.phase_config.generate_journal_entries {
4329            info!("Phase 4: Generating Journal Entries");
4330            let je_entries = self.generate_journal_entries(coa)?;
4331            info!("Generated {} standalone journal entries", je_entries.len());
4332            entries.extend(je_entries);
4333        } else {
4334            debug!("Phase 4: Skipped (journal entry generation disabled)");
4335        }
4336
4337        // Phase 4c (shard mode): inject pre-built IC journal entries from
4338        // `ShardContext`. When running standalone (no group engine), this
4339        // is a no-op. See crate::shard_context::ShardContext for rationale.
4340        if let Some(ctx) = &self.shard_context {
4341            if !ctx.extra_journal_entries.is_empty() {
4342                debug!(
4343                    "Phase 4c: appending {} shard-mode IC journal entries",
4344                    ctx.extra_journal_entries.len()
4345                );
4346                entries.extend(ctx.extra_journal_entries.iter().cloned());
4347            }
4348        }
4349
4350        if !entries.is_empty() {
4351            // Note: stats.total_entries/total_line_items are set in generate()
4352            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
4353            self.check_resources_with_log("post-journal-entries")?;
4354        }
4355
4356        Ok(entries)
4357    }
4358
4359    /// Phase 5: Inject anomalies into journal entries.
4360    fn phase_anomaly_injection(
4361        &mut self,
4362        entries: &mut [JournalEntry],
4363        actions: &DegradationActions,
4364        stats: &mut EnhancedGenerationStatistics,
4365    ) -> SynthResult<AnomalyLabels> {
4366        if self.phase_config.inject_anomalies
4367            && !entries.is_empty()
4368            && !actions.skip_anomaly_injection
4369        {
4370            info!("Phase 5: Injecting Anomalies");
4371            let result = self.inject_anomalies(entries)?;
4372            stats.anomalies_injected = result.labels.len();
4373            info!("Injected {} anomalies", stats.anomalies_injected);
4374            self.check_resources_with_log("post-anomaly-injection")?;
4375            Ok(result)
4376        } else if actions.skip_anomaly_injection {
4377            warn!("Phase 5: Skipped due to resource degradation");
4378            Ok(AnomalyLabels::default())
4379        } else {
4380            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4381            Ok(AnomalyLabels::default())
4382        }
4383    }
4384
4385    /// Phase 8d (W8.1): TB drift-correction pass.
4386    ///
4387    /// Builds a `RunningBalanceTracker` over all JEs assembled so far, attaches
4388    /// the TB anchor prior (when available), and — if `drift_correction_needed()`
4389    /// fires for any company — emits one balanced "SA" adjustment JE per company
4390    /// to pull the synthetic balances toward the corpus-median targets.
4391    ///
4392    /// No-op when no TB anchor is loaded (backwards-compatible).
4393    fn phase_tb_drift_correction(&mut self, entries: &mut Vec<JournalEntry>) -> SynthResult<()> {
4394        // Only proceed when priors with a TB anchor are loaded.
4395        let tb_anchor = match &self.cached_priors {
4396            Some(priors) => match &priors.tb_anchor {
4397                Some(anchor) => anchor.clone(),
4398                None => return Ok(()),
4399            },
4400            None => return Ok(()),
4401        };
4402
4403        if !tb_anchor.has_data() {
4404            return Ok(());
4405        }
4406
4407        tracing::info!(
4408            target: "datasynth_runtime::tb_anchor",
4409            accounts = tb_anchor.per_account.len(),
4410            total_assets = tb_anchor.total_assets,
4411            "W8.1 — TB anchor loaded; running drift-correction pass"
4412        );
4413
4414        // Build a tracker over all current JEs.
4415        let tracker_config = BalanceTrackerConfig {
4416            validate_on_each_entry: false,
4417            track_history: false,
4418            fail_on_validation_error: false,
4419            ..Default::default()
4420        };
4421        let currency = self
4422            .config
4423            .companies
4424            .first()
4425            .map(|c| c.currency.clone())
4426            .unwrap_or_else(|| "USD".to_string());
4427
4428        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, currency);
4429        tracker.set_tb_anchor(tb_anchor.clone());
4430        let _ = tracker.apply_entries(entries);
4431
4432        // SP5.1 — Diagnostic: log the number of accounts being tracked vs in the
4433        // anchor, plus the top-5 most-drifted accounts for each company so we
4434        // can distinguish "no drift" from "drift below threshold" at a glance.
4435        for company in &self.config.companies {
4436            let code = &company.code;
4437            let drifts = tracker.account_drift(code);
4438            let mut sorted_drifts = drifts.clone();
4439            sorted_drifts.sort_by(|a, b| {
4440                b.1.abs()
4441                    .partial_cmp(&a.1.abs())
4442                    .unwrap_or(std::cmp::Ordering::Equal)
4443            });
4444            let aggregate_drift: f64 = drifts.iter().map(|(_, d)| d.abs()).sum();
4445            let correction_needed = tracker.drift_correction_needed(code);
4446            tracing::info!(
4447                target: "datasynth_runtime::tb_anchor",
4448                company = %code,
4449                anchor_accounts = tb_anchor.per_account.len(),
4450                tracked_accounts = drifts.len(),
4451                aggregate_drift = aggregate_drift,
4452                correction_needed = correction_needed,
4453                "W8.1 SP5.1 — per-company drift summary before correction"
4454            );
4455            for (acc, drift) in sorted_drifts.iter().take(5) {
4456                tracing::info!(
4457                    target: "datasynth_runtime::tb_anchor",
4458                    company = %code,
4459                    account = %acc,
4460                    drift = drift,
4461                    "W8.1 SP5.1 — top-5 drifted accounts"
4462                );
4463            }
4464        }
4465
4466        // Derive the posting date: use the last day of the simulation period.
4467        let period_end = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4468            .map(|d| d + chrono::Months::new(self.config.global.period_months))
4469            .unwrap_or_else(|_| chrono::Utc::now().naive_utc().date());
4470
4471        // Distinct seed offset so drift-correction draws are independent of other phases.
4472        use rand_chacha::rand_core::SeedableRng as _;
4473        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(0xD81F_C0F3));
4474
4475        let mut correction_count = 0usize;
4476        for company in &self.config.companies {
4477            let code = &company.code;
4478            if !tracker.drift_correction_needed(code) {
4479                tracing::debug!(
4480                    target: "datasynth_runtime::tb_anchor",
4481                    company = %code,
4482                    "W8.1 — drift_correction_needed returned false; skipping company"
4483                );
4484                continue;
4485            }
4486            if let Some(je) = tracker.build_drift_correction_je(code, period_end, &mut rng) {
4487                tracing::debug!(
4488                    target: "datasynth_runtime::tb_anchor",
4489                    company = %code,
4490                    lines = je.lines.len(),
4491                    debit = %je.total_debit(),
4492                    credit = %je.total_credit(),
4493                    "W8.1 — emitting drift-correction JE"
4494                );
4495                // Apply the correction to the tracker so the running state is current.
4496                let _ = tracker.apply_entry(&je);
4497                entries.push(je);
4498                correction_count += 1;
4499            }
4500        }
4501
4502        if correction_count > 0 {
4503            tracing::info!(
4504                target: "datasynth_runtime::tb_anchor",
4505                correction_count,
4506                "W8.1 — drift-correction pass emitted {} JE(s)",
4507                correction_count
4508            );
4509        } else {
4510            tracing::debug!(
4511                target: "datasynth_runtime::tb_anchor",
4512                "W8.1 — drift-correction pass: no corrections needed"
4513            );
4514        }
4515
4516        Ok(())
4517    }
4518
4519    /// Phase 6: Validate balance sheet equation on journal entries.
4520    fn phase_balance_validation(
4521        &mut self,
4522        entries: &[JournalEntry],
4523    ) -> SynthResult<BalanceValidationResult> {
4524        if self.phase_config.validate_balances && !entries.is_empty() {
4525            debug!("Phase 6: Validating Balances");
4526            let balance_validation = self.validate_journal_entries(entries)?;
4527            if balance_validation.is_balanced {
4528                debug!("Balance validation passed");
4529            } else {
4530                warn!(
4531                    "Balance validation found {} errors",
4532                    balance_validation.validation_errors.len()
4533                );
4534            }
4535            Ok(balance_validation)
4536        } else {
4537            Ok(BalanceValidationResult::default())
4538        }
4539    }
4540
4541    /// Validate that every `gl_account` referenced in `entries` exists in the
4542    /// chart of accounts.
4543    ///
4544    /// Always emits a warn-level log when the COA is missing accounts; in
4545    /// strict mode (`phase_config.validate_coa_coverage_strict`) returns
4546    /// `SynthError::generation` so the caller can fail fast.
4547    fn validate_coa_coverage(
4548        &self,
4549        entries: &[JournalEntry],
4550        coa: &ChartOfAccounts,
4551    ) -> SynthResult<()> {
4552        if entries.is_empty() {
4553            return Ok(());
4554        }
4555        let coa_set: std::collections::HashSet<&str> = coa
4556            .accounts
4557            .iter()
4558            .map(|a| a.account_number.as_str())
4559            .collect();
4560        let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4561        for je in entries {
4562            for line in je.lines.iter() {
4563                if !coa_set.contains(line.gl_account.as_str()) {
4564                    missing.insert(line.gl_account.clone());
4565                }
4566            }
4567        }
4568        if missing.is_empty() {
4569            debug!("COA coverage validation passed");
4570            return Ok(());
4571        }
4572        let msg = format!(
4573            "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4574            missing.len(),
4575            missing.iter().take(10).collect::<Vec<_>>()
4576        );
4577        if self.phase_config.validate_coa_coverage_strict {
4578            Err(SynthError::generation(msg))
4579        } else {
4580            warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4581            Ok(())
4582        }
4583    }
4584
4585    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
4586    fn phase_data_quality_injection(
4587        &mut self,
4588        entries: &mut [JournalEntry],
4589        actions: &DegradationActions,
4590        stats: &mut EnhancedGenerationStatistics,
4591    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4592        if self.phase_config.inject_data_quality
4593            && !entries.is_empty()
4594            && !actions.skip_data_quality
4595        {
4596            info!("Phase 7: Injecting Data Quality Variations");
4597            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4598            stats.data_quality_issues = dq_stats.records_with_issues;
4599            info!("Injected {} data quality issues", stats.data_quality_issues);
4600            self.check_resources_with_log("post-data-quality")?;
4601            Ok((dq_stats, quality_issues))
4602        } else if actions.skip_data_quality {
4603            warn!("Phase 7: Skipped due to resource degradation");
4604            // v4.4.1: report the denominator (entries seen) even when
4605            // injection is skipped, so downstream consumers can tell
4606            // "skipped, 0/N" apart from "ran but found nothing".
4607            Ok((stats_with_denominator(entries.len()), Vec::new()))
4608        } else {
4609            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4610            Ok((stats_with_denominator(entries.len()), Vec::new()))
4611        }
4612    }
4613
4614    /// Phase 10b: Generate period-close journal entries.
4615    ///
4616    /// Generates:
4617    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
4618    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
4619    ///    for the configured period.
4620    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
4621    /// 3. Income statement closing JE per company: transfer net income after tax to retained
4622    ///    earnings via the Income Summary (3600) clearing account.
4623    fn phase_period_close(
4624        &mut self,
4625        entries: &mut Vec<JournalEntry>,
4626        subledger: &SubledgerSnapshot,
4627        stats: &mut EnhancedGenerationStatistics,
4628    ) -> SynthResult<()> {
4629        if !self.phase_config.generate_period_close || entries.is_empty() {
4630            debug!("Phase 10b: Skipped (period close disabled or no entries)");
4631            return Ok(());
4632        }
4633
4634        info!("Phase 10b: Generating period-close journal entries");
4635
4636        use datasynth_core::accounts::{
4637            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4638        };
4639        use rust_decimal::Decimal;
4640
4641        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4642            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4643        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4644        // Posting date for close entries is the last day of the period
4645        let close_date = end_date - chrono::Days::new(1);
4646
4647        // Statutory tax rate (21% — configurable rates come in later tiers)
4648        let tax_rate = Decimal::new(21, 2); // 0.21
4649
4650        // Collect company codes from config
4651        let company_codes: Vec<String> = self
4652            .config
4653            .companies
4654            .iter()
4655            .map(|c| c.code.clone())
4656            .collect();
4657
4658        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
4659        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4660        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4661
4662        // --- Depreciation JEs (per asset) ---
4663        // Compute period depreciation for each active fixed asset using straight-line method.
4664        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
4665        let period_months = self.config.global.period_months;
4666        for asset in &subledger.fa_records {
4667            // Skip assets that are inactive / fully depreciated / non-depreciable
4668            use datasynth_core::models::subledger::fa::AssetStatus;
4669            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4670                continue;
4671            }
4672            let useful_life_months = asset.useful_life_months();
4673            if useful_life_months == 0 {
4674                // Land or CIP — not depreciated
4675                continue;
4676            }
4677            let salvage_value = asset.salvage_value();
4678            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4679            if depreciable_base == Decimal::ZERO {
4680                continue;
4681            }
4682            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4683                * Decimal::from(period_months))
4684            .round_dp(2);
4685            if period_depr <= Decimal::ZERO {
4686                continue;
4687            }
4688
4689            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4690            depr_header.document_type = "CL".to_string();
4691            depr_header.header_text = Some(format!(
4692                "Depreciation - {} {}",
4693                asset.asset_number, asset.description
4694            ));
4695            depr_header.created_by = "CLOSE_ENGINE".to_string();
4696            depr_header.source = TransactionSource::Automated;
4697            depr_header.business_process = Some(BusinessProcess::R2R);
4698
4699            let doc_id = depr_header.document_id;
4700            let mut depr_je = JournalEntry::new(depr_header);
4701
4702            // DR Depreciation Expense (6000)
4703            depr_je.add_line(JournalEntryLine::debit(
4704                doc_id,
4705                1,
4706                expense_accounts::DEPRECIATION.to_string(),
4707                period_depr,
4708            ));
4709            // CR Accumulated Depreciation (1510)
4710            depr_je.add_line(JournalEntryLine::credit(
4711                doc_id,
4712                2,
4713                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4714                period_depr,
4715            ));
4716
4717            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4718            close_jes.push(depr_je);
4719        }
4720
4721        if !subledger.fa_records.is_empty() {
4722            debug!(
4723                "Generated {} depreciation JEs from {} FA records",
4724                close_jes.len(),
4725                subledger.fa_records.len()
4726            );
4727        }
4728
4729        // --- Accrual entries (standard period-end accruals per company) ---
4730        // Generate standard accrued expense entries (utilities, rent, interest) using
4731        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
4732        {
4733            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4734            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4735            // v3.4.3: snap reversal dates to business days. No-op when
4736            // temporal_patterns.business_days is disabled.
4737            if let Some(ctx) = &self.temporal_context {
4738                accrual_gen.set_temporal_context(Arc::clone(ctx));
4739            }
4740
4741            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
4742            let accrual_items: &[(&str, &str, &str)] = &[
4743                ("Accrued Utilities", "6200", "2100"),
4744                ("Accrued Rent", "6300", "2100"),
4745                ("Accrued Interest", "6100", "2150"),
4746            ];
4747
4748            for company_code in &company_codes {
4749                // Estimate company revenue from existing JEs
4750                let company_revenue: Decimal = entries
4751                    .iter()
4752                    .filter(|e| e.header.company_code == *company_code)
4753                    .flat_map(|e| e.lines.iter())
4754                    .filter(|l| l.gl_account.starts_with('4'))
4755                    .map(|l| l.credit_amount - l.debit_amount)
4756                    .fold(Decimal::ZERO, |acc, v| acc + v);
4757
4758                if company_revenue <= Decimal::ZERO {
4759                    continue;
4760                }
4761
4762                // Use 0.5% of period revenue per accrual item as a proxy
4763                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4764                if accrual_base <= Decimal::ZERO {
4765                    continue;
4766                }
4767
4768                for (description, expense_acct, liability_acct) in accrual_items {
4769                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4770                        company_code,
4771                        description,
4772                        accrual_base,
4773                        expense_acct,
4774                        liability_acct,
4775                        close_date,
4776                        None,
4777                    );
4778                    close_jes.push(accrual_je);
4779                    if let Some(rev_je) = reversal_je {
4780                        close_jes.push(rev_je);
4781                    }
4782                }
4783            }
4784
4785            debug!(
4786                "Generated accrual entries for {} companies",
4787                company_codes.len()
4788            );
4789        }
4790
4791        for company_code in &company_codes {
4792            // Calculate net income for this company from existing JEs:
4793            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
4794            // Revenue (4xxx): credit-normal, so net = credits - debits
4795            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
4796            let mut total_revenue = Decimal::ZERO;
4797            let mut total_expenses = Decimal::ZERO;
4798
4799            for entry in entries.iter() {
4800                if entry.header.company_code != *company_code {
4801                    continue;
4802                }
4803                for line in &entry.lines {
4804                    let category = AccountCategory::from_account(&line.gl_account);
4805                    match category {
4806                        AccountCategory::Revenue => {
4807                            // Revenue is credit-normal: net revenue = credits - debits
4808                            total_revenue += line.credit_amount - line.debit_amount;
4809                        }
4810                        AccountCategory::Cogs
4811                        | AccountCategory::OperatingExpense
4812                        | AccountCategory::OtherIncomeExpense
4813                        | AccountCategory::Tax => {
4814                            // Expenses are debit-normal: net expense = debits - credits
4815                            total_expenses += line.debit_amount - line.credit_amount;
4816                        }
4817                        _ => {}
4818                    }
4819                }
4820            }
4821
4822            let pre_tax_income = total_revenue - total_expenses;
4823
4824            // Skip if no income statement activity
4825            if pre_tax_income == Decimal::ZERO {
4826                debug!(
4827                    "Company {}: no pre-tax income, skipping period close",
4828                    company_code
4829                );
4830                continue;
4831            }
4832
4833            // --- Tax provision / DTA JE ---
4834            if pre_tax_income > Decimal::ZERO {
4835                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4836                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4837
4838                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4839                tax_header.document_type = "CL".to_string();
4840                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4841                tax_header.created_by = "CLOSE_ENGINE".to_string();
4842                tax_header.source = TransactionSource::Automated;
4843                tax_header.business_process = Some(BusinessProcess::R2R);
4844
4845                let doc_id = tax_header.document_id;
4846                let mut tax_je = JournalEntry::new(tax_header);
4847
4848                // DR Tax Expense (8000)
4849                tax_je.add_line(JournalEntryLine::debit(
4850                    doc_id,
4851                    1,
4852                    tax_accounts::TAX_EXPENSE.to_string(),
4853                    tax_amount,
4854                ));
4855                // CR Income Tax Payable (2130)
4856                tax_je.add_line(JournalEntryLine::credit(
4857                    doc_id,
4858                    2,
4859                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4860                    tax_amount,
4861                ));
4862
4863                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4864                close_jes.push(tax_je);
4865            } else {
4866                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
4867                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
4868                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4869                if dta_amount > Decimal::ZERO {
4870                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4871                    dta_header.document_type = "CL".to_string();
4872                    dta_header.header_text =
4873                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
4874                    dta_header.created_by = "CLOSE_ENGINE".to_string();
4875                    dta_header.source = TransactionSource::Automated;
4876                    dta_header.business_process = Some(BusinessProcess::R2R);
4877
4878                    let doc_id = dta_header.document_id;
4879                    let mut dta_je = JournalEntry::new(dta_header);
4880
4881                    // DR Deferred Tax Asset (1600)
4882                    dta_je.add_line(JournalEntryLine::debit(
4883                        doc_id,
4884                        1,
4885                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4886                        dta_amount,
4887                    ));
4888                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
4889                    // reflecting the benefit of the future deductible temporary difference.
4890                    dta_je.add_line(JournalEntryLine::credit(
4891                        doc_id,
4892                        2,
4893                        tax_accounts::TAX_EXPENSE.to_string(),
4894                        dta_amount,
4895                    ));
4896
4897                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4898                    close_jes.push(dta_je);
4899                    debug!(
4900                        "Company {}: loss year — recognised DTA of {}",
4901                        company_code, dta_amount
4902                    );
4903                }
4904            }
4905
4906            // --- Dividend JEs (v2.4) ---
4907            // If the entity is profitable after tax, declare a 10% dividend payout.
4908            // This runs AFTER tax provision so the dividend is based on post-tax income
4909            // but BEFORE the retained earnings close so the RE transfer reflects the
4910            // reduced balance.
4911            let tax_provision = if pre_tax_income > Decimal::ZERO {
4912                (pre_tax_income * tax_rate).round_dp(2)
4913            } else {
4914                Decimal::ZERO
4915            };
4916            let net_income = pre_tax_income - tax_provision;
4917
4918            if net_income > Decimal::ZERO {
4919                use datasynth_generators::DividendGenerator;
4920                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
4921                let mut div_gen = DividendGenerator::new(self.seed + 460);
4922                let currency_str = self
4923                    .config
4924                    .companies
4925                    .iter()
4926                    .find(|c| c.code == *company_code)
4927                    .map(|c| c.currency.as_str())
4928                    .unwrap_or("USD");
4929                let div_result = div_gen.generate(
4930                    company_code,
4931                    close_date,
4932                    Decimal::new(1, 0), // $1 per share placeholder
4933                    dividend_amount,
4934                    currency_str,
4935                );
4936                let div_je_count = div_result.journal_entries.len();
4937                close_jes.extend(div_result.journal_entries);
4938                debug!(
4939                    "Company {}: declared dividend of {} ({} JEs)",
4940                    company_code, dividend_amount, div_je_count
4941                );
4942            }
4943
4944            // --- Income statement closing JE ---
4945            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
4946            // For a loss year the DTA JE above already recognises the deferred benefit; here we
4947            // close the pre-tax loss into Retained Earnings as-is.
4948            if net_income != Decimal::ZERO {
4949                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4950                close_header.document_type = "CL".to_string();
4951                close_header.header_text =
4952                    Some(format!("Income statement close - {}", company_code));
4953                close_header.created_by = "CLOSE_ENGINE".to_string();
4954                close_header.source = TransactionSource::Automated;
4955                close_header.business_process = Some(BusinessProcess::R2R);
4956
4957                let doc_id = close_header.document_id;
4958                let mut close_je = JournalEntry::new(close_header);
4959
4960                let abs_net_income = net_income.abs();
4961
4962                if net_income > Decimal::ZERO {
4963                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
4964                    close_je.add_line(JournalEntryLine::debit(
4965                        doc_id,
4966                        1,
4967                        equity_accounts::INCOME_SUMMARY.to_string(),
4968                        abs_net_income,
4969                    ));
4970                    close_je.add_line(JournalEntryLine::credit(
4971                        doc_id,
4972                        2,
4973                        equity_accounts::RETAINED_EARNINGS.to_string(),
4974                        abs_net_income,
4975                    ));
4976                } else {
4977                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
4978                    close_je.add_line(JournalEntryLine::debit(
4979                        doc_id,
4980                        1,
4981                        equity_accounts::RETAINED_EARNINGS.to_string(),
4982                        abs_net_income,
4983                    ));
4984                    close_je.add_line(JournalEntryLine::credit(
4985                        doc_id,
4986                        2,
4987                        equity_accounts::INCOME_SUMMARY.to_string(),
4988                        abs_net_income,
4989                    ));
4990                }
4991
4992                debug_assert!(
4993                    close_je.is_balanced(),
4994                    "Income statement closing JE must be balanced"
4995                );
4996                close_jes.push(close_je);
4997            }
4998        }
4999
5000        let close_count = close_jes.len();
5001        if close_count > 0 {
5002            info!("Generated {} period-close journal entries", close_count);
5003            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
5004            entries.extend(close_jes);
5005            stats.period_close_je_count = close_count;
5006
5007            // Update total entry/line-item stats
5008            stats.total_entries = entries.len() as u64;
5009            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
5010        } else {
5011            debug!("No period-close entries generated (no income statement activity)");
5012        }
5013
5014        Ok(())
5015    }
5016
5017    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
5018    fn phase_audit_data(
5019        &mut self,
5020        entries: &[JournalEntry],
5021        stats: &mut EnhancedGenerationStatistics,
5022    ) -> SynthResult<AuditSnapshot> {
5023        if self.phase_config.generate_audit {
5024            info!("Phase 8: Generating Audit Data");
5025            let audit_snapshot = self.generate_audit_data(entries)?;
5026            stats.audit_engagement_count = audit_snapshot.engagements.len();
5027            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
5028            stats.audit_evidence_count = audit_snapshot.evidence.len();
5029            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
5030            stats.audit_finding_count = audit_snapshot.findings.len();
5031            stats.audit_judgment_count = audit_snapshot.judgments.len();
5032            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
5033            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
5034            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
5035            stats.audit_sample_count = audit_snapshot.samples.len();
5036            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
5037            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
5038            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
5039            stats.audit_related_party_count = audit_snapshot.related_parties.len();
5040            stats.audit_related_party_transaction_count =
5041                audit_snapshot.related_party_transactions.len();
5042            info!(
5043                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
5044                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
5045                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
5046                 {} RP transactions",
5047                stats.audit_engagement_count,
5048                stats.audit_workpaper_count,
5049                stats.audit_evidence_count,
5050                stats.audit_risk_count,
5051                stats.audit_finding_count,
5052                stats.audit_judgment_count,
5053                stats.audit_confirmation_count,
5054                stats.audit_procedure_step_count,
5055                stats.audit_sample_count,
5056                stats.audit_analytical_result_count,
5057                stats.audit_ia_function_count,
5058                stats.audit_ia_report_count,
5059                stats.audit_related_party_count,
5060                stats.audit_related_party_transaction_count,
5061            );
5062            self.check_resources_with_log("post-audit")?;
5063            Ok(audit_snapshot)
5064        } else {
5065            debug!("Phase 8: Skipped (audit generation disabled)");
5066            Ok(AuditSnapshot::default())
5067        }
5068    }
5069
5070    /// Phase 9: Generate banking KYC/AML data.
5071    fn phase_banking_data(
5072        &mut self,
5073        stats: &mut EnhancedGenerationStatistics,
5074    ) -> SynthResult<BankingSnapshot> {
5075        if self.phase_config.generate_banking {
5076            info!("Phase 9: Generating Banking KYC/AML Data");
5077            let banking_snapshot = self.generate_banking_data()?;
5078            stats.banking_customer_count = banking_snapshot.customers.len();
5079            stats.banking_account_count = banking_snapshot.accounts.len();
5080            stats.banking_transaction_count = banking_snapshot.transactions.len();
5081            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
5082            info!(
5083                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
5084                stats.banking_customer_count, stats.banking_account_count,
5085                stats.banking_transaction_count, stats.banking_suspicious_count
5086            );
5087            self.check_resources_with_log("post-banking")?;
5088            Ok(banking_snapshot)
5089        } else {
5090            debug!("Phase 9: Skipped (banking generation disabled)");
5091            Ok(BankingSnapshot::default())
5092        }
5093    }
5094
5095    /// Phase 10: Export accounting network graphs for ML training.
5096    fn phase_graph_export(
5097        &mut self,
5098        entries: &[JournalEntry],
5099        coa: &Arc<ChartOfAccounts>,
5100        stats: &mut EnhancedGenerationStatistics,
5101    ) -> SynthResult<GraphExportSnapshot> {
5102        if self.phase_config.generate_graph_export && !entries.is_empty() {
5103            info!("Phase 10: Exporting Accounting Network Graphs");
5104            match self.export_graphs(entries, coa, stats) {
5105                Ok(snapshot) => {
5106                    info!(
5107                        "Graph export complete: {} graphs ({} nodes, {} edges)",
5108                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
5109                    );
5110                    Ok(snapshot)
5111                }
5112                Err(e) => {
5113                    warn!("Phase 10: Graph export failed: {}", e);
5114                    Ok(GraphExportSnapshot::default())
5115                }
5116            }
5117        } else {
5118            debug!("Phase 10: Skipped (graph export disabled or no entries)");
5119            Ok(GraphExportSnapshot::default())
5120        }
5121    }
5122
5123    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
5124    #[allow(clippy::too_many_arguments)]
5125    fn phase_hypergraph_export(
5126        &self,
5127        coa: &Arc<ChartOfAccounts>,
5128        entries: &[JournalEntry],
5129        document_flows: &DocumentFlowSnapshot,
5130        sourcing: &SourcingSnapshot,
5131        hr: &HrSnapshot,
5132        manufacturing: &ManufacturingSnapshot,
5133        banking: &BankingSnapshot,
5134        audit: &AuditSnapshot,
5135        financial_reporting: &FinancialReportingSnapshot,
5136        ocpm: &OcpmSnapshot,
5137        compliance: &ComplianceRegulationsSnapshot,
5138        stats: &mut EnhancedGenerationStatistics,
5139    ) -> SynthResult<()> {
5140        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
5141            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
5142            match self.export_hypergraph(
5143                coa,
5144                entries,
5145                document_flows,
5146                sourcing,
5147                hr,
5148                manufacturing,
5149                banking,
5150                audit,
5151                financial_reporting,
5152                ocpm,
5153                compliance,
5154                stats,
5155            ) {
5156                Ok(info) => {
5157                    info!(
5158                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
5159                        info.node_count, info.edge_count, info.hyperedge_count
5160                    );
5161                }
5162                Err(e) => {
5163                    warn!("Phase 10b: Hypergraph export failed: {}", e);
5164                }
5165            }
5166        } else {
5167            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5168        }
5169        Ok(())
5170    }
5171
5172    /// Phase 11: LLM Enrichment.
5173    ///
5174    /// Uses an LLM provider (mock by default) to enrich vendor names with
5175    /// realistic, context-aware names. This phase is non-blocking: failures
5176    /// log a warning but do not stop the generation pipeline.
5177    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5178        if !self.config.llm.enabled {
5179            debug!("Phase 11: Skipped (LLM enrichment disabled)");
5180            return;
5181        }
5182
5183        info!("Phase 11: Starting LLM Enrichment");
5184        let start = std::time::Instant::now();
5185
5186        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5187            // Select provider: use HttpLlmProvider when a non-mock provider is configured
5188            // and the corresponding API key environment variable is present.
5189            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5190                let schema_provider = &self.config.llm.provider;
5191                let api_key_env = match schema_provider.as_str() {
5192                    "openai" => Some("OPENAI_API_KEY"),
5193                    "anthropic" => Some("ANTHROPIC_API_KEY"),
5194                    "custom" => Some("LLM_API_KEY"),
5195                    _ => None,
5196                };
5197                if let Some(key_env) = api_key_env {
5198                    if std::env::var(key_env).is_ok() {
5199                        let llm_config = datasynth_core::llm::LlmConfig {
5200                            model: self.config.llm.model.clone(),
5201                            api_key_env: key_env.to_string(),
5202                            ..datasynth_core::llm::LlmConfig::default()
5203                        };
5204                        match HttpLlmProvider::new(llm_config) {
5205                            Ok(p) => Arc::new(p),
5206                            Err(e) => {
5207                                warn!(
5208                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
5209                                    e
5210                                );
5211                                Arc::new(MockLlmProvider::new(self.seed))
5212                            }
5213                        }
5214                    } else {
5215                        Arc::new(MockLlmProvider::new(self.seed))
5216                    }
5217                } else {
5218                    Arc::new(MockLlmProvider::new(self.seed))
5219                }
5220            };
5221            // v4.1.1+: multi-category enrichment. Vendors remain the
5222            // default path; customers and materials opt in via
5223            // `llm.enrich_customers` / `llm.enrich_materials` flags.
5224            let industry = format!("{:?}", self.config.global.industry);
5225
5226            let vendor_enricher =
5227                datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5228            let max_vendors = self
5229                .config
5230                .llm
5231                .max_vendor_enrichments
5232                .min(self.master_data.vendors.len());
5233            let mut vendors_enriched = 0usize;
5234            for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5235                match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5236                    Ok(name) => {
5237                        vendor.name = name;
5238                        vendors_enriched += 1;
5239                    }
5240                    Err(e) => warn!(
5241                        "LLM vendor enrichment failed for {}: {}",
5242                        vendor.vendor_id, e
5243                    ),
5244                }
5245            }
5246
5247            let mut customers_enriched = 0usize;
5248            if self.config.llm.enrich_customers {
5249                let customer_enricher =
5250                    datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5251                        &provider,
5252                    ));
5253                let max_customers = self
5254                    .config
5255                    .llm
5256                    .max_customer_enrichments
5257                    .min(self.master_data.customers.len());
5258                for customer in self.master_data.customers.iter_mut().take(max_customers) {
5259                    match customer_enricher.enrich_customer_name(
5260                        &industry,
5261                        "general",
5262                        &customer.country,
5263                    ) {
5264                        Ok(name) => {
5265                            customer.name = name;
5266                            customers_enriched += 1;
5267                        }
5268                        Err(e) => warn!(
5269                            "LLM customer enrichment failed for {}: {}",
5270                            customer.customer_id, e
5271                        ),
5272                    }
5273                }
5274            }
5275
5276            let mut materials_enriched = 0usize;
5277            if self.config.llm.enrich_materials {
5278                let material_enricher =
5279                    datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5280                        &provider,
5281                    ));
5282                let max_materials = self
5283                    .config
5284                    .llm
5285                    .max_material_enrichments
5286                    .min(self.master_data.materials.len());
5287                for material in self.master_data.materials.iter_mut().take(max_materials) {
5288                    let material_type = format!("{:?}", material.material_type);
5289                    match material_enricher.enrich_material_description(&material_type, &industry) {
5290                        Ok(desc) => {
5291                            material.description = desc;
5292                            materials_enriched += 1;
5293                        }
5294                        Err(e) => warn!(
5295                            "LLM material enrichment failed for {}: {}",
5296                            material.material_id, e
5297                        ),
5298                    }
5299                }
5300            }
5301
5302            (vendors_enriched, customers_enriched, materials_enriched)
5303        }));
5304
5305        match result {
5306            Ok((v, c, m)) => {
5307                stats.llm_vendors_enriched = v;
5308                stats.llm_customers_enriched = c;
5309                stats.llm_materials_enriched = m;
5310                let elapsed = start.elapsed();
5311                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5312                info!(
5313                    "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5314                    v, c, m, stats.llm_enrichment_ms
5315                );
5316            }
5317            Err(_) => {
5318                let elapsed = start.elapsed();
5319                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5320                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5321            }
5322        }
5323    }
5324
5325    /// Phase 12: Diffusion Enhancement.
5326    ///
5327    /// Generates a sample set matching distribution properties from the
5328    /// generated data. v4.4.0+ honours `config.diffusion.backend`:
5329    /// - `"statistical"` (default) — moment-matching backend, always fast.
5330    /// - `"neural"` / `"hybrid"` — candle-based score network. Requires
5331    ///   the `neural` Cargo feature; falls back to statistical when the
5332    ///   feature isn't compiled in, with a loud warning.
5333    ///
5334    /// This phase is non-blocking: failures log a warning but do not
5335    /// stop the pipeline.
5336    fn phase_diffusion_enhancement(
5337        &self,
5338        #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5339        stats: &mut EnhancedGenerationStatistics,
5340    ) {
5341        if !self.config.diffusion.enabled {
5342            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5343            return;
5344        }
5345
5346        info!("Phase 12: Starting Diffusion Enhancement");
5347        let start = std::time::Instant::now();
5348
5349        let backend_choice = self.config.diffusion.backend.as_str();
5350        let use_neural = matches!(backend_choice, "neural" | "hybrid");
5351
5352        if use_neural {
5353            #[cfg(feature = "neural")]
5354            {
5355                match self.run_neural_diffusion_phase(entries) {
5356                    Ok(sample_count) => {
5357                        stats.diffusion_samples_generated = sample_count;
5358                        let elapsed = start.elapsed();
5359                        stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5360                        info!(
5361                            "Phase 12 complete ({}): {} samples in {}ms",
5362                            backend_choice, sample_count, stats.diffusion_enhancement_ms
5363                        );
5364                        return;
5365                    }
5366                    Err(e) => {
5367                        warn!(
5368                            "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5369                        );
5370                        // Fall through to statistical path below.
5371                    }
5372                }
5373            }
5374            #[cfg(not(feature = "neural"))]
5375            {
5376                warn!(
5377                    "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5378                     not compiled in — falling back to statistical. Rebuild with \
5379                     `--features neural` (or `neural-cuda` for GPU) to enable.",
5380                    backend_choice
5381                );
5382            }
5383        } else if !matches!(backend_choice, "statistical" | "") {
5384            warn!(
5385                "Phase 12: unknown backend '{}', falling back to statistical",
5386                backend_choice
5387            );
5388        }
5389
5390        // Statistical path (default + fallback).
5391        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5392            let means = vec![5000.0, 3.0, 2.0];
5393            let stds = vec![2000.0, 1.5, 1.0];
5394
5395            let diffusion_config = DiffusionConfig {
5396                n_steps: self.config.diffusion.n_steps,
5397                seed: self.seed,
5398                ..Default::default()
5399            };
5400
5401            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5402            let n_samples = self.config.diffusion.sample_size;
5403            let n_features = 3;
5404            backend.generate(n_samples, n_features, self.seed).len()
5405        }));
5406
5407        match result {
5408            Ok(sample_count) => {
5409                stats.diffusion_samples_generated = sample_count;
5410                let elapsed = start.elapsed();
5411                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5412                info!(
5413                    "Phase 12 complete (statistical): {} samples in {}ms",
5414                    sample_count, stats.diffusion_enhancement_ms
5415                );
5416            }
5417            Err(_) => {
5418                let elapsed = start.elapsed();
5419                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5420                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5421            }
5422        }
5423    }
5424
5425    /// Neural-backend execution — either load a pre-trained checkpoint
5426    /// (when `config.diffusion.neural.checkpoint_path` is set) or train
5427    /// from the first batch of JE amounts. Returns the sample count
5428    /// produced; any error bubbles up to the statistical fallback.
5429    #[cfg(feature = "neural")]
5430    fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
5431        use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
5432
5433        if entries.is_empty() {
5434            return Err(SynthError::generation(
5435                "neural diffusion: no journal entries available as training data",
5436            ));
5437        }
5438
5439        let training_data: Vec<Vec<f64>> = entries
5440            .iter()
5441            .take(5000)
5442            .map(|je| {
5443                let total_amount: f64 = je
5444                    .lines
5445                    .iter()
5446                    .filter(|l| l.debit_amount > rust_decimal::Decimal::ZERO)
5447                    .map(|l| {
5448                        use rust_decimal::prelude::ToPrimitive;
5449                        l.debit_amount.to_f64().unwrap_or(0.0)
5450                    })
5451                    .sum();
5452                let line_count = je.lines.len() as f64;
5453                // Use the approval-workflow depth as the third feature
5454                // (proxy for complexity / risk). `None` → 1.
5455                let approval_level = je
5456                    .header
5457                    .approval_workflow
5458                    .as_ref()
5459                    .map(|w| w.required_levels as f64)
5460                    .unwrap_or(1.0);
5461                vec![total_amount, line_count, approval_level]
5462            })
5463            .collect();
5464
5465        let n_features = training_data.first().map(|r| r.len()).unwrap_or(3);
5466
5467        let cfg = &self.config.diffusion;
5468        let neural_cfg = &cfg.neural;
5469
5470        let backend: NeuralDiffusionBackend = if let Some(ckpt_path) =
5471            neural_cfg.checkpoint_path.as_ref()
5472        {
5473            let path = std::path::Path::new(ckpt_path);
5474            info!(
5475                "  Neural diffusion: loading checkpoint from {}",
5476                path.display()
5477            );
5478            NeuralDiffusionBackend::load(path)
5479                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
5480        } else {
5481            use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
5482            info!(
5483                "  Neural diffusion: training score network on {} rows × {} features, \
5484                     {} epochs, hidden_dims={:?}",
5485                training_data.len(),
5486                n_features,
5487                neural_cfg.training_epochs,
5488                neural_cfg.hidden_dims
5489            );
5490            let training_config = NeuralTrainingConfig {
5491                n_steps: cfg.n_steps,
5492                schedule: cfg.schedule.clone(),
5493                hidden_dims: neural_cfg.hidden_dims.clone(),
5494                timestep_embed_dim: neural_cfg.timestep_embed_dim,
5495                learning_rate: neural_cfg.learning_rate,
5496                epochs: neural_cfg.training_epochs,
5497                batch_size: neural_cfg.batch_size,
5498            };
5499            let (backend, report) =
5500                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
5501                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
5502            info!(
5503                "  Neural diffusion: training done — {} epochs, final_loss={:.4}",
5504                report.epochs_completed, report.final_loss
5505            );
5506            backend
5507        };
5508
5509        let samples = backend.generate(cfg.sample_size, n_features, self.seed);
5510        Ok(samples.len())
5511    }
5512
5513    /// Phase 13: Causal Overlay.
5514    ///
5515    /// Builds a structural causal model from a built-in template (e.g.,
5516    /// fraud_detection) and generates causal samples. Optionally validates
5517    /// that the output respects the causal structure. This phase is
5518    /// non-blocking: failures log a warning but do not stop the pipeline.
5519    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5520        if !self.config.causal.enabled {
5521            debug!("Phase 13: Skipped (causal generation disabled)");
5522            return;
5523        }
5524
5525        info!("Phase 13: Starting Causal Overlay");
5526        let start = std::time::Instant::now();
5527
5528        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5529            // Select template based on config
5530            let graph = match self.config.causal.template.as_str() {
5531                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5532                _ => CausalGraph::fraud_detection_template(),
5533            };
5534
5535            let scm = StructuralCausalModel::new(graph.clone())
5536                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5537
5538            let n_samples = self.config.causal.sample_size;
5539            let samples = scm
5540                .generate(n_samples, self.seed)
5541                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5542
5543            // Optionally validate causal structure
5544            let validation_passed = if self.config.causal.validate {
5545                let report = CausalValidator::validate_causal_structure(&samples, &graph);
5546                if report.valid {
5547                    info!(
5548                        "Causal validation passed: all {} checks OK",
5549                        report.checks.len()
5550                    );
5551                } else {
5552                    warn!(
5553                        "Causal validation: {} violations detected: {:?}",
5554                        report.violations.len(),
5555                        report.violations
5556                    );
5557                }
5558                Some(report.valid)
5559            } else {
5560                None
5561            };
5562
5563            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5564        }));
5565
5566        match result {
5567            Ok(Ok((sample_count, validation_passed))) => {
5568                stats.causal_samples_generated = sample_count;
5569                stats.causal_validation_passed = validation_passed;
5570                let elapsed = start.elapsed();
5571                stats.causal_generation_ms = elapsed.as_millis() as u64;
5572                info!(
5573                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5574                    sample_count, stats.causal_generation_ms, validation_passed,
5575                );
5576            }
5577            Ok(Err(e)) => {
5578                let elapsed = start.elapsed();
5579                stats.causal_generation_ms = elapsed.as_millis() as u64;
5580                warn!("Phase 13: Causal generation failed: {}", e);
5581            }
5582            Err(_) => {
5583                let elapsed = start.elapsed();
5584                stats.causal_generation_ms = elapsed.as_millis() as u64;
5585                warn!("Phase 13: Causal generation failed (panic caught), continuing");
5586            }
5587        }
5588    }
5589
5590    /// Phase 14: Generate S2C sourcing data.
5591    fn phase_sourcing_data(
5592        &mut self,
5593        stats: &mut EnhancedGenerationStatistics,
5594    ) -> SynthResult<SourcingSnapshot> {
5595        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5596            debug!("Phase 14: Skipped (sourcing generation disabled)");
5597            return Ok(SourcingSnapshot::default());
5598        }
5599        let degradation = self.check_resources()?;
5600        if degradation >= DegradationLevel::Reduced {
5601            debug!(
5602                "Phase skipped due to resource pressure (degradation: {:?})",
5603                degradation
5604            );
5605            return Ok(SourcingSnapshot::default());
5606        }
5607
5608        info!("Phase 14: Generating S2C Sourcing Data");
5609        let seed = self.seed;
5610
5611        // Gather vendor data from master data
5612        let vendor_ids: Vec<String> = self
5613            .master_data
5614            .vendors
5615            .iter()
5616            .map(|v| v.vendor_id.clone())
5617            .collect();
5618        if vendor_ids.is_empty() {
5619            debug!("Phase 14: Skipped (no vendors available)");
5620            return Ok(SourcingSnapshot::default());
5621        }
5622
5623        let categories: Vec<(String, String)> = vec![
5624            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5625            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5626            ("CAT-IT".to_string(), "IT Equipment".to_string()),
5627            ("CAT-SVC".to_string(), "Professional Services".to_string()),
5628            ("CAT-LOG".to_string(), "Logistics".to_string()),
5629        ];
5630        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5631            .iter()
5632            .map(|(id, name)| {
5633                (
5634                    id.clone(),
5635                    name.clone(),
5636                    rust_decimal::Decimal::from(100_000),
5637                )
5638            })
5639            .collect();
5640
5641        let company_code = self
5642            .config
5643            .companies
5644            .first()
5645            .map(|c| c.code.as_str())
5646            .unwrap_or("1000");
5647        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5648            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5649        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5650        let fiscal_year = start_date.year() as u16;
5651        let owner_ids: Vec<String> = self
5652            .master_data
5653            .employees
5654            .iter()
5655            .take(5)
5656            .map(|e| e.employee_id.clone())
5657            .collect();
5658        let owner_id = owner_ids
5659            .first()
5660            .map(std::string::String::as_str)
5661            .unwrap_or("BUYER-001");
5662
5663        // Step 1: Spend Analysis
5664        let mut spend_gen = SpendAnalysisGenerator::new(seed);
5665        let spend_analyses =
5666            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5667
5668        // Step 2: Sourcing Projects
5669        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5670        let sourcing_projects = if owner_ids.is_empty() {
5671            Vec::new()
5672        } else {
5673            project_gen.generate(
5674                company_code,
5675                &categories_with_spend,
5676                &owner_ids,
5677                start_date,
5678                self.config.global.period_months,
5679            )
5680        };
5681        stats.sourcing_project_count = sourcing_projects.len();
5682
5683        // Step 3: Qualifications
5684        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5685        let mut qual_gen = QualificationGenerator::new(seed + 2);
5686        let qualifications = qual_gen.generate(
5687            company_code,
5688            &qual_vendor_ids,
5689            sourcing_projects.first().map(|p| p.project_id.as_str()),
5690            owner_id,
5691            start_date,
5692        );
5693
5694        // Step 4: RFx Events
5695        let mut rfx_gen = RfxGenerator::new(seed + 3);
5696        let rfx_events: Vec<RfxEvent> = sourcing_projects
5697            .iter()
5698            .map(|proj| {
5699                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5700                rfx_gen.generate(
5701                    company_code,
5702                    &proj.project_id,
5703                    &proj.category_id,
5704                    &qualified_vids,
5705                    owner_id,
5706                    start_date,
5707                    50000.0,
5708                )
5709            })
5710            .collect();
5711        stats.rfx_event_count = rfx_events.len();
5712
5713        // Step 5: Bids
5714        let mut bid_gen = BidGenerator::new(seed + 4);
5715        let mut all_bids = Vec::new();
5716        for rfx in &rfx_events {
5717            let bidder_count = vendor_ids.len().clamp(2, 5);
5718            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5719            let bids = bid_gen.generate(rfx, &responding, start_date);
5720            all_bids.extend(bids);
5721        }
5722        stats.bid_count = all_bids.len();
5723
5724        // Step 6: Bid Evaluations
5725        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5726        let bid_evaluations: Vec<BidEvaluation> = rfx_events
5727            .iter()
5728            .map(|rfx| {
5729                let rfx_bids: Vec<SupplierBid> = all_bids
5730                    .iter()
5731                    .filter(|b| b.rfx_id == rfx.rfx_id)
5732                    .cloned()
5733                    .collect();
5734                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5735            })
5736            .collect();
5737
5738        // Step 7: Contracts from winning bids
5739        let mut contract_gen = ContractGenerator::new(seed + 6);
5740        let contracts: Vec<ProcurementContract> = bid_evaluations
5741            .iter()
5742            .zip(rfx_events.iter())
5743            .filter_map(|(eval, rfx)| {
5744                eval.ranked_bids.first().and_then(|winner| {
5745                    all_bids
5746                        .iter()
5747                        .find(|b| b.bid_id == winner.bid_id)
5748                        .map(|winning_bid| {
5749                            contract_gen.generate_from_bid(
5750                                winning_bid,
5751                                Some(&rfx.sourcing_project_id),
5752                                &rfx.category_id,
5753                                owner_id,
5754                                start_date,
5755                            )
5756                        })
5757                })
5758            })
5759            .collect();
5760        stats.contract_count = contracts.len();
5761
5762        // Step 8: Catalog Items
5763        let mut catalog_gen = CatalogGenerator::new(seed + 7);
5764        let catalog_items = catalog_gen.generate(&contracts);
5765        stats.catalog_item_count = catalog_items.len();
5766
5767        // Step 9: Scorecards
5768        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5769        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5770            .iter()
5771            .fold(
5772                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5773                |mut acc, c| {
5774                    acc.entry(c.vendor_id.clone()).or_default().push(c);
5775                    acc
5776                },
5777            )
5778            .into_iter()
5779            .collect();
5780        let scorecards = scorecard_gen.generate(
5781            company_code,
5782            &vendor_contracts,
5783            start_date,
5784            end_date,
5785            owner_id,
5786        );
5787        stats.scorecard_count = scorecards.len();
5788
5789        // Back-populate cross-references on sourcing projects (Task 35)
5790        // Link each project to its RFx events, contracts, and spend analyses
5791        let mut sourcing_projects = sourcing_projects;
5792        for project in &mut sourcing_projects {
5793            // Link RFx events generated for this project
5794            project.rfx_ids = rfx_events
5795                .iter()
5796                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5797                .map(|rfx| rfx.rfx_id.clone())
5798                .collect();
5799
5800            // Link contract awarded from this project's RFx
5801            project.contract_id = contracts
5802                .iter()
5803                .find(|c| {
5804                    c.sourcing_project_id
5805                        .as_deref()
5806                        .is_some_and(|sp| sp == project.project_id)
5807                })
5808                .map(|c| c.contract_id.clone());
5809
5810            // Link spend analysis for matching category (use category_id as the reference)
5811            project.spend_analysis_id = spend_analyses
5812                .iter()
5813                .find(|sa| sa.category_id == project.category_id)
5814                .map(|sa| sa.category_id.clone());
5815        }
5816
5817        info!(
5818            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5819            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5820            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5821        );
5822        self.check_resources_with_log("post-sourcing")?;
5823
5824        Ok(SourcingSnapshot {
5825            spend_analyses,
5826            sourcing_projects,
5827            qualifications,
5828            rfx_events,
5829            bids: all_bids,
5830            bid_evaluations,
5831            contracts,
5832            catalog_items,
5833            scorecards,
5834        })
5835    }
5836
5837    /// Build a [`GroupStructure`] from the current company configuration.
5838    ///
5839    /// The first company in the configuration is treated as the ultimate parent.
5840    /// All remaining companies become wholly-owned (100 %) subsidiaries with
5841    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
5842    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5843        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5844
5845        let parent_code = self
5846            .config
5847            .companies
5848            .first()
5849            .map(|c| c.code.clone())
5850            .unwrap_or_else(|| "PARENT".to_string());
5851
5852        let mut group = GroupStructure::new(parent_code);
5853
5854        for company in self.config.companies.iter().skip(1) {
5855            let sub =
5856                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5857            group.add_subsidiary(sub);
5858        }
5859
5860        group
5861    }
5862
5863    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
5864    fn phase_intercompany(
5865        &mut self,
5866        journal_entries: &[JournalEntry],
5867        stats: &mut EnhancedGenerationStatistics,
5868    ) -> SynthResult<IntercompanySnapshot> {
5869        // Skip if intercompany is disabled in config
5870        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5871            debug!("Phase 14b: Skipped (intercompany generation disabled)");
5872            return Ok(IntercompanySnapshot::default());
5873        }
5874
5875        // Intercompany requires at least 2 companies
5876        if self.config.companies.len() < 2 {
5877            debug!(
5878                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5879                self.config.companies.len()
5880            );
5881            return Ok(IntercompanySnapshot::default());
5882        }
5883
5884        info!("Phase 14b: Generating Intercompany Transactions");
5885
5886        // Build the group structure early — used by ISA 600 component auditor scope
5887        // and consolidated financial statement generators downstream.
5888        let group_structure = self.build_group_structure();
5889        debug!(
5890            "Group structure built: parent={}, subsidiaries={}",
5891            group_structure.parent_entity,
5892            group_structure.subsidiaries.len()
5893        );
5894
5895        let seed = self.seed;
5896        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5897            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5898        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5899
5900        // Build ownership structure from company configs
5901        // First company is treated as the parent, remaining are subsidiaries
5902        let parent_code = self.config.companies[0].code.clone();
5903        let mut ownership_structure =
5904            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5905
5906        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5907            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5908                format!("REL{:03}", i + 1),
5909                parent_code.clone(),
5910                company.code.clone(),
5911                rust_decimal::Decimal::from(100), // Default 100% ownership
5912                start_date,
5913            );
5914            ownership_structure.add_relationship(relationship);
5915        }
5916
5917        // Convert config transfer pricing method to core model enum
5918        let tp_method = match self.config.intercompany.transfer_pricing_method {
5919            datasynth_config::schema::TransferPricingMethod::CostPlus => {
5920                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5921            }
5922            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5923                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5924            }
5925            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5926                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5927            }
5928            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5929                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5930            }
5931            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5932                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5933            }
5934        };
5935
5936        // Build IC generator config from schema config
5937        let ic_currency = self
5938            .config
5939            .companies
5940            .first()
5941            .map(|c| c.currency.clone())
5942            .unwrap_or_else(|| "USD".to_string());
5943        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5944            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5945            transfer_pricing_method: tp_method,
5946            markup_percent: rust_decimal::Decimal::from_f64_retain(
5947                self.config.intercompany.markup_percent,
5948            )
5949            .unwrap_or(rust_decimal::Decimal::from(5)),
5950            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5951            default_currency: ic_currency,
5952            ..Default::default()
5953        };
5954
5955        // Create IC generator
5956        let mut ic_generator = datasynth_generators::ICGenerator::new(
5957            ic_gen_config,
5958            ownership_structure.clone(),
5959            seed + 50,
5960        );
5961
5962        // Generate IC transactions for the period
5963        // Use ~3 transactions per day as a reasonable default
5964        let transactions_per_day = 3;
5965        let matched_pairs = ic_generator.generate_transactions_for_period(
5966            start_date,
5967            end_date,
5968            transactions_per_day,
5969        );
5970
5971        // Generate IC source P2P/O2C documents
5972        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5973        debug!(
5974            "Generated {} IC seller invoices, {} IC buyer POs",
5975            ic_doc_chains.seller_invoices.len(),
5976            ic_doc_chains.buyer_orders.len()
5977        );
5978
5979        // Generate journal entries from matched pairs
5980        let mut seller_entries = Vec::new();
5981        let mut buyer_entries = Vec::new();
5982        let fiscal_year = start_date.year();
5983
5984        for pair in &matched_pairs {
5985            let fiscal_period = pair.posting_date.month();
5986            let (seller_je, buyer_je) =
5987                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5988            seller_entries.push(seller_je);
5989            buyer_entries.push(buyer_je);
5990        }
5991
5992        // Run matching engine
5993        let matching_config = datasynth_generators::ICMatchingConfig {
5994            base_currency: self
5995                .config
5996                .companies
5997                .first()
5998                .map(|c| c.currency.clone())
5999                .unwrap_or_else(|| "USD".to_string()),
6000            ..Default::default()
6001        };
6002        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
6003        matching_engine.load_matched_pairs(&matched_pairs);
6004        let matching_result = matching_engine.run_matching(end_date);
6005
6006        // Generate elimination entries if configured
6007        let mut elimination_entries = Vec::new();
6008        if self.config.intercompany.generate_eliminations {
6009            let elim_config = datasynth_generators::EliminationConfig {
6010                consolidation_entity: "GROUP".to_string(),
6011                base_currency: self
6012                    .config
6013                    .companies
6014                    .first()
6015                    .map(|c| c.currency.clone())
6016                    .unwrap_or_else(|| "USD".to_string()),
6017                ..Default::default()
6018            };
6019
6020            let mut elim_generator =
6021                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
6022
6023            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
6024            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
6025                matching_result
6026                    .matched_balances
6027                    .iter()
6028                    .chain(matching_result.unmatched_balances.iter())
6029                    .cloned()
6030                    .collect();
6031
6032            // Build investment and equity maps from the group structure so that the
6033            // elimination generator can produce equity-investment elimination entries
6034            // (parent's investment in subsidiary vs. subsidiary's equity capital).
6035            //
6036            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
6037            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
6038            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
6039            //
6040            // Net assets are derived from the journal entries using account-range heuristics:
6041            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
6042            // no JE data is available (IC phase runs early in the generation pipeline).
6043            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
6044                std::collections::HashMap::new();
6045            let mut equity_amounts: std::collections::HashMap<
6046                String,
6047                std::collections::HashMap<String, rust_decimal::Decimal>,
6048            > = std::collections::HashMap::new();
6049            {
6050                use rust_decimal::Decimal;
6051                let hundred = Decimal::from(100u32);
6052                let ten_pct = Decimal::new(10, 2); // 0.10
6053                let thirty_pct = Decimal::new(30, 2); // 0.30
6054                let sixty_pct = Decimal::new(60, 2); // 0.60
6055                let parent_code = &group_structure.parent_entity;
6056                for sub in &group_structure.subsidiaries {
6057                    let net_assets = {
6058                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6059                        if na > Decimal::ZERO {
6060                            na
6061                        } else {
6062                            Decimal::from(1_000_000u64)
6063                        }
6064                    };
6065                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
6066                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
6067                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
6068
6069                    // Split subsidiary equity into conventional components:
6070                    // 10 % share capital / 30 % APIC / 60 % retained earnings
6071                    let mut eq_map = std::collections::HashMap::new();
6072                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
6073                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
6074                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
6075                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
6076                }
6077            }
6078
6079            let journal = elim_generator.generate_eliminations(
6080                &fiscal_period,
6081                end_date,
6082                &all_balances,
6083                &matched_pairs,
6084                &investment_amounts,
6085                &equity_amounts,
6086            );
6087
6088            elimination_entries = journal.entries.clone();
6089        }
6090
6091        let matched_pair_count = matched_pairs.len();
6092        let elimination_entry_count = elimination_entries.len();
6093        let match_rate = matching_result.match_rate;
6094
6095        stats.ic_matched_pair_count = matched_pair_count;
6096        stats.ic_elimination_count = elimination_entry_count;
6097        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
6098
6099        info!(
6100            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
6101            matched_pair_count,
6102            stats.ic_transaction_count,
6103            seller_entries.len(),
6104            buyer_entries.len(),
6105            elimination_entry_count,
6106            match_rate * 100.0
6107        );
6108        self.check_resources_with_log("post-intercompany")?;
6109
6110        // ----------------------------------------------------------------
6111        // NCI measurements: derive from group structure ownership percentages
6112        // ----------------------------------------------------------------
6113        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
6114            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
6115            use rust_decimal::Decimal;
6116
6117            let eight_pct = Decimal::new(8, 2); // 0.08
6118
6119            group_structure
6120                .subsidiaries
6121                .iter()
6122                .filter(|sub| {
6123                    sub.nci_percentage > Decimal::ZERO
6124                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
6125                })
6126                .map(|sub| {
6127                    // Compute net assets from actual journal entries for this subsidiary.
6128                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
6129                    // IC phase runs before the main JE batch has been populated).
6130                    let net_assets_from_jes =
6131                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6132
6133                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
6134                        net_assets_from_jes.round_dp(2)
6135                    } else {
6136                        // Fallback: use a plausible base amount
6137                        Decimal::from(1_000_000u64)
6138                    };
6139
6140                    // Net income approximated as 8% of net assets
6141                    let net_income = (net_assets * eight_pct).round_dp(2);
6142
6143                    NciMeasurement::compute(
6144                        sub.entity_code.clone(),
6145                        sub.nci_percentage,
6146                        net_assets,
6147                        net_income,
6148                    )
6149                })
6150                .collect()
6151        };
6152
6153        if !nci_measurements.is_empty() {
6154            info!(
6155                "NCI measurements: {} subsidiaries with non-controlling interests",
6156                nci_measurements.len()
6157            );
6158        }
6159
6160        Ok(IntercompanySnapshot {
6161            group_structure: Some(group_structure),
6162            matched_pairs,
6163            seller_journal_entries: seller_entries,
6164            buyer_journal_entries: buyer_entries,
6165            elimination_entries,
6166            nci_measurements,
6167            ic_document_chains: Some(ic_doc_chains),
6168            matched_pair_count,
6169            elimination_entry_count,
6170            match_rate,
6171        })
6172    }
6173
6174    /// Phase 15: Generate bank reconciliations and financial statements.
    ///
    /// Financial statements (standalone, consolidated, consolidation schedules and
    /// segment reports) are produced when either
    /// `phase_config.generate_financial_statements` or
    /// `config.financial_reporting.enabled` is set. Bank reconciliations are produced
    /// when `phase_config.generate_bank_reconciliation` is set and
    /// `document_flows.payments` is non-empty. When neither flag is set the phase
    /// returns `FinancialReportingSnapshot::default()` without doing any work.
    ///
    /// Periods are iterated as `0..config.global.period_months`, each one starting
    /// at `start_date + period` months and ending one day before the next period
    /// starts. `fiscal_year` / `fiscal_period` are taken from the calendar year and
    /// month of the period end date.
    ///
    /// # Parameters
    /// * `document_flows` - only `payments` is read (bank reconciliation input).
    /// * `journal_entries` - source for trial balances, elimination entries and the
    ///   depreciation total passed to the segment generator.
    /// * `coa` - forwarded to the trial-balance builders.
    /// * `_hr`, `_audit` - currently unused.
    /// * `stats` - `financial_statement_count` is set when statements are generated;
    ///   `bank_reconciliation_count` is always set once the phase is not skipped.
    ///
    /// # Errors
    /// Returns a config error when `config.global.start_date` does not parse as
    /// `%Y-%m-%d`, and propagates any error from `check_resources_with_log`.
    ///
    /// The returned snapshot always has an empty `notes_to_financial_statements`.
6175    fn phase_financial_reporting(
6176        &mut self,
6177        document_flows: &DocumentFlowSnapshot,
6178        journal_entries: &[JournalEntry],
6179        coa: &Arc<ChartOfAccounts>,
6180        _hr: &HrSnapshot,
6181        _audit: &AuditSnapshot,
6182        stats: &mut EnhancedGenerationStatistics,
6183    ) -> SynthResult<FinancialReportingSnapshot> {
        // Financial statements can be switched on from either the phase config or
        // the main generator config; bank reconciliation only from the phase config.
6184        let fs_enabled = self.phase_config.generate_financial_statements
6185            || self.config.financial_reporting.enabled;
6186        let br_enabled = self.phase_config.generate_bank_reconciliation;
6187
6188        if !fs_enabled && !br_enabled {
6189            debug!("Phase 15: Skipped (financial reporting disabled)");
6190            return Ok(FinancialReportingSnapshot::default());
6191        }
6192
6193        info!("Phase 15: Generating Financial Reporting Data");
6194
6195        let seed = self.seed;
6196        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6197            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6198
        // Output accumulators; each one maps onto a field of the returned snapshot.
6199        let mut financial_statements = Vec::new();
6200        let mut bank_reconciliations = Vec::new();
6201        let mut trial_balances = Vec::new();
6202        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6203        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6204            Vec::new();
6205        // Standalone statements keyed by entity code
6206        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6207            std::collections::HashMap::new();
6208        // Consolidated statements (one per period)
6209        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6210        // Consolidation schedules (one per period)
6211        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6212
6213        // Generate financial statements from JE-derived trial balances.
6214        //
6215        // When journal entries are available, we use cumulative trial balances for
6216        // balance sheet accounts and current-period trial balances for income
6217        // statement accounts. We also track prior-period trial balances so the
6218        // generator can produce comparative amounts, and we build a proper
6219        // cash flow statement from working capital changes rather than random data.
        //
        // NOTE(review): in the code below no prior-period trial balance is passed
        // (both the generator call and the cash-flow builder receive `None`).
6220        if fs_enabled {
6221            let has_journal_entries = !journal_entries.is_empty();
6222
6223            // Use FinancialStatementGenerator for balance sheet and income statement,
6224            // but build cash flow ourselves from TB data when JEs are available.
            // NOTE(review): `fs_gen` is never used to generate statements in this
            // function; it is only touched once further down to silence the
            // unused-variable warning.
6225            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6226            // Separate generator for consolidated statements (different seed offset)
6227            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6228
6229            // Collect elimination JEs once (reused across periods)
6230            let elimination_entries: Vec<&JournalEntry> = journal_entries
6231                .iter()
6232                .filter(|je| je.header.is_elimination)
6233                .collect();
6234
6235            // Generate one set of statements per period, per entity
6236            for period in 0..self.config.global.period_months {
                // Period runs from `start_date + period` months up to the day before
                // the next period begins.
6237                let period_start = start_date + chrono::Months::new(period);
6238                let period_end =
6239                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
                // Fiscal year/period are the calendar year and month of the period end.
6240                let fiscal_year = period_end.year() as u16;
6241                let fiscal_period = period_end.month() as u8;
6242                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6243
6244                // Build per-entity trial balances for this period (non-elimination JEs)
6245                // We accumulate them for the consolidation step.
                // Outer key: company code; inner key: trial-balance category; value:
                // summed net (debit - credit) balance. Only filled in the
                // `has_journal_entries` branch below.
6246                let mut entity_tb_map: std::collections::HashMap<
6247                    String,
6248                    std::collections::HashMap<String, rust_decimal::Decimal>,
6249                > = std::collections::HashMap::new();
6250
6251                // --- Standalone: one set of statements per company ---
6252                for (company_idx, company) in self.config.companies.iter().enumerate() {
6253                    let company_code = company.code.as_str();
6254                    let currency = company.currency.as_str();
6255                    // Use a unique seed offset per company to keep statements deterministic
6256                    // and distinct across companies
                    // NOTE(review): the generator is re-created with the same seed on
                    // every period iteration, and the offset for company index 0 (20)
                    // equals the offset used for `fs_gen` above.
6257                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6258                    let mut company_fs_gen =
6259                        FinancialStatementGenerator::new(seed + company_seed_offset);
6260
6261                    if has_journal_entries {
6262                        let tb_entries = Self::build_cumulative_trial_balance(
6263                            journal_entries,
6264                            coa,
6265                            company_code,
6266                            start_date,
6267                            period_end,
6268                            fiscal_year,
6269                            fiscal_period,
6270                        );
6271
6272                        // Accumulate per-entity category balances for consolidation
6273                        let entity_cat_map =
6274                            entity_tb_map.entry(company_code.to_string()).or_default();
6275                        for tb_entry in &tb_entries {
6276                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
6277                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6278                        }
6279
                        // No prior-period trial balance is supplied (`None`).
6280                        let stmts = company_fs_gen.generate(
6281                            company_code,
6282                            currency,
6283                            &tb_entries,
6284                            period_start,
6285                            period_end,
6286                            fiscal_year,
6287                            fiscal_period,
6288                            None,
6289                            "SYS-AUTOCLOSE",
6290                        );
6291
                        // Replace the generator's cash-flow items with ones derived from
                        // the trial balance; all other statement types pass through as-is.
6292                        let mut entity_stmts = Vec::new();
6293                        for stmt in stmts {
6294                            if stmt.statement_type == StatementType::CashFlowStatement {
6295                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6296                                let cf_items = Self::build_cash_flow_from_trial_balances(
6297                                    &tb_entries,
6298                                    None,
6299                                    net_income,
6300                                );
6301                                entity_stmts.push(FinancialStatement {
6302                                    cash_flow_items: cf_items,
6303                                    ..stmt
6304                                });
6305                            } else {
6306                                entity_stmts.push(stmt);
6307                            }
6308                        }
6309
6310                        // Add to the flat financial_statements list (used by KPI/budget)
6311                        financial_statements.extend(entity_stmts.clone());
6312
6313                        // Store standalone per-entity
6314                        standalone_statements
6315                            .entry(company_code.to_string())
6316                            .or_default()
6317                            .extend(entity_stmts);
6318
6319                        // Only store trial balance for the first company in the period
6320                        // to avoid duplicates in the trial_balances list
6321                        if company_idx == 0 {
6322                            trial_balances.push(PeriodTrialBalance {
6323                                fiscal_year,
6324                                fiscal_period,
6325                                period_start,
6326                                period_end,
6327                                entries: tb_entries,
6328                            });
6329                        }
6330                    } else {
6331                        // Fallback: no JEs available
                        // `journal_entries` is empty on this path, so the helper is
                        // called with an empty slice.
6332                        let tb_entries = Self::build_trial_balance_from_entries(
6333                            journal_entries,
6334                            coa,
6335                            company_code,
6336                            fiscal_year,
6337                            fiscal_period,
6338                        );
6339
6340                        let stmts = company_fs_gen.generate(
6341                            company_code,
6342                            currency,
6343                            &tb_entries,
6344                            period_start,
6345                            period_end,
6346                            fiscal_year,
6347                            fiscal_period,
6348                            None,
6349                            "SYS-AUTOCLOSE",
6350                        );
                        // Statements are stored unchanged here (no cash-flow rebuild).
6351                        financial_statements.extend(stmts.clone());
6352                        standalone_statements
6353                            .entry(company_code.to_string())
6354                            .or_default()
6355                            .extend(stmts);
6356
                        // As above, only the first company's trial balance is kept, and
                        // only when it actually contains entries.
6357                        if company_idx == 0 && !tb_entries.is_empty() {
6358                            trial_balances.push(PeriodTrialBalance {
6359                                fiscal_year,
6360                                fiscal_period,
6361                                period_start,
6362                                period_end,
6363                                entries: tb_entries,
6364                            });
6365                        }
6366                    }
6367                }
6368
6369                // --- Consolidated: aggregate all entities + apply eliminations ---
6370                // Use the primary (first) company's currency for the consolidated statement
                // ("USD" when no company is configured).
6371                let group_currency = self
6372                    .config
6373                    .companies
6374                    .first()
6375                    .map(|c| c.currency.as_str())
6376                    .unwrap_or("USD");
6377
6378                // Build owned elimination entries for this period
                // (elimination JEs whose header fiscal year and period match this one).
6379                let period_eliminations: Vec<JournalEntry> = elimination_entries
6380                    .iter()
6381                    .filter(|je| {
6382                        je.header.fiscal_year == fiscal_year
6383                            && je.header.fiscal_period == fiscal_period
6384                    })
6385                    .map(|je| (*je).clone())
6386                    .collect();
6387
6388                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6389                    &entity_tb_map,
6390                    &period_eliminations,
6391                    &period_label,
6392                );
6393
6394                // Build a pseudo trial balance from consolidated line items for the
6395                // FinancialStatementGenerator to use (only for cash flow direction).
                // Each schedule line becomes one entry: a non-negative post-elimination
                // total goes to the debit side, a negative one to the credit side (as
                // its absolute value). The account category doubles as code and name.
6396                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6397                    .line_items
6398                    .iter()
6399                    .map(|li| {
6400                        let net = li.post_elimination_total;
6401                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6402                            (net, rust_decimal::Decimal::ZERO)
6403                        } else {
6404                            (rust_decimal::Decimal::ZERO, -net)
6405                        };
6406                        datasynth_generators::TrialBalanceEntry {
6407                            account_code: li.account_category.clone(),
6408                            account_name: li.account_category.clone(),
6409                            category: li.account_category.clone(),
6410                            debit_balance: debit,
6411                            credit_balance: credit,
6412                        }
6413                    })
6414                    .collect();
6415
                // Consolidated statements are generated under the entity code "GROUP".
6416                let mut cons_stmts = cons_gen.generate(
6417                    "GROUP",
6418                    group_currency,
6419                    &cons_tb,
6420                    period_start,
6421                    period_end,
6422                    fiscal_year,
6423                    fiscal_period,
6424                    None,
6425                    "SYS-AUTOCLOSE",
6426                );
6427
6428                // Split consolidated line items by statement type.
6429                // The consolidation generator returns BS items first, then IS items,
6430                // identified by their CONS- prefix and category.
                // The split performed here is by label: an item is treated as a
                // balance-sheet item when its upper-cased `label` equals one of the
                // categories listed below; everything else is treated as an
                // income-statement item.
6431                let bs_categories: &[&str] = &[
6432                    "CASH",
6433                    "RECEIVABLES",
6434                    "INVENTORY",
6435                    "FIXEDASSETS",
6436                    "PAYABLES",
6437                    "ACCRUEDLIABILITIES",
6438                    "LONGTERMDEBT",
6439                    "EQUITY",
6440                ];
6441                let (bs_items, is_items): (Vec<_>, Vec<_>) =
6442                    cons_line_items.into_iter().partition(|li| {
6443                        let upper = li.label.to_uppercase();
6444                        bs_categories.iter().any(|c| upper == *c)
6445                    });
6446
                // Mark every generated statement as consolidated and overwrite the
                // balance sheet / income statement line items with the consolidation
                // output.
6447                for stmt in &mut cons_stmts {
6448                    stmt.is_consolidated = true;
6449                    match stmt.statement_type {
6450                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6451                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6452                        _ => {} // CF and equity change statements keep generator output
6453                    }
6454                }
6455
6456                consolidated_statements.extend(cons_stmts);
6457                consolidation_schedules.push(schedule);
6458            }
6459
6460            // Backward compat: if only 1 company, use existing code path logic
6461            // (prior_cumulative_tb for comparative amounts). Already handled above;
6462            // the prior_ref is omitted to keep this change minimal.
6463            let _ = &mut fs_gen; // suppress unused warning
6464
            // The stat counts the flat (standalone) list only; consolidated
            // statements are reported separately in the log line.
6465            stats.financial_statement_count = financial_statements.len();
6466            info!(
6467                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6468                stats.financial_statement_count,
6469                consolidated_statements.len(),
6470                has_journal_entries
6471            );
6472
6473            // ----------------------------------------------------------------
6474            // IFRS 8 / ASC 280: Operating Segment Reporting
6475            // ----------------------------------------------------------------
6476            // Build entity seeds from the company configuration.
6477            let entity_seeds: Vec<SegmentSeed> = self
6478                .config
6479                .companies
6480                .iter()
6481                .map(|c| SegmentSeed {
6482                    code: c.code.clone(),
6483                    name: c.name.clone(),
6484                    currency: c.currency.clone(),
6485                })
6486                .collect();
6487
6488            let mut seg_gen = SegmentGenerator::new(seed + 30);
6489
6490            // Generate one set of segment reports per period.
6491            // We extract consolidated revenue / profit / assets from the consolidated
6492            // financial statements produced above, falling back to simple sums when
6493            // no consolidated statements were generated (single-entity path).
            //
            // NOTE(review): the fallback below picks the first matching statement
            // from the flat list rather than summing — confirm intent.
6494            for period in 0..self.config.global.period_months {
6495                let period_end =
6496                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6497                let fiscal_year = period_end.year() as u16;
6498                let fiscal_period = period_end.month() as u8;
6499                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6500
6501                use datasynth_core::models::StatementType;
6502
6503                // Try to find consolidated income statement for this period
6504                let cons_is = consolidated_statements.iter().find(|s| {
6505                    s.fiscal_year == fiscal_year
6506                        && s.fiscal_period == fiscal_period
6507                        && s.statement_type == StatementType::IncomeStatement
6508                });
6509                let cons_bs = consolidated_statements.iter().find(|s| {
6510                    s.fiscal_year == fiscal_year
6511                        && s.fiscal_period == fiscal_period
6512                        && s.statement_type == StatementType::BalanceSheet
6513                });
6514
6515                // If consolidated statements not available fall back to the flat list
6516                let is_stmt = cons_is.or_else(|| {
6517                    financial_statements.iter().find(|s| {
6518                        s.fiscal_year == fiscal_year
6519                            && s.fiscal_period == fiscal_period
6520                            && s.statement_type == StatementType::IncomeStatement
6521                    })
6522                });
6523                let bs_stmt = cons_bs.or_else(|| {
6524                    financial_statements.iter().find(|s| {
6525                        s.fiscal_year == fiscal_year
6526                            && s.fiscal_period == fiscal_period
6527                            && s.statement_type == StatementType::BalanceSheet
6528                    })
6529                });
6530
                // Each figure defaults to zero when the statement or the line code
                // is missing.
                // NOTE(review): the consolidated BS/IS line items were overwritten
                // with consolidation output earlier in this function; whether those
                // items carry the "IS-REV" / "IS-OI" / "BS-TA" codes cannot be seen
                // from here — verify against ConsolidationGenerator.
6531                let consolidated_revenue = is_stmt
6532                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6533                    .map(|li| -li.amount) // revenue is stored as negative in IS
6534                    .unwrap_or(rust_decimal::Decimal::ZERO);
6535
6536                let consolidated_profit = is_stmt
6537                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6538                    .map(|li| li.amount)
6539                    .unwrap_or(rust_decimal::Decimal::ZERO);
6540
6541                let consolidated_assets = bs_stmt
6542                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6543                    .map(|li| li.amount)
6544                    .unwrap_or(rust_decimal::Decimal::ZERO);
6545
6546                // Skip periods where we have no financial data
                // (both revenue and total assets are zero).
6547                if consolidated_revenue == rust_decimal::Decimal::ZERO
6548                    && consolidated_assets == rust_decimal::Decimal::ZERO
6549                {
6550                    continue;
6551                }
6552
                // The first company's code is used as the group code; "GROUP" is only
                // used when no company is configured.
6553                let group_code = self
6554                    .config
6555                    .companies
6556                    .first()
6557                    .map(|c| c.code.as_str())
6558                    .unwrap_or("GROUP");
6559
6560                // Compute period depreciation from JEs with document type "CL" hitting account
6561                // 6000 (depreciation expense).  These are generated by phase_period_close.
                // NOTE(review): the filter below does not restrict entries to the
                // current period, so the same total over all journal entries is
                // recomputed on every iteration — confirm whether a per-period
                // figure was intended. Accounts are matched by the "6000" prefix
                // and only debit amounts are summed.
6562                let total_depr: rust_decimal::Decimal = journal_entries
6563                    .iter()
6564                    .filter(|je| je.header.document_type == "CL")
6565                    .flat_map(|je| je.lines.iter())
6566                    .filter(|l| l.gl_account.starts_with("6000"))
6567                    .map(|l| l.debit_amount)
6568                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
                // Pass the total only when it is strictly positive.
6569                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6570                    Some(total_depr)
6571                } else {
6572                    None
6573                };
6574
6575                let (segs, recon) = seg_gen.generate(
6576                    group_code,
6577                    &period_label,
6578                    consolidated_revenue,
6579                    consolidated_profit,
6580                    consolidated_assets,
6581                    &entity_seeds,
6582                    depr_param,
6583                );
6584                segment_reports.extend(segs);
6585                segment_reconciliations.push(recon);
6586            }
6587
6588            info!(
6589                "Segment reports generated: {} segments, {} reconciliations",
6590                segment_reports.len(),
6591                segment_reconciliations.len()
6592            );
6593        }
6594
6595        // Generate bank reconciliations from payment data
        // (skipped entirely when there are no payments at all).
6596        if br_enabled && !document_flows.payments.is_empty() {
6597            let employee_ids: Vec<String> = self
6598                .master_data
6599                .employees
6600                .iter()
6601                .map(|e| e.employee_id.clone())
6602                .collect();
6603            let mut br_gen =
6604                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6605
6606            // Group payments by company code and period
6607            for company in &self.config.companies {
                // Payments flagged `is_vendor` keep their sign, all others are
                // negated. The reference is the check number if present, otherwise
                // the wire reference, otherwise the document id.
6608                let company_payments: Vec<PaymentReference> = document_flows
6609                    .payments
6610                    .iter()
6611                    .filter(|p| p.header.company_code == company.code)
6612                    .map(|p| PaymentReference {
6613                        id: p.header.document_id.clone(),
6614                        amount: if p.is_vendor { p.amount } else { -p.amount },
6615                        date: p.header.document_date,
6616                        reference: p
6617                            .check_number
6618                            .clone()
6619                            .or_else(|| p.wire_reference.clone())
6620                            .unwrap_or_else(|| p.header.document_id.clone()),
6621                    })
6622                    .collect();
6623
                // Companies without any payment get no reconciliations.
6624                if company_payments.is_empty() {
6625                    continue;
6626                }
6627
6628                let bank_account_id = format!("{}-MAIN", company.code);
6629
6630                // Generate one reconciliation per period
                // A reconciliation is produced for every period, including periods
                // in which this company has no payments.
6631                for period in 0..self.config.global.period_months {
6632                    let period_start = start_date + chrono::Months::new(period);
6633                    let period_end =
6634                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6635
                    // Payments dated within the period, both bounds inclusive.
6636                    let period_payments: Vec<PaymentReference> = company_payments
6637                        .iter()
6638                        .filter(|p| p.date >= period_start && p.date <= period_end)
6639                        .cloned()
6640                        .collect();
6641
6642                    let recon = br_gen.generate(
6643                        &company.code,
6644                        &bank_account_id,
6645                        period_start,
6646                        period_end,
6647                        &company.currency,
6648                        &period_payments,
6649                    );
6650                    bank_reconciliations.push(recon);
6651                }
6652            }
6653            info!(
6654                "Bank reconciliations generated: {} reconciliations",
6655                bank_reconciliations.len()
6656            );
6657        }
6658
        // Recorded even when bank reconciliation is disabled (count is then 0).
6659        stats.bank_reconciliation_count = bank_reconciliations.len();
6660        self.check_resources_with_log("post-financial-reporting")?;
6661
6662        if !trial_balances.is_empty() {
6663            info!(
6664                "Period-close trial balances captured: {} periods",
6665                trial_balances.len()
6666            );
6667        }
6668
6669        // Notes to financial statements are generated in a separate post-processing step
6670        // (generate_notes_to_financial_statements) called after accounting_standards and tax
6671        // phases have completed, so that deferred tax and provision data can be wired in.
6672        let notes_to_financial_statements = Vec::new();
6673
6674        Ok(FinancialReportingSnapshot {
6675            financial_statements,
6676            standalone_statements,
6677            consolidated_statements,
6678            consolidation_schedules,
6679            bank_reconciliations,
6680            trial_balances,
6681            segment_reports,
6682            segment_reconciliations,
6683            notes_to_financial_statements,
6684        })
6685    }
6686
6687    /// Populate notes to financial statements using fully-resolved snapshots.
6688    ///
6689    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
6690    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
6691    /// can be wired into the notes context.  The method mutates
6692    /// `financial_reporting.notes_to_financial_statements` in-place.
6693    fn generate_notes_to_financial_statements(
6694        &self,
6695        financial_reporting: &mut FinancialReportingSnapshot,
6696        accounting_standards: &AccountingStandardsSnapshot,
6697        tax: &TaxSnapshot,
6698        hr: &HrSnapshot,
6699        audit: &AuditSnapshot,
6700        treasury: &TreasurySnapshot,
6701    ) {
6702        use datasynth_config::schema::AccountingFrameworkConfig;
6703        use datasynth_core::models::StatementType;
6704        use datasynth_generators::period_close::notes_generator::{
6705            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6706        };
6707
6708        let seed = self.seed;
6709        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6710        {
6711            Ok(d) => d,
6712            Err(_) => return,
6713        };
6714
6715        let mut notes_gen = NotesGenerator::new(seed + 4235);
6716
6717        for company in &self.config.companies {
6718            let last_period_end = start_date
6719                + chrono::Months::new(self.config.global.period_months)
6720                - chrono::Days::new(1);
6721            let fiscal_year = last_period_end.year() as u16;
6722
6723            // Extract relevant amounts from the already-generated financial statements
6724            let entity_is = financial_reporting
6725                .standalone_statements
6726                .get(&company.code)
6727                .and_then(|stmts| {
6728                    stmts.iter().find(|s| {
6729                        s.fiscal_year == fiscal_year
6730                            && s.statement_type == StatementType::IncomeStatement
6731                    })
6732                });
6733            let entity_bs = financial_reporting
6734                .standalone_statements
6735                .get(&company.code)
6736                .and_then(|stmts| {
6737                    stmts.iter().find(|s| {
6738                        s.fiscal_year == fiscal_year
6739                            && s.statement_type == StatementType::BalanceSheet
6740                    })
6741                });
6742
6743            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
6744            let revenue_amount = entity_is
6745                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6746                .map(|li| li.amount);
6747            let ppe_gross = entity_bs
6748                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6749                .map(|li| li.amount);
6750
6751            let framework = match self
6752                .config
6753                .accounting_standards
6754                .framework
6755                .unwrap_or_default()
6756            {
6757                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6758                    "IFRS".to_string()
6759                }
6760                _ => "US GAAP".to_string(),
6761            };
6762
6763            // ---- Deferred tax (IAS 12 / ASC 740) ----
6764            // Sum closing DTA and DTL from rollforward entries for this entity.
6765            let (entity_dta, entity_dtl) = {
6766                let mut dta = rust_decimal::Decimal::ZERO;
6767                let mut dtl = rust_decimal::Decimal::ZERO;
6768                for rf in &tax.deferred_tax.rollforwards {
6769                    if rf.entity_code == company.code {
6770                        dta += rf.closing_dta;
6771                        dtl += rf.closing_dtl;
6772                    }
6773                }
6774                (
6775                    if dta > rust_decimal::Decimal::ZERO {
6776                        Some(dta)
6777                    } else {
6778                        None
6779                    },
6780                    if dtl > rust_decimal::Decimal::ZERO {
6781                        Some(dtl)
6782                    } else {
6783                        None
6784                    },
6785                )
6786            };
6787
6788            // ---- Provisions (IAS 37 / ASC 450) ----
6789            // Filter provisions to this entity; sum best_estimate amounts.
6790            let entity_provisions: Vec<_> = accounting_standards
6791                .provisions
6792                .iter()
6793                .filter(|p| p.entity_code == company.code)
6794                .collect();
6795            let provision_count = entity_provisions.len();
6796            let total_provisions = if provision_count > 0 {
6797                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6798            } else {
6799                None
6800            };
6801
6802            // ---- Pension data from HR snapshot ----
6803            let entity_pension_plan_count = hr
6804                .pension_plans
6805                .iter()
6806                .filter(|p| p.entity_code == company.code)
6807                .count();
6808            let entity_total_dbo: Option<rust_decimal::Decimal> = {
6809                let sum: rust_decimal::Decimal = hr
6810                    .pension_disclosures
6811                    .iter()
6812                    .filter(|d| {
6813                        hr.pension_plans
6814                            .iter()
6815                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6816                    })
6817                    .map(|d| d.net_pension_liability)
6818                    .sum();
6819                let plan_assets_sum: rust_decimal::Decimal = hr
6820                    .pension_plan_assets
6821                    .iter()
6822                    .filter(|a| {
6823                        hr.pension_plans
6824                            .iter()
6825                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6826                    })
6827                    .map(|a| a.fair_value_closing)
6828                    .sum();
6829                if entity_pension_plan_count > 0 {
6830                    Some(sum + plan_assets_sum)
6831                } else {
6832                    None
6833                }
6834            };
6835            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6836                let sum: rust_decimal::Decimal = hr
6837                    .pension_plan_assets
6838                    .iter()
6839                    .filter(|a| {
6840                        hr.pension_plans
6841                            .iter()
6842                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6843                    })
6844                    .map(|a| a.fair_value_closing)
6845                    .sum();
6846                if entity_pension_plan_count > 0 {
6847                    Some(sum)
6848                } else {
6849                    None
6850                }
6851            };
6852
6853            // ---- Audit data: related parties + subsequent events ----
6854            // Audit snapshot covers all entities; use total counts (common case = single entity).
6855            let rp_count = audit.related_party_transactions.len();
6856            let se_count = audit.subsequent_events.len();
6857            let adjusting_count = audit
6858                .subsequent_events
6859                .iter()
6860                .filter(|e| {
6861                    matches!(
6862                        e.classification,
6863                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6864                    )
6865                })
6866                .count();
6867
6868            let ctx = NotesGeneratorContext {
6869                entity_code: company.code.clone(),
6870                framework,
6871                period: format!("FY{}", fiscal_year),
6872                period_end: last_period_end,
6873                currency: company.currency.clone(),
6874                revenue_amount,
6875                total_ppe_gross: ppe_gross,
6876                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6877                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
6878                deferred_tax_asset: entity_dta,
6879                deferred_tax_liability: entity_dtl,
6880                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
6881                provision_count,
6882                total_provisions,
6883                // Pension data from HR snapshot
6884                pension_plan_count: entity_pension_plan_count,
6885                total_dbo: entity_total_dbo,
6886                total_plan_assets: entity_total_plan_assets,
6887                // Audit data
6888                related_party_transaction_count: rp_count,
6889                subsequent_event_count: se_count,
6890                adjusting_event_count: adjusting_count,
6891                ..NotesGeneratorContext::default()
6892            };
6893
6894            let entity_notes = notes_gen.generate(&ctx);
6895            let standard_note_count = entity_notes.len() as u32;
6896            info!(
6897                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6898                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6899            );
6900            financial_reporting
6901                .notes_to_financial_statements
6902                .extend(entity_notes);
6903
6904            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
6905            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6906                .debt_instruments
6907                .iter()
6908                .filter(|d| d.entity_id == company.code)
6909                .map(|d| {
6910                    (
6911                        format!("{:?}", d.instrument_type),
6912                        d.principal,
6913                        d.maturity_date.to_string(),
6914                    )
6915                })
6916                .collect();
6917
6918            let hedge_count = treasury.hedge_relationships.len();
6919            let effective_hedges = treasury
6920                .hedge_relationships
6921                .iter()
6922                .filter(|h| h.is_effective)
6923                .count();
6924            let total_notional: rust_decimal::Decimal = treasury
6925                .hedging_instruments
6926                .iter()
6927                .map(|h| h.notional_amount)
6928                .sum();
6929            let total_fair_value: rust_decimal::Decimal = treasury
6930                .hedging_instruments
6931                .iter()
6932                .map(|h| h.fair_value)
6933                .sum();
6934
6935            // Join provision_movements with provisions to get entity/type info
6936            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6937                .provisions
6938                .iter()
6939                .filter(|p| p.entity_code == company.code)
6940                .map(|p| p.id.as_str())
6941                .collect();
6942            let provision_movements: Vec<(
6943                String,
6944                rust_decimal::Decimal,
6945                rust_decimal::Decimal,
6946                rust_decimal::Decimal,
6947            )> = accounting_standards
6948                .provision_movements
6949                .iter()
6950                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6951                .map(|m| {
6952                    let prov_type = accounting_standards
6953                        .provisions
6954                        .iter()
6955                        .find(|p| p.id == m.provision_id)
6956                        .map(|p| format!("{:?}", p.provision_type))
6957                        .unwrap_or_else(|| "Unknown".to_string());
6958                    (prov_type, m.opening, m.additions, m.closing)
6959                })
6960                .collect();
6961
6962            let enhanced_ctx = EnhancedNotesContext {
6963                entity_code: company.code.clone(),
6964                period: format!("FY{}", fiscal_year),
6965                currency: company.currency.clone(),
6966                // Inventory breakdown: best-effort using zero (would need balance tracker)
6967                finished_goods_value: rust_decimal::Decimal::ZERO,
6968                wip_value: rust_decimal::Decimal::ZERO,
6969                raw_materials_value: rust_decimal::Decimal::ZERO,
6970                debt_instruments,
6971                hedge_count,
6972                effective_hedges,
6973                total_notional,
6974                total_fair_value,
6975                provision_movements,
6976            };
6977
6978            let enhanced_notes =
6979                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6980            if !enhanced_notes.is_empty() {
6981                info!(
6982                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6983                    company.code,
6984                    enhanced_notes.len(),
6985                    enhanced_ctx.debt_instruments.len(),
6986                    hedge_count,
6987                    enhanced_ctx.provision_movements.len(),
6988                );
6989                financial_reporting
6990                    .notes_to_financial_statements
6991                    .extend(enhanced_notes);
6992            }
6993        }
6994    }
6995
6996    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
6997    ///
6998    /// This ensures the trial balance is coherent with the JEs: every debit and credit
6999    /// posted in the journal entries flows through to the trial balance, using the real
7000    /// GL account numbers from the CoA.
7001    fn build_trial_balance_from_entries(
7002        journal_entries: &[JournalEntry],
7003        coa: &ChartOfAccounts,
7004        company_code: &str,
7005        fiscal_year: u16,
7006        fiscal_period: u8,
7007    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7008        use rust_decimal::Decimal;
7009
7010        // Accumulate total debits and credits per GL account
7011        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
7012        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
7013
7014        for je in journal_entries {
7015            // Filter to matching company, fiscal year, and period
7016            if je.header.company_code != company_code
7017                || je.header.fiscal_year != fiscal_year
7018                || je.header.fiscal_period != fiscal_period
7019            {
7020                continue;
7021            }
7022
7023            for line in &je.lines {
7024                let acct = &line.gl_account;
7025                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
7026                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
7027            }
7028        }
7029
7030        // Build a TrialBalanceEntry for each account that had activity
7031        let mut all_accounts: Vec<&String> = account_debits
7032            .keys()
7033            .chain(account_credits.keys())
7034            .collect::<std::collections::HashSet<_>>()
7035            .into_iter()
7036            .collect();
7037        all_accounts.sort();
7038
7039        let mut entries = Vec::new();
7040
7041        for acct_number in all_accounts {
7042            let debit = account_debits
7043                .get(acct_number)
7044                .copied()
7045                .unwrap_or(Decimal::ZERO);
7046            let credit = account_credits
7047                .get(acct_number)
7048                .copied()
7049                .unwrap_or(Decimal::ZERO);
7050
7051            if debit.is_zero() && credit.is_zero() {
7052                continue;
7053            }
7054
7055            // Look up account name from CoA, fall back to "Account {code}"
7056            let account_name = coa
7057                .get_account(acct_number)
7058                .map(|gl| gl.short_description.clone())
7059                .unwrap_or_else(|| format!("Account {acct_number}"));
7060
7061            // Map account code prefix to the category strings expected by
7062            // FinancialStatementGenerator (Cash, Receivables, Inventory,
7063            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
7064            // OperatingExpenses).
7065            let category = Self::category_from_account_code(acct_number);
7066
7067            entries.push(datasynth_generators::TrialBalanceEntry {
7068                account_code: acct_number.clone(),
7069                account_name,
7070                category,
7071                debit_balance: debit,
7072                credit_balance: credit,
7073            });
7074        }
7075
7076        entries
7077    }
7078
7079    /// Build a cumulative trial balance by aggregating all JEs from the start up to
7080    /// (and including) the given period end date.
7081    ///
7082    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
7083    /// while income statement accounts (revenue, expenses) show only the current period.
7084    /// The two are merged into a single Vec for the FinancialStatementGenerator.
7085    fn build_cumulative_trial_balance(
7086        journal_entries: &[JournalEntry],
7087        coa: &ChartOfAccounts,
7088        company_code: &str,
7089        start_date: NaiveDate,
7090        period_end: NaiveDate,
7091        fiscal_year: u16,
7092        fiscal_period: u8,
7093    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7094        use rust_decimal::Decimal;
7095
7096        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
7097        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
7098        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
7099
7100        // Accumulate debits/credits for income statement accounts (current period only)
7101        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
7102        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
7103
7104        for je in journal_entries {
7105            if je.header.company_code != company_code {
7106                continue;
7107            }
7108
7109            for line in &je.lines {
7110                let acct = &line.gl_account;
7111                let category = Self::category_from_account_code(acct);
7112                let is_bs_account = matches!(
7113                    category.as_str(),
7114                    "Cash"
7115                        | "Receivables"
7116                        | "Inventory"
7117                        | "FixedAssets"
7118                        | "Payables"
7119                        | "AccruedLiabilities"
7120                        | "LongTermDebt"
7121                        | "Equity"
7122                );
7123
7124                if is_bs_account {
7125                    // Balance sheet: accumulate from start through period_end
7126                    if je.header.document_date <= period_end
7127                        && je.header.document_date >= start_date
7128                    {
7129                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7130                            line.debit_amount;
7131                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7132                            line.credit_amount;
7133                    }
7134                } else {
7135                    // Income statement: current period only
7136                    if je.header.fiscal_year == fiscal_year
7137                        && je.header.fiscal_period == fiscal_period
7138                    {
7139                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7140                            line.debit_amount;
7141                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7142                            line.credit_amount;
7143                    }
7144                }
7145            }
7146        }
7147
7148        // Merge all accounts
7149        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
7150        all_accounts.extend(bs_debits.keys().cloned());
7151        all_accounts.extend(bs_credits.keys().cloned());
7152        all_accounts.extend(is_debits.keys().cloned());
7153        all_accounts.extend(is_credits.keys().cloned());
7154
7155        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
7156        sorted_accounts.sort();
7157
7158        let mut entries = Vec::new();
7159
7160        for acct_number in &sorted_accounts {
7161            let category = Self::category_from_account_code(acct_number);
7162            let is_bs_account = matches!(
7163                category.as_str(),
7164                "Cash"
7165                    | "Receivables"
7166                    | "Inventory"
7167                    | "FixedAssets"
7168                    | "Payables"
7169                    | "AccruedLiabilities"
7170                    | "LongTermDebt"
7171                    | "Equity"
7172            );
7173
7174            let (debit, credit) = if is_bs_account {
7175                (
7176                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7177                    bs_credits
7178                        .get(acct_number)
7179                        .copied()
7180                        .unwrap_or(Decimal::ZERO),
7181                )
7182            } else {
7183                (
7184                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7185                    is_credits
7186                        .get(acct_number)
7187                        .copied()
7188                        .unwrap_or(Decimal::ZERO),
7189                )
7190            };
7191
7192            if debit.is_zero() && credit.is_zero() {
7193                continue;
7194            }
7195
7196            let account_name = coa
7197                .get_account(acct_number)
7198                .map(|gl| gl.short_description.clone())
7199                .unwrap_or_else(|| format!("Account {acct_number}"));
7200
7201            entries.push(datasynth_generators::TrialBalanceEntry {
7202                account_code: acct_number.clone(),
7203                account_name,
7204                category,
7205                debit_balance: debit,
7206                credit_balance: credit,
7207            });
7208        }
7209
7210        entries
7211    }
7212
7213    /// Build a JE-derived cash flow statement using the indirect method.
7214    ///
7215    /// Compares current and prior cumulative trial balances to derive working capital
7216    /// changes, producing a coherent cash flow statement tied to actual journal entries.
7217    fn build_cash_flow_from_trial_balances(
7218        current_tb: &[datasynth_generators::TrialBalanceEntry],
7219        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7220        net_income: rust_decimal::Decimal,
7221    ) -> Vec<CashFlowItem> {
7222        use rust_decimal::Decimal;
7223
7224        // Helper: aggregate a TB by category and return net (debit - credit)
7225        let aggregate =
7226            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7227                let mut map: HashMap<String, Decimal> = HashMap::new();
7228                for entry in tb {
7229                    let net = entry.debit_balance - entry.credit_balance;
7230                    *map.entry(entry.category.clone()).or_default() += net;
7231                }
7232                map
7233            };
7234
7235        let current = aggregate(current_tb);
7236        let prior = prior_tb.map(aggregate);
7237
7238        // Get balance for a category, defaulting to zero
7239        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7240            *map.get(key).unwrap_or(&Decimal::ZERO)
7241        };
7242
7243        // Compute change: current - prior (or current if no prior)
7244        let change = |key: &str| -> Decimal {
7245            let curr = get(&current, key);
7246            match &prior {
7247                Some(p) => curr - get(p, key),
7248                None => curr,
7249            }
7250        };
7251
7252        // Operating activities (indirect method)
7253        // Depreciation add-back: approximate from FixedAssets decrease
7254        let fixed_asset_change = change("FixedAssets");
7255        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7256            -fixed_asset_change
7257        } else {
7258            Decimal::ZERO
7259        };
7260
7261        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
7262        let ar_change = change("Receivables");
7263        let inventory_change = change("Inventory");
7264        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
7265        let ap_change = change("Payables");
7266        let accrued_change = change("AccruedLiabilities");
7267
7268        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7269            + (-ap_change)
7270            + (-accrued_change);
7271
7272        // Investing activities
7273        let capex = if fixed_asset_change > Decimal::ZERO {
7274            -fixed_asset_change
7275        } else {
7276            Decimal::ZERO
7277        };
7278        let investing_cf = capex;
7279
7280        // Financing activities
7281        let debt_change = -change("LongTermDebt");
7282        let equity_change = -change("Equity");
7283        let financing_cf = debt_change + equity_change;
7284
7285        let net_change = operating_cf + investing_cf + financing_cf;
7286
7287        vec![
7288            CashFlowItem {
7289                item_code: "CF-NI".to_string(),
7290                label: "Net Income".to_string(),
7291                category: CashFlowCategory::Operating,
7292                amount: net_income,
7293                amount_prior: None,
7294                sort_order: 1,
7295                is_total: false,
7296            },
7297            CashFlowItem {
7298                item_code: "CF-DEP".to_string(),
7299                label: "Depreciation & Amortization".to_string(),
7300                category: CashFlowCategory::Operating,
7301                amount: depreciation_addback,
7302                amount_prior: None,
7303                sort_order: 2,
7304                is_total: false,
7305            },
7306            CashFlowItem {
7307                item_code: "CF-AR".to_string(),
7308                label: "Change in Accounts Receivable".to_string(),
7309                category: CashFlowCategory::Operating,
7310                amount: -ar_change,
7311                amount_prior: None,
7312                sort_order: 3,
7313                is_total: false,
7314            },
7315            CashFlowItem {
7316                item_code: "CF-AP".to_string(),
7317                label: "Change in Accounts Payable".to_string(),
7318                category: CashFlowCategory::Operating,
7319                amount: -ap_change,
7320                amount_prior: None,
7321                sort_order: 4,
7322                is_total: false,
7323            },
7324            CashFlowItem {
7325                item_code: "CF-INV".to_string(),
7326                label: "Change in Inventory".to_string(),
7327                category: CashFlowCategory::Operating,
7328                amount: -inventory_change,
7329                amount_prior: None,
7330                sort_order: 5,
7331                is_total: false,
7332            },
7333            CashFlowItem {
7334                item_code: "CF-OP".to_string(),
7335                label: "Net Cash from Operating Activities".to_string(),
7336                category: CashFlowCategory::Operating,
7337                amount: operating_cf,
7338                amount_prior: None,
7339                sort_order: 6,
7340                is_total: true,
7341            },
7342            CashFlowItem {
7343                item_code: "CF-CAPEX".to_string(),
7344                label: "Capital Expenditures".to_string(),
7345                category: CashFlowCategory::Investing,
7346                amount: capex,
7347                amount_prior: None,
7348                sort_order: 7,
7349                is_total: false,
7350            },
7351            CashFlowItem {
7352                item_code: "CF-INV-T".to_string(),
7353                label: "Net Cash from Investing Activities".to_string(),
7354                category: CashFlowCategory::Investing,
7355                amount: investing_cf,
7356                amount_prior: None,
7357                sort_order: 8,
7358                is_total: true,
7359            },
7360            CashFlowItem {
7361                item_code: "CF-DEBT".to_string(),
7362                label: "Net Borrowings / (Repayments)".to_string(),
7363                category: CashFlowCategory::Financing,
7364                amount: debt_change,
7365                amount_prior: None,
7366                sort_order: 9,
7367                is_total: false,
7368            },
7369            CashFlowItem {
7370                item_code: "CF-EQ".to_string(),
7371                label: "Equity Changes".to_string(),
7372                category: CashFlowCategory::Financing,
7373                amount: equity_change,
7374                amount_prior: None,
7375                sort_order: 10,
7376                is_total: false,
7377            },
7378            CashFlowItem {
7379                item_code: "CF-FIN-T".to_string(),
7380                label: "Net Cash from Financing Activities".to_string(),
7381                category: CashFlowCategory::Financing,
7382                amount: financing_cf,
7383                amount_prior: None,
7384                sort_order: 11,
7385                is_total: true,
7386            },
7387            CashFlowItem {
7388                item_code: "CF-NET".to_string(),
7389                label: "Net Change in Cash".to_string(),
7390                category: CashFlowCategory::Operating,
7391                amount: net_change,
7392                amount_prior: None,
7393                sort_order: 12,
7394                is_total: true,
7395            },
7396        ]
7397    }
7398
7399    /// Calculate net income from a set of trial balance entries.
7400    ///
7401    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
7402    fn calculate_net_income_from_tb(
7403        tb: &[datasynth_generators::TrialBalanceEntry],
7404    ) -> rust_decimal::Decimal {
7405        use rust_decimal::Decimal;
7406
7407        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7408        for entry in tb {
7409            let net = entry.debit_balance - entry.credit_balance;
7410            *aggregated.entry(entry.category.clone()).or_default() += net;
7411        }
7412
7413        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7414        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7415        let opex = *aggregated
7416            .get("OperatingExpenses")
7417            .unwrap_or(&Decimal::ZERO);
7418        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7419        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7420
7421        // revenue is negative (credit-normal), expenses are positive (debit-normal)
7422        // other_income is typically negative (credit), other_expenses is typically positive
7423        let operating_income = revenue - cogs - opex - other_expenses - other_income;
7424        let tax_rate = Decimal::new(25, 2); // 0.25
7425        let tax = operating_income * tax_rate;
7426        operating_income - tax
7427    }
7428
7429    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
7430    ///
7431    /// Uses the first two digits of the account code to classify into the categories
7432    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
7433    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
7434    /// OperatingExpenses, OtherIncome, OtherExpenses.
7435    fn category_from_account_code(code: &str) -> String {
7436        let prefix: String = code.chars().take(2).collect();
7437        match prefix.as_str() {
7438            "10" => "Cash",
7439            "11" => "Receivables",
7440            "12" | "13" | "14" => "Inventory",
7441            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
7442            "20" => "Payables",
7443            "21" | "22" | "23" | "24" => "AccruedLiabilities",
7444            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
7445            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
7446            "40" | "41" | "42" | "43" | "44" => "Revenue",
7447            "50" | "51" | "52" => "CostOfSales",
7448            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
7449                "OperatingExpenses"
7450            }
7451            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
7452            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
7453            _ => "OperatingExpenses",
7454        }
7455        .to_string()
7456    }
7457
7458    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
7459    fn phase_hr_data(
7460        &mut self,
7461        stats: &mut EnhancedGenerationStatistics,
7462    ) -> SynthResult<HrSnapshot> {
7463        if !self.phase_config.generate_hr {
7464            debug!("Phase 16: Skipped (HR generation disabled)");
7465            return Ok(HrSnapshot::default());
7466        }
7467
7468        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7469
7470        let seed = self.seed;
7471        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7472            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7473        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7474        let company_code = self
7475            .config
7476            .companies
7477            .first()
7478            .map(|c| c.code.as_str())
7479            .unwrap_or("1000");
7480        let currency = self
7481            .config
7482            .companies
7483            .first()
7484            .map(|c| c.currency.as_str())
7485            .unwrap_or("USD");
7486
7487        let employee_ids: Vec<String> = self
7488            .master_data
7489            .employees
7490            .iter()
7491            .map(|e| e.employee_id.clone())
7492            .collect();
7493
7494        if employee_ids.is_empty() {
7495            debug!("Phase 16: Skipped (no employees available)");
7496            return Ok(HrSnapshot::default());
7497        }
7498
7499        // Extract cost-center pool from master data employees for cross-reference
7500        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
7501        let cost_center_ids: Vec<String> = self
7502            .master_data
7503            .employees
7504            .iter()
7505            .filter_map(|e| e.cost_center.clone())
7506            .collect::<std::collections::HashSet<_>>()
7507            .into_iter()
7508            .collect();
7509
7510        let mut snapshot = HrSnapshot::default();
7511
7512        // Generate payroll runs (one per month)
7513        if self.config.hr.payroll.enabled {
7514            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7515                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7516
7517            // Look up country pack for payroll deductions and labels
7518            let payroll_pack = self.primary_pack();
7519
7520            // Store the pack on the generator so generate() resolves
7521            // localized deduction rates and labels from it.
7522            payroll_gen.set_country_pack(payroll_pack.clone());
7523
7524            let employees_with_salary: Vec<(
7525                String,
7526                rust_decimal::Decimal,
7527                Option<String>,
7528                Option<String>,
7529            )> = self
7530                .master_data
7531                .employees
7532                .iter()
7533                .map(|e| {
7534                    // Use the employee's actual annual base salary.
7535                    // Fall back to $60,000 / yr if somehow zero.
7536                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7537                        e.base_salary
7538                    } else {
7539                        rust_decimal::Decimal::from(60_000)
7540                    };
7541                    (
7542                        e.employee_id.clone(),
7543                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
7544                        e.cost_center.clone(),
7545                        e.department_id.clone(),
7546                    )
7547                })
7548                .collect();
7549
7550            // Use generate_with_changes when employee change history is available
7551            // so that salary adjustments, transfers, etc. are reflected in payroll.
7552            let change_history = &self.master_data.employee_change_history;
7553            let has_changes = !change_history.is_empty();
7554            if has_changes {
7555                debug!(
7556                    "Payroll will incorporate {} employee change events",
7557                    change_history.len()
7558                );
7559            }
7560
7561            for month in 0..self.config.global.period_months {
7562                let period_start = start_date + chrono::Months::new(month);
7563                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7564                let (run, items) = if has_changes {
7565                    payroll_gen.generate_with_changes(
7566                        company_code,
7567                        &employees_with_salary,
7568                        period_start,
7569                        period_end,
7570                        currency,
7571                        change_history,
7572                    )
7573                } else {
7574                    payroll_gen.generate(
7575                        company_code,
7576                        &employees_with_salary,
7577                        period_start,
7578                        period_end,
7579                        currency,
7580                    )
7581                };
7582                snapshot.payroll_runs.push(run);
7583                snapshot.payroll_run_count += 1;
7584                snapshot.payroll_line_item_count += items.len();
7585                snapshot.payroll_line_items.extend(items);
7586            }
7587        }
7588
7589        // Generate time entries
7590        if self.config.hr.time_attendance.enabled {
7591            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7592                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7593            // v3.4.2: when a temporal context is configured, time entries
7594            // respect holidays (not just weekends) and submitted_at lag
7595            // snaps to business days.
7596            if let Some(ctx) = &self.temporal_context {
7597                time_gen.set_temporal_context(Arc::clone(ctx));
7598            }
7599            let entries = time_gen.generate(
7600                &employee_ids,
7601                start_date,
7602                end_date,
7603                &self.config.hr.time_attendance,
7604            );
7605            snapshot.time_entry_count = entries.len();
7606            snapshot.time_entries = entries;
7607        }
7608
7609        // Generate expense reports
7610        if self.config.hr.expenses.enabled {
7611            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7612                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7613            expense_gen.set_country_pack(self.primary_pack().clone());
7614            // v3.4.2: snap submission / approval / paid / line-item dates
7615            // to business days when temporal_context is present.
7616            if let Some(ctx) = &self.temporal_context {
7617                expense_gen.set_temporal_context(Arc::clone(ctx));
7618            }
7619            let company_currency = self
7620                .config
7621                .companies
7622                .first()
7623                .map(|c| c.currency.as_str())
7624                .unwrap_or("USD");
7625            let reports = expense_gen.generate_with_currency(
7626                &employee_ids,
7627                start_date,
7628                end_date,
7629                &self.config.hr.expenses,
7630                company_currency,
7631            );
7632            snapshot.expense_report_count = reports.len();
7633            snapshot.expense_reports = reports;
7634        }
7635
7636        // Generate benefit enrollments (gated on payroll, since benefits require employees)
7637        if self.config.hr.payroll.enabled {
7638            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7639            let employee_pairs: Vec<(String, String)> = self
7640                .master_data
7641                .employees
7642                .iter()
7643                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7644                .collect();
7645            let enrollments =
7646                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7647            snapshot.benefit_enrollment_count = enrollments.len();
7648            snapshot.benefit_enrollments = enrollments;
7649        }
7650
7651        // Generate defined benefit pension plans (IAS 19 / ASC 715)
7652        if self.phase_config.generate_hr {
7653            let entity_name = self
7654                .config
7655                .companies
7656                .first()
7657                .map(|c| c.name.as_str())
7658                .unwrap_or("Entity");
7659            let period_months = self.config.global.period_months;
7660            let period_label = {
7661                let y = start_date.year();
7662                let m = start_date.month();
7663                if period_months >= 12 {
7664                    format!("FY{y}")
7665                } else {
7666                    format!("{y}-{m:02}")
7667                }
7668            };
7669            let reporting_date =
7670                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7671
7672            // Compute average annual salary from actual payroll data when available.
7673            // PayrollRun.total_gross covers all employees for one pay period; we sum
7674            // across all runs and divide by employee_count to get per-employee total,
7675            // then annualise for sub-annual periods.
7676            let avg_salary: Option<rust_decimal::Decimal> = {
7677                let employee_count = employee_ids.len();
7678                if self.config.hr.payroll.enabled
7679                    && employee_count > 0
7680                    && !snapshot.payroll_runs.is_empty()
7681                {
7682                    // Sum total gross pay across all payroll runs for this company
7683                    let total_gross: rust_decimal::Decimal = snapshot
7684                        .payroll_runs
7685                        .iter()
7686                        .filter(|r| r.company_code == company_code)
7687                        .map(|r| r.total_gross)
7688                        .sum();
7689                    if total_gross > rust_decimal::Decimal::ZERO {
7690                        // Annualise: total_gross covers `period_months` months of pay
7691                        let annual_total = if period_months > 0 && period_months < 12 {
7692                            total_gross * rust_decimal::Decimal::from(12u32)
7693                                / rust_decimal::Decimal::from(period_months)
7694                        } else {
7695                            total_gross
7696                        };
7697                        Some(
7698                            (annual_total / rust_decimal::Decimal::from(employee_count))
7699                                .round_dp(2),
7700                        )
7701                    } else {
7702                        None
7703                    }
7704                } else {
7705                    None
7706                }
7707            };
7708
7709            let mut pension_gen =
7710                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7711            let pension_snap = pension_gen.generate(
7712                company_code,
7713                entity_name,
7714                &period_label,
7715                reporting_date,
7716                employee_ids.len(),
7717                currency,
7718                avg_salary,
7719                period_months,
7720            );
7721            snapshot.pension_plan_count = pension_snap.plans.len();
7722            snapshot.pension_plans = pension_snap.plans;
7723            snapshot.pension_obligations = pension_snap.obligations;
7724            snapshot.pension_plan_assets = pension_snap.plan_assets;
7725            snapshot.pension_disclosures = pension_snap.disclosures;
7726            // Pension JEs are returned here so they can be added to entries
7727            // in the caller (stored temporarily on snapshot for transfer).
7728            // We embed them in the hr snapshot for simplicity; the orchestrator
7729            // will extract and extend `entries`.
7730            snapshot.pension_journal_entries = pension_snap.journal_entries;
7731        }
7732
7733        // Generate stock-based compensation (ASC 718 / IFRS 2)
7734        if self.phase_config.generate_hr && !employee_ids.is_empty() {
7735            let period_months = self.config.global.period_months;
7736            let period_label = {
7737                let y = start_date.year();
7738                let m = start_date.month();
7739                if period_months >= 12 {
7740                    format!("FY{y}")
7741                } else {
7742                    format!("{y}-{m:02}")
7743                }
7744            };
7745            let reporting_date =
7746                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7747
7748            let mut stock_comp_gen =
7749                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7750            let stock_snap = stock_comp_gen.generate(
7751                company_code,
7752                &employee_ids,
7753                start_date,
7754                &period_label,
7755                reporting_date,
7756                currency,
7757            );
7758            snapshot.stock_grant_count = stock_snap.grants.len();
7759            snapshot.stock_grants = stock_snap.grants;
7760            snapshot.stock_comp_expenses = stock_snap.expenses;
7761            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7762        }
7763
7764        stats.payroll_run_count = snapshot.payroll_run_count;
7765        stats.time_entry_count = snapshot.time_entry_count;
7766        stats.expense_report_count = snapshot.expense_report_count;
7767        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7768        stats.pension_plan_count = snapshot.pension_plan_count;
7769        stats.stock_grant_count = snapshot.stock_grant_count;
7770
7771        info!(
7772            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7773            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7774            snapshot.time_entry_count, snapshot.expense_report_count,
7775            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7776            snapshot.stock_grant_count
7777        );
7778        self.check_resources_with_log("post-hr")?;
7779
7780        Ok(snapshot)
7781    }
7782
7783    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
7784    fn phase_accounting_standards(
7785        &mut self,
7786        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7787        journal_entries: &[JournalEntry],
7788        stats: &mut EnhancedGenerationStatistics,
7789    ) -> SynthResult<AccountingStandardsSnapshot> {
7790        if !self.phase_config.generate_accounting_standards {
7791            debug!("Phase 17: Skipped (accounting standards generation disabled)");
7792            return Ok(AccountingStandardsSnapshot::default());
7793        }
7794        info!("Phase 17: Generating Accounting Standards Data");
7795
7796        let seed = self.seed;
7797        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7798            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7799        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7800        let company_code = self
7801            .config
7802            .companies
7803            .first()
7804            .map(|c| c.code.as_str())
7805            .unwrap_or("1000");
7806        let currency = self
7807            .config
7808            .companies
7809            .first()
7810            .map(|c| c.currency.as_str())
7811            .unwrap_or("USD");
7812
7813        // Convert config framework to standards framework.
7814        // If the user explicitly set a framework in the YAML config, use that.
7815        // Otherwise, fall back to the country pack's accounting.framework field,
7816        // and if that is also absent or unrecognised, default to US GAAP.
7817        let framework = match self.config.accounting_standards.framework {
7818            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7819                datasynth_standards::framework::AccountingFramework::UsGaap
7820            }
7821            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7822                datasynth_standards::framework::AccountingFramework::Ifrs
7823            }
7824            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7825                datasynth_standards::framework::AccountingFramework::DualReporting
7826            }
7827            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7828                datasynth_standards::framework::AccountingFramework::FrenchGaap
7829            }
7830            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7831                datasynth_standards::framework::AccountingFramework::GermanGaap
7832            }
7833            None => {
7834                // Derive framework from the primary company's country pack
7835                let pack = self.primary_pack();
7836                let pack_fw = pack.accounting.framework.as_str();
7837                match pack_fw {
7838                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7839                    "dual_reporting" => {
7840                        datasynth_standards::framework::AccountingFramework::DualReporting
7841                    }
7842                    "french_gaap" => {
7843                        datasynth_standards::framework::AccountingFramework::FrenchGaap
7844                    }
7845                    "german_gaap" | "hgb" => {
7846                        datasynth_standards::framework::AccountingFramework::GermanGaap
7847                    }
7848                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
7849                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7850                }
7851            }
7852        };
7853
7854        let mut snapshot = AccountingStandardsSnapshot::default();
7855
7856        // Revenue recognition
7857        if self.config.accounting_standards.revenue_recognition.enabled {
7858            let customer_ids: Vec<String> = self
7859                .master_data
7860                .customers
7861                .iter()
7862                .map(|c| c.customer_id.clone())
7863                .collect();
7864
7865            if !customer_ids.is_empty() {
7866                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7867                let contracts = rev_gen.generate(
7868                    company_code,
7869                    &customer_ids,
7870                    start_date,
7871                    end_date,
7872                    currency,
7873                    &self.config.accounting_standards.revenue_recognition,
7874                    framework,
7875                );
7876                snapshot.revenue_contract_count = contracts.len();
7877                snapshot.contracts = contracts;
7878            }
7879        }
7880
7881        // Impairment testing
7882        if self.config.accounting_standards.impairment.enabled {
7883            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7884                .master_data
7885                .assets
7886                .iter()
7887                .map(|a| {
7888                    (
7889                        a.asset_id.clone(),
7890                        a.description.clone(),
7891                        a.acquisition_cost,
7892                    )
7893                })
7894                .collect();
7895
7896            if !asset_data.is_empty() {
7897                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7898                let tests = imp_gen.generate(
7899                    company_code,
7900                    &asset_data,
7901                    end_date,
7902                    &self.config.accounting_standards.impairment,
7903                    framework,
7904                );
7905                snapshot.impairment_test_count = tests.len();
7906                snapshot.impairment_tests = tests;
7907            }
7908        }
7909
7910        // Business combinations (IFRS 3 / ASC 805)
7911        if self
7912            .config
7913            .accounting_standards
7914            .business_combinations
7915            .enabled
7916        {
7917            let bc_config = &self.config.accounting_standards.business_combinations;
7918            let framework_str = match framework {
7919                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7920                _ => "US_GAAP",
7921            };
7922            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7923            let bc_snap = bc_gen.generate(
7924                company_code,
7925                currency,
7926                start_date,
7927                end_date,
7928                bc_config.acquisition_count,
7929                framework_str,
7930            );
7931            snapshot.business_combination_count = bc_snap.combinations.len();
7932            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7933            snapshot.business_combinations = bc_snap.combinations;
7934        }
7935
7936        // Expected Credit Loss (IFRS 9 / ASC 326)
7937        if self
7938            .config
7939            .accounting_standards
7940            .expected_credit_loss
7941            .enabled
7942        {
7943            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7944            let framework_str = match framework {
7945                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7946                _ => "ASC_326",
7947            };
7948
7949            // Use AR aging data from the subledger snapshot if available;
7950            // otherwise generate synthetic bucket exposures.
7951            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7952
7953            let mut ecl_gen = EclGenerator::new(seed + 43);
7954
7955            // Collect combined bucket totals across all company AR aging reports.
7956            let bucket_exposures: Vec<(
7957                datasynth_core::models::subledger::ar::AgingBucket,
7958                rust_decimal::Decimal,
7959            )> = if ar_aging_reports.is_empty() {
7960                // No AR aging data — synthesise plausible bucket exposures.
7961                use datasynth_core::models::subledger::ar::AgingBucket;
7962                vec![
7963                    (
7964                        AgingBucket::Current,
7965                        rust_decimal::Decimal::from(500_000_u32),
7966                    ),
7967                    (
7968                        AgingBucket::Days1To30,
7969                        rust_decimal::Decimal::from(120_000_u32),
7970                    ),
7971                    (
7972                        AgingBucket::Days31To60,
7973                        rust_decimal::Decimal::from(45_000_u32),
7974                    ),
7975                    (
7976                        AgingBucket::Days61To90,
7977                        rust_decimal::Decimal::from(15_000_u32),
7978                    ),
7979                    (
7980                        AgingBucket::Over90Days,
7981                        rust_decimal::Decimal::from(8_000_u32),
7982                    ),
7983                ]
7984            } else {
7985                use datasynth_core::models::subledger::ar::AgingBucket;
7986                // Sum bucket totals from all reports.
7987                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7988                    std::collections::HashMap::new();
7989                for report in ar_aging_reports {
7990                    for (bucket, amount) in &report.bucket_totals {
7991                        *totals.entry(*bucket).or_default() += amount;
7992                    }
7993                }
7994                AgingBucket::all()
7995                    .into_iter()
7996                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7997                    .collect()
7998            };
7999
8000            let ecl_snap = ecl_gen.generate(
8001                company_code,
8002                end_date,
8003                &bucket_exposures,
8004                ecl_config,
8005                &period_label,
8006                framework_str,
8007            );
8008
8009            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
8010            snapshot.ecl_models = ecl_snap.ecl_models;
8011            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
8012            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
8013        }
8014
8015        // Provisions and contingencies (IAS 37 / ASC 450)
8016        {
8017            let framework_str = match framework {
8018                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8019                _ => "US_GAAP",
8020            };
8021
8022            // Compute actual revenue from the journal entries generated so far.
8023            // The `journal_entries` slice passed to this phase contains all GL entries
8024            // up to and including Period Close. Fall back to a minimum of 100_000 to
8025            // avoid degenerate zero-based provision amounts on first-period datasets.
8026            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
8027                .max(rust_decimal::Decimal::from(100_000_u32));
8028
8029            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8030
8031            let mut prov_gen = ProvisionGenerator::new(seed + 44);
8032            let prov_snap = prov_gen.generate(
8033                company_code,
8034                currency,
8035                revenue_proxy,
8036                end_date,
8037                &period_label,
8038                framework_str,
8039                None, // prior_opening: no carry-forward data in single-period runs
8040            );
8041
8042            snapshot.provision_count = prov_snap.provisions.len();
8043            snapshot.provisions = prov_snap.provisions;
8044            snapshot.provision_movements = prov_snap.movements;
8045            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
8046            snapshot.provision_journal_entries = prov_snap.journal_entries;
8047        }
8048
8049        // IAS 21 Functional Currency Translation
8050        // For each company whose functional currency differs from the presentation
8051        // currency, generate a CurrencyTranslationResult with CTA (OCI).
8052        {
8053            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8054
8055            let presentation_currency = self
8056                .config
8057                .global
8058                .presentation_currency
8059                .clone()
8060                .unwrap_or_else(|| self.config.global.group_currency.clone());
8061
8062            // Build a minimal rate table populated with approximate rates from
8063            // the FX model base rates (USD-based) so we can do the translation.
8064            let mut rate_table = FxRateTable::new(&presentation_currency);
8065
8066            // Populate with base rates against USD; if presentation_currency is
8067            // not USD we do a best-effort two-step conversion using the table's
8068            // triangulation support.
8069            let base_rates = base_rates_usd();
8070            for (ccy, rate) in &base_rates {
8071                rate_table.add_rate(FxRate::new(
8072                    ccy,
8073                    "USD",
8074                    RateType::Closing,
8075                    end_date,
8076                    *rate,
8077                    "SYNTHETIC",
8078                ));
8079                // Average rate = 98% of closing (approximation).
8080                // 0.98 = 98/100 = Decimal::new(98, 2)
8081                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
8082                rate_table.add_rate(FxRate::new(
8083                    ccy,
8084                    "USD",
8085                    RateType::Average,
8086                    end_date,
8087                    avg,
8088                    "SYNTHETIC",
8089                ));
8090            }
8091
8092            let mut translation_results = Vec::new();
8093            for company in &self.config.companies {
8094                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
8095                // to ensure the translation produces non-trivial CTA amounts.
8096                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
8097                    .max(rust_decimal::Decimal::from(100_000_u32));
8098
8099                let func_ccy = company
8100                    .functional_currency
8101                    .clone()
8102                    .unwrap_or_else(|| company.currency.clone());
8103
8104                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
8105                    &company.code,
8106                    &func_ccy,
8107                    &presentation_currency,
8108                    &ias21_period_label,
8109                    end_date,
8110                    company_revenue,
8111                    &rate_table,
8112                );
8113                translation_results.push(result);
8114            }
8115
8116            snapshot.currency_translation_count = translation_results.len();
8117            snapshot.currency_translation_results = translation_results;
8118        }
8119
8120        stats.revenue_contract_count = snapshot.revenue_contract_count;
8121        stats.impairment_test_count = snapshot.impairment_test_count;
8122        stats.business_combination_count = snapshot.business_combination_count;
8123        stats.ecl_model_count = snapshot.ecl_model_count;
8124        stats.provision_count = snapshot.provision_count;
8125
8126        // ------------------------------------------------------------
8127        // v3.3.1: Lease accounting (IFRS 16 / ASC 842)
8128        // ------------------------------------------------------------
8129        if self.config.accounting_standards.leases.enabled {
8130            use datasynth_generators::standards::LeaseGenerator;
8131            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8132                .unwrap_or_else(|_| {
8133                    NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
8134                });
8135            let framework =
8136                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8137            let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
8138            for company in &self.config.companies {
8139                let leases = lease_gen.generate(
8140                    &company.code,
8141                    start_date,
8142                    &self.config.accounting_standards.leases,
8143                    framework,
8144                );
8145                snapshot.lease_count += leases.len();
8146                snapshot.leases.extend(leases);
8147            }
8148            info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
8149        }
8150
8151        // ------------------------------------------------------------
8152        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
8153        // ------------------------------------------------------------
8154        if self.config.accounting_standards.fair_value.enabled {
8155            use datasynth_generators::standards::FairValueGenerator;
8156            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8157                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8158                + chrono::Months::new(self.config.global.period_months);
8159            let framework =
8160                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8161            let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8162            for company in &self.config.companies {
8163                let measurements = fv_gen.generate(
8164                    &company.code,
8165                    end_date,
8166                    &company.currency,
8167                    &self.config.accounting_standards.fair_value,
8168                    framework,
8169                );
8170                snapshot.fair_value_measurement_count += measurements.len();
8171                snapshot.fair_value_measurements.extend(measurements);
8172            }
8173            info!(
8174                "v3.3.1 fair value measurements: {}",
8175                snapshot.fair_value_measurement_count
8176            );
8177        }
8178
8179        // ------------------------------------------------------------
8180        // v3.3.1: Framework reconciliation (dual reporting only)
8181        // ------------------------------------------------------------
8182        if self.config.accounting_standards.generate_differences
8183            && matches!(
8184                self.config.accounting_standards.framework,
8185                Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8186            )
8187        {
8188            use datasynth_generators::standards::FrameworkReconciliationGenerator;
8189            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8190                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8191                + chrono::Months::new(self.config.global.period_months);
8192            let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8193            for company in &self.config.companies {
8194                let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8195                snapshot.framework_difference_count += records.len();
8196                snapshot.framework_differences.extend(records);
8197                snapshot.framework_reconciliations.push(reconciliation);
8198            }
8199            info!(
8200                "v3.3.1 framework reconciliation: {} differences across {} entities",
8201                snapshot.framework_difference_count,
8202                snapshot.framework_reconciliations.len()
8203            );
8204        }
8205
8206        info!(
8207            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8208            snapshot.revenue_contract_count,
8209            snapshot.impairment_test_count,
8210            snapshot.business_combination_count,
8211            snapshot.ecl_model_count,
8212            snapshot.provision_count,
8213            snapshot.currency_translation_count,
8214            snapshot.lease_count,
8215            snapshot.fair_value_measurement_count,
8216            snapshot.framework_difference_count,
8217        );
8218        self.check_resources_with_log("post-accounting-standards")?;
8219
8220        Ok(snapshot)
8221    }
8222
8223    /// v3.3.1: helper to resolve the accounting-standards framework enum
8224    /// from config into the `datasynth_standards::framework::AccountingFramework`
8225    /// type expected by standards generators. Falls back to US GAAP.
8226    fn resolve_accounting_framework(
8227        cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8228    ) -> datasynth_standards::framework::AccountingFramework {
8229        use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8230        use datasynth_standards::framework::AccountingFramework as Fw;
8231        match cfg {
8232            Some(Cfg::Ifrs) => Fw::Ifrs,
8233            Some(Cfg::DualReporting) => Fw::DualReporting,
8234            Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8235            Some(Cfg::GermanGaap) => Fw::GermanGaap,
8236            _ => Fw::UsGaap,
8237        }
8238    }
8239
8240    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
8241    fn phase_manufacturing(
8242        &mut self,
8243        stats: &mut EnhancedGenerationStatistics,
8244    ) -> SynthResult<ManufacturingSnapshot> {
8245        if !self.phase_config.generate_manufacturing {
8246            debug!("Phase 18: Skipped (manufacturing generation disabled)");
8247            return Ok(ManufacturingSnapshot::default());
8248        }
8249        info!("Phase 18: Generating Manufacturing Data");
8250
8251        let seed = self.seed;
8252        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8253            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8254        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8255        let company_code = self
8256            .config
8257            .companies
8258            .first()
8259            .map(|c| c.code.as_str())
8260            .unwrap_or("1000");
8261
8262        let material_data: Vec<(String, String)> = self
8263            .master_data
8264            .materials
8265            .iter()
8266            .map(|m| (m.material_id.clone(), m.description.clone()))
8267            .collect();
8268
8269        if material_data.is_empty() {
8270            debug!("Phase 18: Skipped (no materials available)");
8271            return Ok(ManufacturingSnapshot::default());
8272        }
8273
8274        let mut snapshot = ManufacturingSnapshot::default();
8275
8276        // Generate production orders
8277        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8278        // v3.4.3: snap planned / actual / operation dates to business days.
8279        if let Some(ctx) = &self.temporal_context {
8280            prod_gen.set_temporal_context(Arc::clone(ctx));
8281        }
8282        let production_orders = prod_gen.generate(
8283            company_code,
8284            &material_data,
8285            start_date,
8286            end_date,
8287            &self.config.manufacturing.production_orders,
8288            &self.config.manufacturing.costing,
8289            &self.config.manufacturing.routing,
8290        );
8291        snapshot.production_order_count = production_orders.len();
8292
8293        // Generate quality inspections from production orders
8294        let inspection_data: Vec<(String, String, String)> = production_orders
8295            .iter()
8296            .map(|po| {
8297                (
8298                    po.order_id.clone(),
8299                    po.material_id.clone(),
8300                    po.material_description.clone(),
8301                )
8302            })
8303            .collect();
8304
8305        snapshot.production_orders = production_orders;
8306
8307        if !inspection_data.is_empty() {
8308            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8309            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8310            snapshot.quality_inspection_count = inspections.len();
8311            snapshot.quality_inspections = inspections;
8312        }
8313
8314        // Generate cycle counts (one per month)
8315        let storage_locations: Vec<(String, String)> = material_data
8316            .iter()
8317            .enumerate()
8318            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8319            .collect();
8320
8321        let employee_ids: Vec<String> = self
8322            .master_data
8323            .employees
8324            .iter()
8325            .map(|e| e.employee_id.clone())
8326            .collect();
8327        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8328            .with_employee_pool(employee_ids);
8329        let mut cycle_count_total = 0usize;
8330        for month in 0..self.config.global.period_months {
8331            let count_date = start_date + chrono::Months::new(month);
8332            let items_per_count = storage_locations.len().clamp(10, 50);
8333            let cc = cc_gen.generate(
8334                company_code,
8335                &storage_locations,
8336                count_date,
8337                items_per_count,
8338            );
8339            snapshot.cycle_counts.push(cc);
8340            cycle_count_total += 1;
8341        }
8342        snapshot.cycle_count_count = cycle_count_total;
8343
8344        // Generate BOM components
8345        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8346        let bom_components = bom_gen.generate(company_code, &material_data);
8347        snapshot.bom_component_count = bom_components.len();
8348        snapshot.bom_components = bom_components;
8349
8350        // Generate inventory movements — link GoodsIssue movements to real production order IDs
8351        let currency = self
8352            .config
8353            .companies
8354            .first()
8355            .map(|c| c.currency.as_str())
8356            .unwrap_or("USD");
8357        let production_order_ids: Vec<String> = snapshot
8358            .production_orders
8359            .iter()
8360            .map(|po| po.order_id.clone())
8361            .collect();
8362        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8363        let inventory_movements = inv_mov_gen.generate_with_production_orders(
8364            company_code,
8365            &material_data,
8366            start_date,
8367            end_date,
8368            2,
8369            currency,
8370            &production_order_ids,
8371        );
8372        snapshot.inventory_movement_count = inventory_movements.len();
8373        snapshot.inventory_movements = inventory_movements;
8374
8375        stats.production_order_count = snapshot.production_order_count;
8376        stats.quality_inspection_count = snapshot.quality_inspection_count;
8377        stats.cycle_count_count = snapshot.cycle_count_count;
8378        stats.bom_component_count = snapshot.bom_component_count;
8379        stats.inventory_movement_count = snapshot.inventory_movement_count;
8380
8381        info!(
8382            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8383            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8384            snapshot.bom_component_count, snapshot.inventory_movement_count
8385        );
8386        self.check_resources_with_log("post-manufacturing")?;
8387
8388        Ok(snapshot)
8389    }
8390
8391    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
8392    fn phase_sales_kpi_budgets(
8393        &mut self,
8394        coa: &Arc<ChartOfAccounts>,
8395        financial_reporting: &FinancialReportingSnapshot,
8396        stats: &mut EnhancedGenerationStatistics,
8397    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8398        if !self.phase_config.generate_sales_kpi_budgets {
8399            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8400            return Ok(SalesKpiBudgetsSnapshot::default());
8401        }
8402        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8403
8404        let seed = self.seed;
8405        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8406            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8407        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8408        let company_code = self
8409            .config
8410            .companies
8411            .first()
8412            .map(|c| c.code.as_str())
8413            .unwrap_or("1000");
8414
8415        let mut snapshot = SalesKpiBudgetsSnapshot::default();
8416
8417        // Sales Quotes
8418        if self.config.sales_quotes.enabled {
8419            let customer_data: Vec<(String, String)> = self
8420                .master_data
8421                .customers
8422                .iter()
8423                .map(|c| (c.customer_id.clone(), c.name.clone()))
8424                .collect();
8425            let material_data: Vec<(String, String)> = self
8426                .master_data
8427                .materials
8428                .iter()
8429                .map(|m| (m.material_id.clone(), m.description.clone()))
8430                .collect();
8431
8432            if !customer_data.is_empty() && !material_data.is_empty() {
8433                let employee_ids: Vec<String> = self
8434                    .master_data
8435                    .employees
8436                    .iter()
8437                    .map(|e| e.employee_id.clone())
8438                    .collect();
8439                let customer_ids: Vec<String> = self
8440                    .master_data
8441                    .customers
8442                    .iter()
8443                    .map(|c| c.customer_id.clone())
8444                    .collect();
8445                let company_currency = self
8446                    .config
8447                    .companies
8448                    .first()
8449                    .map(|c| c.currency.as_str())
8450                    .unwrap_or("USD");
8451
8452                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8453                    .with_pools(employee_ids, customer_ids);
8454                let quotes = quote_gen.generate_with_currency(
8455                    company_code,
8456                    &customer_data,
8457                    &material_data,
8458                    start_date,
8459                    end_date,
8460                    &self.config.sales_quotes,
8461                    company_currency,
8462                );
8463                snapshot.sales_quote_count = quotes.len();
8464                snapshot.sales_quotes = quotes;
8465            }
8466        }
8467
8468        // Management KPIs
8469        if self.config.financial_reporting.management_kpis.enabled {
8470            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8471            let mut kpis = kpi_gen.generate(
8472                company_code,
8473                start_date,
8474                end_date,
8475                &self.config.financial_reporting.management_kpis,
8476            );
8477
8478            // Override financial KPIs with actual data from financial statements
8479            {
8480                use rust_decimal::Decimal;
8481
8482                if let Some(income_stmt) =
8483                    financial_reporting.financial_statements.iter().find(|fs| {
8484                        fs.statement_type == StatementType::IncomeStatement
8485                            && fs.company_code == company_code
8486                    })
8487                {
8488                    // Extract revenue and COGS from income statement line items
8489                    let total_revenue: Decimal = income_stmt
8490                        .line_items
8491                        .iter()
8492                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
8493                        .map(|li| li.amount)
8494                        .sum();
8495                    let total_cogs: Decimal = income_stmt
8496                        .line_items
8497                        .iter()
8498                        .filter(|li| {
8499                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8500                                && !li.is_total
8501                        })
8502                        .map(|li| li.amount.abs())
8503                        .sum();
8504                    let total_opex: Decimal = income_stmt
8505                        .line_items
8506                        .iter()
8507                        .filter(|li| {
8508                            li.section.contains("Expense")
8509                                && !li.is_total
8510                                && !li.section.contains("Cost")
8511                        })
8512                        .map(|li| li.amount.abs())
8513                        .sum();
8514
8515                    if total_revenue > Decimal::ZERO {
8516                        let hundred = Decimal::from(100);
8517                        let gross_margin_pct =
8518                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8519                        let operating_income = total_revenue - total_cogs - total_opex;
8520                        let op_margin_pct =
8521                            (operating_income * hundred / total_revenue).round_dp(2);
8522
8523                        // Override gross margin and operating margin KPIs
8524                        for kpi in &mut kpis {
8525                            if kpi.name == "Gross Margin" {
8526                                kpi.value = gross_margin_pct;
8527                            } else if kpi.name == "Operating Margin" {
8528                                kpi.value = op_margin_pct;
8529                            }
8530                        }
8531                    }
8532                }
8533
8534                // Override Current Ratio from balance sheet
8535                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8536                    fs.statement_type == StatementType::BalanceSheet
8537                        && fs.company_code == company_code
8538                }) {
8539                    let current_assets: Decimal = bs
8540                        .line_items
8541                        .iter()
8542                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8543                        .map(|li| li.amount)
8544                        .sum();
8545                    let current_liabilities: Decimal = bs
8546                        .line_items
8547                        .iter()
8548                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8549                        .map(|li| li.amount.abs())
8550                        .sum();
8551
8552                    if current_liabilities > Decimal::ZERO {
8553                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
8554                        for kpi in &mut kpis {
8555                            if kpi.name == "Current Ratio" {
8556                                kpi.value = current_ratio;
8557                            }
8558                        }
8559                    }
8560                }
8561            }
8562
8563            snapshot.kpi_count = kpis.len();
8564            snapshot.kpis = kpis;
8565        }
8566
8567        // Budgets
8568        if self.config.financial_reporting.budgets.enabled {
8569            let account_data: Vec<(String, String)> = coa
8570                .accounts
8571                .iter()
8572                .map(|a| (a.account_number.clone(), a.short_description.clone()))
8573                .collect();
8574
8575            if !account_data.is_empty() {
8576                let fiscal_year = start_date.year() as u32;
8577                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8578                let budget = budget_gen.generate(
8579                    company_code,
8580                    fiscal_year,
8581                    &account_data,
8582                    &self.config.financial_reporting.budgets,
8583                );
8584                snapshot.budget_line_count = budget.line_items.len();
8585                snapshot.budgets.push(budget);
8586            }
8587        }
8588
8589        stats.sales_quote_count = snapshot.sales_quote_count;
8590        stats.kpi_count = snapshot.kpi_count;
8591        stats.budget_line_count = snapshot.budget_line_count;
8592
8593        info!(
8594            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8595            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8596        );
8597        self.check_resources_with_log("post-sales-kpi-budgets")?;
8598
8599        Ok(snapshot)
8600    }
8601
8602    /// Compute pre-tax income for a single company from actual journal entries.
8603    ///
8604    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
8605    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
8606    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
8607    /// and the period-close engine so that all three use a consistent definition.
8608    fn compute_pre_tax_income(
8609        company_code: &str,
8610        journal_entries: &[JournalEntry],
8611    ) -> rust_decimal::Decimal {
8612        use datasynth_core::accounts::AccountCategory;
8613        use rust_decimal::Decimal;
8614
8615        let mut total_revenue = Decimal::ZERO;
8616        let mut total_expenses = Decimal::ZERO;
8617
8618        for je in journal_entries {
8619            if je.header.company_code != company_code {
8620                continue;
8621            }
8622            for line in &je.lines {
8623                let cat = AccountCategory::from_account(&line.gl_account);
8624                match cat {
8625                    AccountCategory::Revenue => {
8626                        total_revenue += line.credit_amount - line.debit_amount;
8627                    }
8628                    AccountCategory::Cogs
8629                    | AccountCategory::OperatingExpense
8630                    | AccountCategory::OtherIncomeExpense => {
8631                        total_expenses += line.debit_amount - line.credit_amount;
8632                    }
8633                    _ => {}
8634                }
8635            }
8636        }
8637
8638        let pti = (total_revenue - total_expenses).round_dp(2);
8639        if pti == rust_decimal::Decimal::ZERO {
8640            // No income statement activity yet — fall back to a synthetic value so the
8641            // tax provision generator can still produce meaningful output.
8642            rust_decimal::Decimal::from(1_000_000u32)
8643        } else {
8644            pti
8645        }
8646    }
8647
8648    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
8649    fn phase_tax_generation(
8650        &mut self,
8651        document_flows: &DocumentFlowSnapshot,
8652        journal_entries: &[JournalEntry],
8653        stats: &mut EnhancedGenerationStatistics,
8654    ) -> SynthResult<TaxSnapshot> {
8655        if !self.phase_config.generate_tax {
8656            debug!("Phase 20: Skipped (tax generation disabled)");
8657            return Ok(TaxSnapshot::default());
8658        }
8659        info!("Phase 20: Generating Tax Data");
8660
8661        let seed = self.seed;
8662        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8663            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8664        let fiscal_year = start_date.year();
8665        let company_code = self
8666            .config
8667            .companies
8668            .first()
8669            .map(|c| c.code.as_str())
8670            .unwrap_or("1000");
8671
8672        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8673            seed + 370,
8674            self.config.tax.clone(),
8675        );
8676
8677        let pack = self.primary_pack().clone();
8678        let (jurisdictions, codes) =
8679            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8680
8681        // Generate tax provisions for each company
8682        let mut provisions = Vec::new();
8683        if self.config.tax.provisions.enabled {
8684            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8685            for company in &self.config.companies {
8686                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8687                let statutory_rate = rust_decimal::Decimal::new(
8688                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8689                    2,
8690                );
8691                let provision = provision_gen.generate(
8692                    &company.code,
8693                    start_date,
8694                    pre_tax_income,
8695                    statutory_rate,
8696                );
8697                provisions.push(provision);
8698            }
8699        }
8700
8701        // Generate tax lines from document invoices
8702        let mut tax_lines = Vec::new();
8703        if !codes.is_empty() {
8704            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8705                datasynth_generators::TaxLineGeneratorConfig::default(),
8706                codes.clone(),
8707                seed + 372,
8708            );
8709
8710            // Tax lines from vendor invoices (input tax)
8711            // Use the first company's country as buyer country
8712            let buyer_country = self
8713                .config
8714                .companies
8715                .first()
8716                .map(|c| c.country.as_str())
8717                .unwrap_or("US");
8718            for vi in &document_flows.vendor_invoices {
8719                let lines = tax_line_gen.generate_for_document(
8720                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
8721                    &vi.header.document_id,
8722                    buyer_country, // seller approx same country
8723                    buyer_country,
8724                    vi.payable_amount,
8725                    vi.header.document_date,
8726                    None,
8727                );
8728                tax_lines.extend(lines);
8729            }
8730
8731            // Tax lines from customer invoices (output tax)
8732            for ci in &document_flows.customer_invoices {
8733                let lines = tax_line_gen.generate_for_document(
8734                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8735                    &ci.header.document_id,
8736                    buyer_country, // seller is the company
8737                    buyer_country,
8738                    ci.total_gross_amount,
8739                    ci.header.document_date,
8740                    None,
8741                );
8742                tax_lines.extend(lines);
8743            }
8744        }
8745
8746        // Generate deferred tax data (IAS 12 / ASC 740) for each company
8747        let deferred_tax = {
8748            let companies: Vec<(&str, &str)> = self
8749                .config
8750                .companies
8751                .iter()
8752                .map(|c| (c.code.as_str(), c.country.as_str()))
8753                .collect();
8754            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8755            deferred_gen.generate(&companies, start_date, journal_entries)
8756        };
8757
8758        // Build a document_id → posting_date map so each tax JE uses its
8759        // source document's date rather than a blanket period-end date.
8760        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8761            std::collections::HashMap::new();
8762        for vi in &document_flows.vendor_invoices {
8763            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8764        }
8765        for ci in &document_flows.customer_invoices {
8766            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8767        }
8768
8769        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
8770        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8771        let tax_posting_journal_entries = if !tax_lines.is_empty() {
8772            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8773                &tax_lines,
8774                company_code,
8775                &doc_dates,
8776                end_date,
8777            );
8778            debug!("Generated {} tax posting JEs", jes.len());
8779            jes
8780        } else {
8781            Vec::new()
8782        };
8783
8784        let snapshot = TaxSnapshot {
8785            jurisdiction_count: jurisdictions.len(),
8786            code_count: codes.len(),
8787            jurisdictions,
8788            codes,
8789            tax_provisions: provisions,
8790            tax_lines,
8791            tax_returns: Vec::new(),
8792            withholding_records: Vec::new(),
8793            tax_anomaly_labels: Vec::new(),
8794            deferred_tax,
8795            tax_posting_journal_entries,
8796        };
8797
8798        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8799        stats.tax_code_count = snapshot.code_count;
8800        stats.tax_provision_count = snapshot.tax_provisions.len();
8801        stats.tax_line_count = snapshot.tax_lines.len();
8802
8803        info!(
8804            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8805            snapshot.jurisdiction_count,
8806            snapshot.code_count,
8807            snapshot.tax_provisions.len(),
8808            snapshot.deferred_tax.temporary_differences.len(),
8809            snapshot.deferred_tax.journal_entries.len(),
8810            snapshot.tax_posting_journal_entries.len(),
8811        );
8812        self.check_resources_with_log("post-tax")?;
8813
8814        Ok(snapshot)
8815    }
8816
8817    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
8818    fn phase_esg_generation(
8819        &mut self,
8820        document_flows: &DocumentFlowSnapshot,
8821        manufacturing: &ManufacturingSnapshot,
8822        stats: &mut EnhancedGenerationStatistics,
8823    ) -> SynthResult<EsgSnapshot> {
8824        if !self.phase_config.generate_esg {
8825            debug!("Phase 21: Skipped (ESG generation disabled)");
8826            return Ok(EsgSnapshot::default());
8827        }
8828        let degradation = self.check_resources()?;
8829        if degradation >= DegradationLevel::Reduced {
8830            debug!(
8831                "Phase skipped due to resource pressure (degradation: {:?})",
8832                degradation
8833            );
8834            return Ok(EsgSnapshot::default());
8835        }
8836        info!("Phase 21: Generating ESG Data");
8837
8838        let seed = self.seed;
8839        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8840            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8841        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8842        let entity_id = self
8843            .config
8844            .companies
8845            .first()
8846            .map(|c| c.code.as_str())
8847            .unwrap_or("1000");
8848
8849        let esg_cfg = &self.config.esg;
8850        let mut snapshot = EsgSnapshot::default();
8851
8852        // Energy consumption (feeds into scope 1 & 2 emissions)
8853        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8854            esg_cfg.environmental.energy.clone(),
8855            seed + 80,
8856        );
8857        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8858
8859        // Water usage
8860        let facility_count = esg_cfg.environmental.energy.facility_count;
8861        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8862        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8863
8864        // Waste
8865        let mut waste_gen = datasynth_generators::WasteGenerator::new(
8866            seed + 82,
8867            esg_cfg.environmental.waste.diversion_target,
8868            facility_count,
8869        );
8870        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8871
8872        // Emissions (scope 1, 2, 3)
8873        let mut emission_gen =
8874            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8875
8876        // Build EnergyInput from energy_records
8877        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8878            .iter()
8879            .map(|e| datasynth_generators::EnergyInput {
8880                facility_id: e.facility_id.clone(),
8881                energy_type: match e.energy_source {
8882                    EnergySourceType::NaturalGas => {
8883                        datasynth_generators::EnergyInputType::NaturalGas
8884                    }
8885                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8886                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8887                    _ => datasynth_generators::EnergyInputType::Electricity,
8888                },
8889                consumption_kwh: e.consumption_kwh,
8890                period: e.period,
8891            })
8892            .collect();
8893
8894        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
8895        if !manufacturing.production_orders.is_empty() {
8896            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8897                &manufacturing.production_orders,
8898                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
8899                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
8900            );
8901            if !mfg_energy.is_empty() {
8902                info!(
8903                    "ESG: {} energy inputs derived from {} production orders",
8904                    mfg_energy.len(),
8905                    manufacturing.production_orders.len(),
8906                );
8907                energy_inputs.extend(mfg_energy);
8908            }
8909        }
8910
8911        let mut emissions = Vec::new();
8912        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8913        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8914
8915        // Scope 3: use vendor spend data from actual payments
8916        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8917            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8918            for payment in &document_flows.payments {
8919                if payment.is_vendor {
8920                    *totals
8921                        .entry(payment.business_partner_id.clone())
8922                        .or_default() += payment.amount;
8923                }
8924            }
8925            totals
8926        };
8927        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8928            .master_data
8929            .vendors
8930            .iter()
8931            .map(|v| {
8932                let spend = vendor_payment_totals
8933                    .get(&v.vendor_id)
8934                    .copied()
8935                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8936                datasynth_generators::VendorSpendInput {
8937                    vendor_id: v.vendor_id.clone(),
8938                    category: format!("{:?}", v.vendor_type).to_lowercase(),
8939                    spend,
8940                    country: v.country.clone(),
8941                }
8942            })
8943            .collect();
8944        if !vendor_spend.is_empty() {
8945            emissions.extend(emission_gen.generate_scope3_purchased_goods(
8946                entity_id,
8947                &vendor_spend,
8948                start_date,
8949                end_date,
8950            ));
8951        }
8952
8953        // Business travel & commuting (scope 3)
8954        let headcount = self.master_data.employees.len() as u32;
8955        if headcount > 0 {
8956            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8957            emissions.extend(emission_gen.generate_scope3_business_travel(
8958                entity_id,
8959                travel_spend,
8960                start_date,
8961            ));
8962            emissions
8963                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8964        }
8965
8966        snapshot.emission_count = emissions.len();
8967        snapshot.emissions = emissions;
8968        snapshot.energy = energy_records;
8969
8970        // Social: Workforce diversity, pay equity, safety
8971        let mut workforce_gen =
8972            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8973        let total_headcount = headcount.max(100);
8974        snapshot.diversity =
8975            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8976        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8977
8978        // v2.4: Derive additional workforce diversity metrics from actual employee data
8979        if !self.master_data.employees.is_empty() {
8980            let hr_diversity = workforce_gen.generate_diversity_from_employees(
8981                entity_id,
8982                &self.master_data.employees,
8983                end_date,
8984            );
8985            if !hr_diversity.is_empty() {
8986                info!(
8987                    "ESG: {} diversity metrics derived from {} actual employees",
8988                    hr_diversity.len(),
8989                    self.master_data.employees.len(),
8990                );
8991                snapshot.diversity.extend(hr_diversity);
8992            }
8993        }
8994
8995        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8996            entity_id,
8997            facility_count,
8998            start_date,
8999            end_date,
9000        );
9001
9002        // Compute safety metrics
9003        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
9004        let safety_metric = workforce_gen.compute_safety_metrics(
9005            entity_id,
9006            &snapshot.safety_incidents,
9007            total_hours,
9008            start_date,
9009        );
9010        snapshot.safety_metrics = vec![safety_metric];
9011
9012        // Governance
9013        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
9014            seed + 85,
9015            esg_cfg.governance.board_size,
9016            esg_cfg.governance.independence_target,
9017        );
9018        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
9019
9020        // Supplier ESG assessments
9021        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
9022            esg_cfg.supply_chain_esg.clone(),
9023            seed + 86,
9024        );
9025        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
9026            .master_data
9027            .vendors
9028            .iter()
9029            .map(|v| datasynth_generators::VendorInput {
9030                vendor_id: v.vendor_id.clone(),
9031                country: v.country.clone(),
9032                industry: format!("{:?}", v.vendor_type).to_lowercase(),
9033                quality_score: None,
9034            })
9035            .collect();
9036        snapshot.supplier_assessments =
9037            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
9038
9039        // Disclosures
9040        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
9041            seed + 87,
9042            esg_cfg.reporting.clone(),
9043            esg_cfg.climate_scenarios.clone(),
9044        );
9045        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
9046        snapshot.disclosures = disclosure_gen.generate_disclosures(
9047            entity_id,
9048            &snapshot.materiality,
9049            start_date,
9050            end_date,
9051        );
9052        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
9053        snapshot.disclosure_count = snapshot.disclosures.len();
9054
9055        // Anomaly injection
9056        if esg_cfg.anomaly_rate > 0.0 {
9057            let mut anomaly_injector =
9058                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
9059            let mut labels = Vec::new();
9060            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
9061            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
9062            labels.extend(
9063                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
9064            );
9065            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
9066            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
9067            snapshot.anomaly_labels = labels;
9068        }
9069
9070        stats.esg_emission_count = snapshot.emission_count;
9071        stats.esg_disclosure_count = snapshot.disclosure_count;
9072
9073        info!(
9074            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
9075            snapshot.emission_count,
9076            snapshot.disclosure_count,
9077            snapshot.supplier_assessments.len()
9078        );
9079        self.check_resources_with_log("post-esg")?;
9080
9081        Ok(snapshot)
9082    }
9083
9084    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
9085    fn phase_treasury_data(
9086        &mut self,
9087        document_flows: &DocumentFlowSnapshot,
9088        subledger: &SubledgerSnapshot,
9089        intercompany: &IntercompanySnapshot,
9090        stats: &mut EnhancedGenerationStatistics,
9091    ) -> SynthResult<TreasurySnapshot> {
9092        if !self.phase_config.generate_treasury {
9093            debug!("Phase 22: Skipped (treasury generation disabled)");
9094            return Ok(TreasurySnapshot::default());
9095        }
9096        let degradation = self.check_resources()?;
9097        if degradation >= DegradationLevel::Reduced {
9098            debug!(
9099                "Phase skipped due to resource pressure (degradation: {:?})",
9100                degradation
9101            );
9102            return Ok(TreasurySnapshot::default());
9103        }
9104        info!("Phase 22: Generating Treasury Data");
9105
9106        let seed = self.seed;
9107        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9108            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9109        let currency = self
9110            .config
9111            .companies
9112            .first()
9113            .map(|c| c.currency.as_str())
9114            .unwrap_or("USD");
9115        let entity_id = self
9116            .config
9117            .companies
9118            .first()
9119            .map(|c| c.code.as_str())
9120            .unwrap_or("1000");
9121
9122        let mut snapshot = TreasurySnapshot::default();
9123
9124        // Generate debt instruments
9125        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
9126            self.config.treasury.debt.clone(),
9127            seed + 90,
9128        );
9129        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
9130
9131        // Generate hedging instruments (IR swaps for floating-rate debt)
9132        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
9133            self.config.treasury.hedging.clone(),
9134            seed + 91,
9135        );
9136        for debt in &snapshot.debt_instruments {
9137            if debt.rate_type == InterestRateType::Variable {
9138                let swap = hedge_gen.generate_ir_swap(
9139                    currency,
9140                    debt.principal,
9141                    debt.origination_date,
9142                    debt.maturity_date,
9143                );
9144                snapshot.hedging_instruments.push(swap);
9145            }
9146        }
9147
9148        // Build FX exposures from foreign-currency payments and generate
9149        // FX forwards + hedge relationship designations via generate() API.
9150        {
9151            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
9152            for payment in &document_flows.payments {
9153                if payment.currency != currency {
9154                    let entry = fx_map
9155                        .entry(payment.currency.clone())
9156                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
9157                    entry.0 += payment.amount;
9158                    // Use the latest settlement date among grouped payments
9159                    if payment.header.document_date > entry.1 {
9160                        entry.1 = payment.header.document_date;
9161                    }
9162                }
9163            }
9164            if !fx_map.is_empty() {
9165                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9166                    .into_iter()
9167                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
9168                        datasynth_generators::treasury::FxExposure {
9169                            currency_pair: format!("{foreign_ccy}/{currency}"),
9170                            foreign_currency: foreign_ccy,
9171                            net_amount,
9172                            settlement_date,
9173                            description: "AP payment FX exposure".to_string(),
9174                        }
9175                    })
9176                    .collect();
9177                let (fx_instruments, fx_relationships) =
9178                    hedge_gen.generate(start_date, &fx_exposures);
9179                snapshot.hedging_instruments.extend(fx_instruments);
9180                snapshot.hedge_relationships.extend(fx_relationships);
9181            }
9182        }
9183
9184        // Inject anomalies if configured
9185        if self.config.treasury.anomaly_rate > 0.0 {
9186            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9187                seed + 92,
9188                self.config.treasury.anomaly_rate,
9189            );
9190            let mut labels = Vec::new();
9191            labels.extend(
9192                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9193            );
9194            snapshot.treasury_anomaly_labels = labels;
9195        }
9196
9197        // Generate cash positions from payment flows
9198        if self.config.treasury.cash_positioning.enabled {
9199            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9200
9201            // AP payments as outflows
9202            for payment in &document_flows.payments {
9203                cash_flows.push(datasynth_generators::treasury::CashFlow {
9204                    date: payment.header.document_date,
9205                    account_id: format!("{entity_id}-MAIN"),
9206                    amount: payment.amount,
9207                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9208                });
9209            }
9210
9211            // Customer receipts (from O2C chains) as inflows
9212            for chain in &document_flows.o2c_chains {
9213                if let Some(ref receipt) = chain.customer_receipt {
9214                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9215                        date: receipt.header.document_date,
9216                        account_id: format!("{entity_id}-MAIN"),
9217                        amount: receipt.amount,
9218                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9219                    });
9220                }
9221                // Remainder receipts (follow-up to partial payments)
9222                for receipt in &chain.remainder_receipts {
9223                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9224                        date: receipt.header.document_date,
9225                        account_id: format!("{entity_id}-MAIN"),
9226                        amount: receipt.amount,
9227                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9228                    });
9229                }
9230            }
9231
9232            if !cash_flows.is_empty() {
9233                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9234                    self.config.treasury.cash_positioning.clone(),
9235                    seed + 93,
9236                );
9237                let account_id = format!("{entity_id}-MAIN");
9238                snapshot.cash_positions = cash_gen.generate(
9239                    entity_id,
9240                    &account_id,
9241                    currency,
9242                    &cash_flows,
9243                    start_date,
9244                    start_date + chrono::Months::new(self.config.global.period_months),
9245                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
9246                );
9247            }
9248        }
9249
9250        // Generate cash forecasts from AR/AP aging
9251        if self.config.treasury.cash_forecasting.enabled {
9252            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9253
9254            // Build AR aging items from subledger AR invoices
9255            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9256                .ar_invoices
9257                .iter()
9258                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9259                .map(|inv| {
9260                    let days_past_due = if inv.due_date < end_date {
9261                        (end_date - inv.due_date).num_days().max(0) as u32
9262                    } else {
9263                        0
9264                    };
9265                    datasynth_generators::treasury::ArAgingItem {
9266                        expected_date: inv.due_date,
9267                        amount: inv.amount_remaining,
9268                        days_past_due,
9269                        document_id: inv.invoice_number.clone(),
9270                    }
9271                })
9272                .collect();
9273
9274            // Build AP aging items from subledger AP invoices
9275            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9276                .ap_invoices
9277                .iter()
9278                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9279                .map(|inv| datasynth_generators::treasury::ApAgingItem {
9280                    payment_date: inv.due_date,
9281                    amount: inv.amount_remaining,
9282                    document_id: inv.invoice_number.clone(),
9283                })
9284                .collect();
9285
9286            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9287                self.config.treasury.cash_forecasting.clone(),
9288                seed + 94,
9289            );
9290            let forecast = forecast_gen.generate(
9291                entity_id,
9292                currency,
9293                end_date,
9294                &ar_items,
9295                &ap_items,
9296                &[], // scheduled disbursements - empty for now
9297            );
9298            snapshot.cash_forecasts.push(forecast);
9299        }
9300
9301        // Generate cash pools and sweeps
9302        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9303            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9304            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9305                self.config.treasury.cash_pooling.clone(),
9306                seed + 95,
9307            );
9308
9309            // Create a pool from available accounts
9310            let account_ids: Vec<String> = snapshot
9311                .cash_positions
9312                .iter()
9313                .map(|cp| cp.bank_account_id.clone())
9314                .collect::<std::collections::HashSet<_>>()
9315                .into_iter()
9316                .collect();
9317
9318            if let Some(pool) =
9319                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9320            {
9321                // Generate sweeps - build participant balances from last cash position per account
9322                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9323                for cp in &snapshot.cash_positions {
9324                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9325                }
9326
9327                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9328                    latest_balances
9329                        .into_iter()
9330                        .filter(|(id, _)| pool.participant_accounts.contains(id))
9331                        .map(
9332                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
9333                                account_id: id,
9334                                balance,
9335                            },
9336                        )
9337                        .collect();
9338
9339                let sweeps =
9340                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9341                snapshot.cash_pool_sweeps = sweeps;
9342                snapshot.cash_pools.push(pool);
9343            }
9344        }
9345
9346        // Generate bank guarantees
9347        if self.config.treasury.bank_guarantees.enabled {
9348            let vendor_names: Vec<String> = self
9349                .master_data
9350                .vendors
9351                .iter()
9352                .map(|v| v.name.clone())
9353                .collect();
9354            if !vendor_names.is_empty() {
9355                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9356                    self.config.treasury.bank_guarantees.clone(),
9357                    seed + 96,
9358                );
9359                snapshot.bank_guarantees =
9360                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9361            }
9362        }
9363
9364        // Generate netting runs from intercompany matched pairs
9365        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9366            let entity_ids: Vec<String> = self
9367                .config
9368                .companies
9369                .iter()
9370                .map(|c| c.code.clone())
9371                .collect();
9372            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9373                .matched_pairs
9374                .iter()
9375                .map(|mp| {
9376                    (
9377                        mp.seller_company.clone(),
9378                        mp.buyer_company.clone(),
9379                        mp.amount,
9380                    )
9381                })
9382                .collect();
9383            if entity_ids.len() >= 2 {
9384                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9385                    self.config.treasury.netting.clone(),
9386                    seed + 97,
9387                );
9388                snapshot.netting_runs = netting_gen.generate(
9389                    &entity_ids,
9390                    currency,
9391                    start_date,
9392                    self.config.global.period_months,
9393                    &ic_amounts,
9394                );
9395            }
9396        }
9397
9398        // Generate treasury journal entries from the instruments we just created.
9399        {
9400            use datasynth_generators::treasury::TreasuryAccounting;
9401
9402            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9403            let mut treasury_jes = Vec::new();
9404
9405            // Debt interest accrual JEs
9406            if !snapshot.debt_instruments.is_empty() {
9407                let debt_jes =
9408                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9409                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9410                treasury_jes.extend(debt_jes);
9411            }
9412
9413            // Hedge mark-to-market JEs
9414            if !snapshot.hedging_instruments.is_empty() {
9415                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9416                    &snapshot.hedging_instruments,
9417                    &snapshot.hedge_relationships,
9418                    end_date,
9419                    entity_id,
9420                );
9421                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9422                treasury_jes.extend(hedge_jes);
9423            }
9424
9425            // Cash pool sweep JEs
9426            if !snapshot.cash_pool_sweeps.is_empty() {
9427                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9428                    &snapshot.cash_pool_sweeps,
9429                    entity_id,
9430                );
9431                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9432                treasury_jes.extend(sweep_jes);
9433            }
9434
9435            if !treasury_jes.is_empty() {
9436                debug!("Total treasury journal entries: {}", treasury_jes.len());
9437            }
9438            snapshot.journal_entries = treasury_jes;
9439        }
9440
9441        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9442        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9443        stats.cash_position_count = snapshot.cash_positions.len();
9444        stats.cash_forecast_count = snapshot.cash_forecasts.len();
9445        stats.cash_pool_count = snapshot.cash_pools.len();
9446
9447        info!(
9448            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9449            snapshot.debt_instruments.len(),
9450            snapshot.hedging_instruments.len(),
9451            snapshot.cash_positions.len(),
9452            snapshot.cash_forecasts.len(),
9453            snapshot.cash_pools.len(),
9454            snapshot.bank_guarantees.len(),
9455            snapshot.netting_runs.len(),
9456            snapshot.journal_entries.len(),
9457        );
9458        self.check_resources_with_log("post-treasury")?;
9459
9460        Ok(snapshot)
9461    }
9462
9463    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
9464    fn phase_project_accounting(
9465        &mut self,
9466        document_flows: &DocumentFlowSnapshot,
9467        hr: &HrSnapshot,
9468        stats: &mut EnhancedGenerationStatistics,
9469    ) -> SynthResult<ProjectAccountingSnapshot> {
9470        if !self.phase_config.generate_project_accounting {
9471            debug!("Phase 23: Skipped (project accounting disabled)");
9472            return Ok(ProjectAccountingSnapshot::default());
9473        }
9474        let degradation = self.check_resources()?;
9475        if degradation >= DegradationLevel::Reduced {
9476            debug!(
9477                "Phase skipped due to resource pressure (degradation: {:?})",
9478                degradation
9479            );
9480            return Ok(ProjectAccountingSnapshot::default());
9481        }
9482        info!("Phase 23: Generating Project Accounting Data");
9483
9484        let seed = self.seed;
9485        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9486            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9487        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9488        let company_code = self
9489            .config
9490            .companies
9491            .first()
9492            .map(|c| c.code.as_str())
9493            .unwrap_or("1000");
9494
9495        let mut snapshot = ProjectAccountingSnapshot::default();
9496
9497        // Generate projects with WBS hierarchies
9498        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9499            self.config.project_accounting.clone(),
9500            seed + 95,
9501        );
9502        let pool = project_gen.generate(company_code, start_date, end_date);
9503        snapshot.projects = pool.projects.clone();
9504
9505        // Link source documents to projects for cost allocation
9506        {
9507            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9508                Vec::new();
9509
9510            // Time entries
9511            for te in &hr.time_entries {
9512                let total_hours = te.hours_regular + te.hours_overtime;
9513                if total_hours > 0.0 {
9514                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9515                        id: te.entry_id.clone(),
9516                        entity_id: company_code.to_string(),
9517                        date: te.date,
9518                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9519                            .unwrap_or(rust_decimal::Decimal::ZERO),
9520                        source_type: CostSourceType::TimeEntry,
9521                        hours: Some(
9522                            rust_decimal::Decimal::from_f64_retain(total_hours)
9523                                .unwrap_or(rust_decimal::Decimal::ZERO),
9524                        ),
9525                    });
9526                }
9527            }
9528
9529            // Expense reports
9530            for er in &hr.expense_reports {
9531                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9532                    id: er.report_id.clone(),
9533                    entity_id: company_code.to_string(),
9534                    date: er.submission_date,
9535                    amount: er.total_amount,
9536                    source_type: CostSourceType::ExpenseReport,
9537                    hours: None,
9538                });
9539            }
9540
9541            // Purchase orders
9542            for po in &document_flows.purchase_orders {
9543                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9544                    id: po.header.document_id.clone(),
9545                    entity_id: company_code.to_string(),
9546                    date: po.header.document_date,
9547                    amount: po.total_net_amount,
9548                    source_type: CostSourceType::PurchaseOrder,
9549                    hours: None,
9550                });
9551            }
9552
9553            // Vendor invoices
9554            for vi in &document_flows.vendor_invoices {
9555                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9556                    id: vi.header.document_id.clone(),
9557                    entity_id: company_code.to_string(),
9558                    date: vi.header.document_date,
9559                    amount: vi.payable_amount,
9560                    source_type: CostSourceType::VendorInvoice,
9561                    hours: None,
9562                });
9563            }
9564
9565            if !source_docs.is_empty() && !pool.projects.is_empty() {
9566                let mut cost_gen =
9567                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
9568                        self.config.project_accounting.cost_allocation.clone(),
9569                        seed + 99,
9570                    );
9571                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9572            }
9573        }
9574
9575        // Generate change orders
9576        if self.config.project_accounting.change_orders.enabled {
9577            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9578                self.config.project_accounting.change_orders.clone(),
9579                seed + 96,
9580            );
9581            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9582        }
9583
9584        // Generate milestones
9585        if self.config.project_accounting.milestones.enabled {
9586            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9587                self.config.project_accounting.milestones.clone(),
9588                seed + 97,
9589            );
9590            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9591        }
9592
9593        // Generate earned value metrics (needs cost lines, so only if we have projects)
9594        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9595            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9596                self.config.project_accounting.earned_value.clone(),
9597                seed + 98,
9598            );
9599            snapshot.earned_value_metrics =
9600                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9601        }
9602
9603        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
9604        if self.config.project_accounting.revenue_recognition.enabled
9605            && !snapshot.projects.is_empty()
9606            && !snapshot.cost_lines.is_empty()
9607        {
9608            use datasynth_generators::project_accounting::RevenueGenerator;
9609            let rev_config = self.config.project_accounting.revenue_recognition.clone();
9610            let avg_contract_value =
9611                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9612                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9613
9614            // Build contract value tuples: only customer-type projects get revenue recognition.
9615            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
9616            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9617                snapshot
9618                    .projects
9619                    .iter()
9620                    .filter(|p| {
9621                        matches!(
9622                            p.project_type,
9623                            datasynth_core::models::ProjectType::Customer
9624                        )
9625                    })
9626                    .map(|p| {
9627                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
9628                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9629                        // budget × 1.25 → contract value
9630                        } else {
9631                            avg_contract_value
9632                        };
9633                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
9634                        (p.project_id.clone(), cv, etc)
9635                    })
9636                    .collect();
9637
9638            if !contract_values.is_empty() {
9639                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9640                snapshot.revenue_records = rev_gen.generate(
9641                    &snapshot.projects,
9642                    &snapshot.cost_lines,
9643                    &contract_values,
9644                    start_date,
9645                    end_date,
9646                );
9647                debug!(
9648                    "Generated {} revenue recognition records for {} customer projects",
9649                    snapshot.revenue_records.len(),
9650                    contract_values.len()
9651                );
9652            }
9653        }
9654
9655        stats.project_count = snapshot.projects.len();
9656        stats.project_change_order_count = snapshot.change_orders.len();
9657        stats.project_cost_line_count = snapshot.cost_lines.len();
9658
9659        info!(
9660            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9661            snapshot.projects.len(),
9662            snapshot.change_orders.len(),
9663            snapshot.milestones.len(),
9664            snapshot.earned_value_metrics.len()
9665        );
9666        self.check_resources_with_log("post-project-accounting")?;
9667
9668        Ok(snapshot)
9669    }
9670
9671    /// Phase 24: Generate process evolution and organizational events.
9672    fn phase_evolution_events(
9673        &mut self,
9674        stats: &mut EnhancedGenerationStatistics,
9675    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9676        if !self.phase_config.generate_evolution_events {
9677            debug!("Phase 24: Skipped (evolution events disabled)");
9678            return Ok((Vec::new(), Vec::new()));
9679        }
9680        info!("Phase 24: Generating Process Evolution + Organizational Events");
9681
9682        let seed = self.seed;
9683        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9684            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9685        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9686
9687        // Process evolution events
9688        let mut proc_gen =
9689            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9690                seed + 100,
9691            );
9692        let process_events = proc_gen.generate_events(start_date, end_date);
9693
9694        // Organizational events
9695        let company_codes: Vec<String> = self
9696            .config
9697            .companies
9698            .iter()
9699            .map(|c| c.code.clone())
9700            .collect();
9701        let mut org_gen =
9702            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9703                seed + 101,
9704            );
9705        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9706
9707        stats.process_evolution_event_count = process_events.len();
9708        stats.organizational_event_count = org_events.len();
9709
9710        info!(
9711            "Evolution events generated: {} process evolution, {} organizational",
9712            process_events.len(),
9713            org_events.len()
9714        );
9715        self.check_resources_with_log("post-evolution-events")?;
9716
9717        Ok((process_events, org_events))
9718    }
9719
9720    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
9721    /// data recovery, and regulatory changes).
9722    fn phase_disruption_events(
9723        &self,
9724        stats: &mut EnhancedGenerationStatistics,
9725    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9726        if !self.config.organizational_events.enabled {
9727            debug!("Phase 24b: Skipped (organizational events disabled)");
9728            return Ok(Vec::new());
9729        }
9730        info!("Phase 24b: Generating Disruption Events");
9731
9732        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9733            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9734        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9735
9736        let company_codes: Vec<String> = self
9737            .config
9738            .companies
9739            .iter()
9740            .map(|c| c.code.clone())
9741            .collect();
9742
9743        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9744        let events = gen.generate(start_date, end_date, &company_codes);
9745
9746        stats.disruption_event_count = events.len();
9747        info!("Disruption events generated: {} events", events.len());
9748        self.check_resources_with_log("post-disruption-events")?;
9749
9750        Ok(events)
9751    }
9752
9753    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
9754    ///
9755    /// Produces paired examples where each pair contains the original clean JE
9756    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
9757    /// split transaction). Useful for training anomaly detection models with
9758    /// known ground truth.
9759    fn phase_counterfactuals(
9760        &self,
9761        journal_entries: &[JournalEntry],
9762        stats: &mut EnhancedGenerationStatistics,
9763    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9764        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9765            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9766            return Ok(Vec::new());
9767        }
9768        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9769
9770        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9771
9772        let mut gen = CounterfactualGenerator::new(self.seed + 110);
9773
9774        // Rotating set of specs to produce diverse mutation types
9775        let specs = [
9776            CounterfactualSpec::ScaleAmount { factor: 2.5 },
9777            CounterfactualSpec::ShiftDate { days: -14 },
9778            CounterfactualSpec::SelfApprove,
9779            CounterfactualSpec::SplitTransaction { split_count: 3 },
9780        ];
9781
9782        let pairs: Vec<_> = journal_entries
9783            .iter()
9784            .enumerate()
9785            .map(|(i, je)| {
9786                let spec = &specs[i % specs.len()];
9787                gen.generate(je, spec)
9788            })
9789            .collect();
9790
9791        stats.counterfactual_pair_count = pairs.len();
9792        info!(
9793            "Counterfactual pairs generated: {} pairs from {} journal entries",
9794            pairs.len(),
9795            journal_entries.len()
9796        );
9797        self.check_resources_with_log("post-counterfactuals")?;
9798
9799        Ok(pairs)
9800    }
9801
9802    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
9803    ///
9804    /// Uses the anomaly labels (from Phase 8) to determine which documents are
9805    /// fraudulent, then generates probabilistic red flags on all chain documents.
9806    /// Non-fraud documents also receive red flags at a lower rate (false positives)
9807    /// to produce realistic ML training data.
9808    fn phase_red_flags(
9809        &self,
9810        anomaly_labels: &AnomalyLabels,
9811        document_flows: &DocumentFlowSnapshot,
9812        stats: &mut EnhancedGenerationStatistics,
9813    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9814        if !self.config.fraud.enabled {
9815            debug!("Phase 26: Skipped (fraud generation disabled)");
9816            return Ok(Vec::new());
9817        }
9818        info!("Phase 26: Generating Fraud Red-Flag Indicators");
9819
9820        use datasynth_generators::fraud::RedFlagGenerator;
9821
9822        let generator = RedFlagGenerator::new();
9823        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9824
9825        // Build a set of document IDs that are known-fraudulent from anomaly labels.
9826        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9827            .labels
9828            .iter()
9829            .filter(|label| label.anomaly_type.is_intentional())
9830            .map(|label| label.document_id.as_str())
9831            .collect();
9832
9833        let mut flags = Vec::new();
9834
9835        // Iterate P2P chains: use the purchase order document ID as the chain key.
9836        for chain in &document_flows.p2p_chains {
9837            let doc_id = &chain.purchase_order.header.document_id;
9838            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9839            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9840        }
9841
9842        // Iterate O2C chains: use the sales order document ID as the chain key.
9843        for chain in &document_flows.o2c_chains {
9844            let doc_id = &chain.sales_order.header.document_id;
9845            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9846            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9847        }
9848
9849        stats.red_flag_count = flags.len();
9850        info!(
9851            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9852            flags.len(),
9853            document_flows.p2p_chains.len(),
9854            document_flows.o2c_chains.len(),
9855            fraud_doc_ids.len()
9856        );
9857        self.check_resources_with_log("post-red-flags")?;
9858
9859        Ok(flags)
9860    }
9861
9862    /// Phase 26b: Generate collusion rings from employee/vendor pools.
9863    ///
9864    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
9865    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
9866    /// advance them over the simulation period.
9867    fn phase_collusion_rings(
9868        &mut self,
9869        stats: &mut EnhancedGenerationStatistics,
9870    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9871        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9872            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9873            return Ok(Vec::new());
9874        }
9875        info!("Phase 26b: Generating Collusion Rings");
9876
9877        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9878            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9879        let months = self.config.global.period_months;
9880
9881        let employee_ids: Vec<String> = self
9882            .master_data
9883            .employees
9884            .iter()
9885            .map(|e| e.employee_id.clone())
9886            .collect();
9887        let vendor_ids: Vec<String> = self
9888            .master_data
9889            .vendors
9890            .iter()
9891            .map(|v| v.vendor_id.clone())
9892            .collect();
9893
9894        let mut generator =
9895            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9896        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9897
9898        stats.collusion_ring_count = rings.len();
9899        info!(
9900            "Collusion rings generated: {} rings, total members: {}",
9901            rings.len(),
9902            rings
9903                .iter()
9904                .map(datasynth_generators::fraud::CollusionRing::size)
9905                .sum::<usize>()
9906        );
9907        self.check_resources_with_log("post-collusion-rings")?;
9908
9909        Ok(rings)
9910    }
9911
9912    /// Phase 27: Generate bi-temporal version chains for vendor entities.
9913    ///
9914    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
9915    /// master data changes over time, supporting bi-temporal audit queries.
9916    fn phase_temporal_attributes(
9917        &mut self,
9918        stats: &mut EnhancedGenerationStatistics,
9919    ) -> SynthResult<
9920        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9921    > {
9922        if !self.config.temporal_attributes.enabled {
9923            debug!("Phase 27: Skipped (temporal attributes disabled)");
9924            return Ok(Vec::new());
9925        }
9926        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9927
9928        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9929            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9930
9931        // Build a TemporalAttributeConfig from the user's config.
9932        // Since Phase 27 is already gated on temporal_attributes.enabled,
9933        // default to enabling version chains so users get actual mutations.
9934        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9935            || self.config.temporal_attributes.enabled;
9936        let temporal_config = {
9937            let ta = &self.config.temporal_attributes;
9938            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9939                .enabled(ta.enabled)
9940                .closed_probability(ta.valid_time.closed_probability)
9941                .avg_validity_days(ta.valid_time.avg_validity_days)
9942                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9943                .with_version_chains(if generate_version_chains {
9944                    ta.avg_versions_per_entity
9945                } else {
9946                    1.0
9947                })
9948                .build()
9949        };
9950        // Apply backdating settings if configured
9951        let temporal_config = if self
9952            .config
9953            .temporal_attributes
9954            .transaction_time
9955            .allow_backdating
9956        {
9957            let mut c = temporal_config;
9958            c.transaction_time.allow_backdating = true;
9959            c.transaction_time.backdating_probability = self
9960                .config
9961                .temporal_attributes
9962                .transaction_time
9963                .backdating_probability;
9964            c.transaction_time.max_backdate_days = self
9965                .config
9966                .temporal_attributes
9967                .transaction_time
9968                .max_backdate_days;
9969            c
9970        } else {
9971            temporal_config
9972        };
9973        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9974            temporal_config,
9975            self.seed + 130,
9976            start_date,
9977        );
9978
9979        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9980            self.seed + 130,
9981            datasynth_core::GeneratorType::Vendor,
9982        );
9983
9984        let chains: Vec<_> = self
9985            .master_data
9986            .vendors
9987            .iter()
9988            .map(|vendor| {
9989                let id = uuid_factory.next();
9990                gen.generate_version_chain(vendor.clone(), id)
9991            })
9992            .collect();
9993
9994        stats.temporal_version_chain_count = chains.len();
9995        info!("Temporal version chains generated: {} chains", chains.len());
9996        self.check_resources_with_log("post-temporal-attributes")?;
9997
9998        Ok(chains)
9999    }
10000
10001    /// Phase 28: Build entity relationship graph and cross-process links.
10002    ///
10003    /// Part 1 (gated on `relationship_strength.enabled`): builds an
10004    /// `EntityGraph` from master-data vendor/customer entities and
10005    /// journal-entry-derived transaction summaries.
10006    ///
10007    /// Part 2 (gated on `cross_process_links.enabled`): extracts
10008    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
10009    /// generates inventory-movement cross-process links.
10010    fn phase_entity_relationships(
10011        &self,
10012        journal_entries: &[JournalEntry],
10013        document_flows: &DocumentFlowSnapshot,
10014        stats: &mut EnhancedGenerationStatistics,
10015    ) -> SynthResult<(
10016        Option<datasynth_core::models::EntityGraph>,
10017        Vec<datasynth_core::models::CrossProcessLink>,
10018    )> {
10019        use datasynth_generators::relationships::{
10020            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
10021            TransactionSummary,
10022        };
10023
10024        let rs_enabled = self.config.relationship_strength.enabled;
10025        let cpl_enabled = self.config.cross_process_links.enabled
10026            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
10027
10028        if !rs_enabled && !cpl_enabled {
10029            debug!(
10030                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
10031            );
10032            return Ok((None, Vec::new()));
10033        }
10034
10035        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
10036
10037        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10038            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10039
10040        let company_code = self
10041            .config
10042            .companies
10043            .first()
10044            .map(|c| c.code.as_str())
10045            .unwrap_or("1000");
10046
10047        // Build the generator with matching config flags
10048        let gen_config = EntityGraphConfig {
10049            enabled: rs_enabled,
10050            cross_process: datasynth_generators::relationships::CrossProcessConfig {
10051                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
10052                enable_return_flows: false,
10053                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
10054                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
10055                // Use higher link rate for small datasets to avoid probabilistic empty results
10056                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
10057                    1.0
10058                } else {
10059                    0.30
10060                },
10061                ..Default::default()
10062            },
10063            strength_config: datasynth_generators::relationships::StrengthConfig {
10064                transaction_volume_weight: self
10065                    .config
10066                    .relationship_strength
10067                    .calculation
10068                    .transaction_volume_weight,
10069                transaction_count_weight: self
10070                    .config
10071                    .relationship_strength
10072                    .calculation
10073                    .transaction_count_weight,
10074                duration_weight: self
10075                    .config
10076                    .relationship_strength
10077                    .calculation
10078                    .relationship_duration_weight,
10079                recency_weight: self.config.relationship_strength.calculation.recency_weight,
10080                mutual_connections_weight: self
10081                    .config
10082                    .relationship_strength
10083                    .calculation
10084                    .mutual_connections_weight,
10085                recency_half_life_days: self
10086                    .config
10087                    .relationship_strength
10088                    .calculation
10089                    .recency_half_life_days,
10090            },
10091            ..Default::default()
10092        };
10093
10094        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
10095
10096        // --- Part 1: Entity Relationship Graph ---
10097        let entity_graph = if rs_enabled {
10098            // Build EntitySummary lists from master data
10099            let vendor_summaries: Vec<EntitySummary> = self
10100                .master_data
10101                .vendors
10102                .iter()
10103                .map(|v| {
10104                    EntitySummary::new(
10105                        &v.vendor_id,
10106                        &v.name,
10107                        datasynth_core::models::GraphEntityType::Vendor,
10108                        start_date,
10109                    )
10110                })
10111                .collect();
10112
10113            let customer_summaries: Vec<EntitySummary> = self
10114                .master_data
10115                .customers
10116                .iter()
10117                .map(|c| {
10118                    EntitySummary::new(
10119                        &c.customer_id,
10120                        &c.name,
10121                        datasynth_core::models::GraphEntityType::Customer,
10122                        start_date,
10123                    )
10124                })
10125                .collect();
10126
10127            // Build transaction summaries from journal entries.
10128            // Key = (company_code, trading_partner) for entries that have a
10129            // trading partner.  This captures intercompany flows and any JE
10130            // whose line items carry a trading_partner reference.
10131            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
10132                std::collections::HashMap::new();
10133
10134            for je in journal_entries {
10135                let cc = je.header.company_code.clone();
10136                let posting_date = je.header.posting_date;
10137                for line in &je.lines {
10138                    if let Some(ref tp) = line.trading_partner {
10139                        let amount = if line.debit_amount > line.credit_amount {
10140                            line.debit_amount
10141                        } else {
10142                            line.credit_amount
10143                        };
10144                        let entry = txn_summaries
10145                            .entry((cc.clone(), tp.clone()))
10146                            .or_insert_with(|| TransactionSummary {
10147                                total_volume: rust_decimal::Decimal::ZERO,
10148                                transaction_count: 0,
10149                                first_transaction_date: posting_date,
10150                                last_transaction_date: posting_date,
10151                                related_entities: std::collections::HashSet::new(),
10152                            });
10153                        entry.total_volume += amount;
10154                        entry.transaction_count += 1;
10155                        if posting_date < entry.first_transaction_date {
10156                            entry.first_transaction_date = posting_date;
10157                        }
10158                        if posting_date > entry.last_transaction_date {
10159                            entry.last_transaction_date = posting_date;
10160                        }
10161                        entry.related_entities.insert(cc.clone());
10162                    }
10163                }
10164            }
10165
10166            // Also extract transaction relationships from document flow chains.
10167            // P2P chains: Company → Vendor relationships
10168            for chain in &document_flows.p2p_chains {
10169                let cc = chain.purchase_order.header.company_code.clone();
10170                let vendor_id = chain.purchase_order.vendor_id.clone();
10171                let po_date = chain.purchase_order.header.document_date;
10172                let amount = chain.purchase_order.total_net_amount;
10173
10174                let entry = txn_summaries
10175                    .entry((cc.clone(), vendor_id))
10176                    .or_insert_with(|| TransactionSummary {
10177                        total_volume: rust_decimal::Decimal::ZERO,
10178                        transaction_count: 0,
10179                        first_transaction_date: po_date,
10180                        last_transaction_date: po_date,
10181                        related_entities: std::collections::HashSet::new(),
10182                    });
10183                entry.total_volume += amount;
10184                entry.transaction_count += 1;
10185                if po_date < entry.first_transaction_date {
10186                    entry.first_transaction_date = po_date;
10187                }
10188                if po_date > entry.last_transaction_date {
10189                    entry.last_transaction_date = po_date;
10190                }
10191                entry.related_entities.insert(cc);
10192            }
10193
10194            // O2C chains: Company → Customer relationships
10195            for chain in &document_flows.o2c_chains {
10196                let cc = chain.sales_order.header.company_code.clone();
10197                let customer_id = chain.sales_order.customer_id.clone();
10198                let so_date = chain.sales_order.header.document_date;
10199                let amount = chain.sales_order.total_net_amount;
10200
10201                let entry = txn_summaries
10202                    .entry((cc.clone(), customer_id))
10203                    .or_insert_with(|| TransactionSummary {
10204                        total_volume: rust_decimal::Decimal::ZERO,
10205                        transaction_count: 0,
10206                        first_transaction_date: so_date,
10207                        last_transaction_date: so_date,
10208                        related_entities: std::collections::HashSet::new(),
10209                    });
10210                entry.total_volume += amount;
10211                entry.transaction_count += 1;
10212                if so_date < entry.first_transaction_date {
10213                    entry.first_transaction_date = so_date;
10214                }
10215                if so_date > entry.last_transaction_date {
10216                    entry.last_transaction_date = so_date;
10217                }
10218                entry.related_entities.insert(cc);
10219            }
10220
10221            let as_of_date = journal_entries
10222                .last()
10223                .map(|je| je.header.posting_date)
10224                .unwrap_or(start_date);
10225
10226            let graph = gen.generate_entity_graph(
10227                company_code,
10228                as_of_date,
10229                &vendor_summaries,
10230                &customer_summaries,
10231                &txn_summaries,
10232            );
10233
10234            info!(
10235                "Entity relationship graph: {} nodes, {} edges",
10236                graph.nodes.len(),
10237                graph.edges.len()
10238            );
10239            stats.entity_relationship_node_count = graph.nodes.len();
10240            stats.entity_relationship_edge_count = graph.edges.len();
10241            Some(graph)
10242        } else {
10243            None
10244        };
10245
10246        // --- Part 2: Cross-Process Links ---
10247        let cross_process_links = if cpl_enabled {
10248            // Build GoodsReceiptRef from P2P chains
10249            let gr_refs: Vec<GoodsReceiptRef> = document_flows
10250                .p2p_chains
10251                .iter()
10252                .flat_map(|chain| {
10253                    let vendor_id = chain.purchase_order.vendor_id.clone();
10254                    let cc = chain.purchase_order.header.company_code.clone();
10255                    chain.goods_receipts.iter().flat_map(move |gr| {
10256                        gr.items.iter().filter_map({
10257                            let doc_id = gr.header.document_id.clone();
10258                            let v_id = vendor_id.clone();
10259                            let company = cc.clone();
10260                            let receipt_date = gr.header.document_date;
10261                            move |item| {
10262                                item.base
10263                                    .material_id
10264                                    .as_ref()
10265                                    .map(|mat_id| GoodsReceiptRef {
10266                                        document_id: doc_id.clone(),
10267                                        material_id: mat_id.clone(),
10268                                        quantity: item.base.quantity,
10269                                        receipt_date,
10270                                        vendor_id: v_id.clone(),
10271                                        company_code: company.clone(),
10272                                    })
10273                            }
10274                        })
10275                    })
10276                })
10277                .collect();
10278
10279            // Build DeliveryRef from O2C chains
10280            let del_refs: Vec<DeliveryRef> = document_flows
10281                .o2c_chains
10282                .iter()
10283                .flat_map(|chain| {
10284                    let customer_id = chain.sales_order.customer_id.clone();
10285                    let cc = chain.sales_order.header.company_code.clone();
10286                    chain.deliveries.iter().flat_map(move |del| {
10287                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10288                        del.items.iter().filter_map({
10289                            let doc_id = del.header.document_id.clone();
10290                            let c_id = customer_id.clone();
10291                            let company = cc.clone();
10292                            move |item| {
10293                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10294                                    document_id: doc_id.clone(),
10295                                    material_id: mat_id.clone(),
10296                                    quantity: item.base.quantity,
10297                                    delivery_date,
10298                                    customer_id: c_id.clone(),
10299                                    company_code: company.clone(),
10300                                })
10301                            }
10302                        })
10303                    })
10304                })
10305                .collect();
10306
10307            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10308            info!("Cross-process links generated: {} links", links.len());
10309            stats.cross_process_link_count = links.len();
10310            links
10311        } else {
10312            Vec::new()
10313        };
10314
10315        self.check_resources_with_log("post-entity-relationships")?;
10316        Ok((entity_graph, cross_process_links))
10317    }
10318
10319    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
10320    fn phase_industry_data(
10321        &self,
10322        stats: &mut EnhancedGenerationStatistics,
10323    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10324        if !self.config.industry_specific.enabled {
10325            return None;
10326        }
10327        info!("Phase 29: Generating industry-specific data");
10328        let output = datasynth_generators::industry::factory::generate_industry_output(
10329            self.config.global.industry,
10330        );
10331        stats.industry_gl_account_count = output.gl_accounts.len();
10332        info!(
10333            "Industry data generated: {} GL accounts for {:?}",
10334            output.gl_accounts.len(),
10335            self.config.global.industry
10336        );
10337        Some(output)
10338    }
10339
10340    /// Phase 3b: Generate opening balances for each company.
10341    fn phase_opening_balances(
10342        &mut self,
10343        coa: &Arc<ChartOfAccounts>,
10344        stats: &mut EnhancedGenerationStatistics,
10345    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10346        if !self.config.balance.generate_opening_balances {
10347            debug!("Phase 3b: Skipped (opening balance generation disabled)");
10348            return Ok(Vec::new());
10349        }
10350        info!("Phase 3b: Generating Opening Balances");
10351
10352        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10353            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10354        let fiscal_year = start_date.year();
10355
10356        // **v5.3** — When the shard context supplies prior-period
10357        // opening-balance carryovers, use them directly instead of
10358        // calling `OpeningBalanceGenerator`.  This implements multi-
10359        // period continuity: period N+1 opens with period N's closing
10360        // BS positions exactly, rather than re-rolling the industry-
10361        // mix generator and losing the audit trail.
10362        //
10363        // Empty `opening_balances` (the v5.0–v5.2 default) falls
10364        // through to the generator path — byte-identical behaviour
10365        // for single-period engagements.
10366        if let Some(ctx) = &self.shard_context {
10367            if !ctx.opening_balances.is_empty() {
10368                debug!(
10369                    "Phase 3b: using v5.3 opening-balance carryover ({} accounts)",
10370                    ctx.opening_balances.len()
10371                );
10372                let mut results = Vec::new();
10373                for company in &self.config.companies {
10374                    let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10375                        .opening_balances
10376                        .iter()
10377                        .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10378                        .collect();
10379                    let total_assets = ctx
10380                        .opening_balances
10381                        .iter()
10382                        .filter(|ob| {
10383                            matches!(
10384                                ob.account_type,
10385                                AccountType::Asset | AccountType::ContraAsset
10386                            )
10387                        })
10388                        .map(|ob| ob.net_balance())
10389                        .sum::<rust_decimal::Decimal>();
10390                    let total_liabilities = ctx
10391                        .opening_balances
10392                        .iter()
10393                        .filter(|ob| {
10394                            matches!(
10395                                ob.account_type,
10396                                AccountType::Liability | AccountType::ContraLiability
10397                            )
10398                        })
10399                        .map(|ob| ob.net_balance())
10400                        .sum::<rust_decimal::Decimal>();
10401                    let total_equity = ctx
10402                        .opening_balances
10403                        .iter()
10404                        .filter(|ob| {
10405                            matches!(
10406                                ob.account_type,
10407                                AccountType::Equity | AccountType::ContraEquity
10408                            )
10409                        })
10410                        .map(|ob| ob.net_balance())
10411                        .sum::<rust_decimal::Decimal>();
10412                    let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10413                        < rust_decimal::Decimal::ONE;
10414                    results.push(GeneratedOpeningBalance {
10415                        company_code: company.code.clone(),
10416                        as_of_date: start_date,
10417                        balances,
10418                        total_assets,
10419                        total_liabilities,
10420                        total_equity,
10421                        is_balanced,
10422                        calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10423                            current_ratio: None,
10424                            quick_ratio: None,
10425                            debt_to_equity: None,
10426                            working_capital: rust_decimal::Decimal::ZERO,
10427                        },
10428                    });
10429                }
10430                stats.opening_balance_count = results.len();
10431                info!(
10432                    "Phase 3b: opening-balance carryover applied ({} companies)",
10433                    results.len()
10434                );
10435                self.check_resources_with_log("post-opening-balances")?;
10436                return Ok(results);
10437            }
10438        }
10439
10440        let industry = match self.config.global.industry {
10441            IndustrySector::Manufacturing => IndustryType::Manufacturing,
10442            IndustrySector::Retail => IndustryType::Retail,
10443            IndustrySector::FinancialServices => IndustryType::Financial,
10444            IndustrySector::Healthcare => IndustryType::Healthcare,
10445            IndustrySector::Technology => IndustryType::Technology,
10446            _ => IndustryType::Manufacturing,
10447        };
10448
10449        let config = datasynth_generators::OpeningBalanceConfig {
10450            industry,
10451            ..Default::default()
10452        };
10453        let mut gen =
10454            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10455
10456        let mut results = Vec::new();
10457        for company in &self.config.companies {
10458            let spec = OpeningBalanceSpec::new(
10459                company.code.clone(),
10460                start_date,
10461                fiscal_year,
10462                company.currency.clone(),
10463                rust_decimal::Decimal::new(10_000_000, 0),
10464                industry,
10465            );
10466            let ob = gen.generate(&spec, coa, start_date, &company.code);
10467            results.push(ob);
10468        }
10469
10470        stats.opening_balance_count = results.len();
10471        info!("Opening balances generated: {} companies", results.len());
10472        self.check_resources_with_log("post-opening-balances")?;
10473
10474        Ok(results)
10475    }
10476
10477    /// Phase 9b: Reconcile GL control accounts to subledger balances.
10478    fn phase_subledger_reconciliation(
10479        &mut self,
10480        subledger: &SubledgerSnapshot,
10481        entries: &[JournalEntry],
10482        stats: &mut EnhancedGenerationStatistics,
10483    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10484        if !self.config.balance.reconcile_subledgers {
10485            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10486            return Ok(Vec::new());
10487        }
10488        info!("Phase 9b: Reconciling GL to subledger balances");
10489
10490        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10491            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10492            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10493
10494        // Build GL balance map from journal entries using a balance tracker
10495        let tracker_config = BalanceTrackerConfig {
10496            validate_on_each_entry: false,
10497            track_history: false,
10498            fail_on_validation_error: false,
10499            ..Default::default()
10500        };
10501        let recon_currency = self
10502            .config
10503            .companies
10504            .first()
10505            .map(|c| c.currency.clone())
10506            .unwrap_or_else(|| "USD".to_string());
10507        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10508        let validation_errors = tracker.apply_entries(entries);
10509        if !validation_errors.is_empty() {
10510            warn!(
10511                error_count = validation_errors.len(),
10512                "Balance tracker encountered validation errors during subledger reconciliation"
10513            );
10514            for err in &validation_errors {
10515                debug!("Balance validation error: {:?}", err);
10516            }
10517        }
10518
10519        let mut engine = datasynth_generators::ReconciliationEngine::new(
10520            datasynth_generators::ReconciliationConfig::default(),
10521        );
10522
10523        let mut results = Vec::new();
10524        let company_code = self
10525            .config
10526            .companies
10527            .first()
10528            .map(|c| c.code.as_str())
10529            .unwrap_or("1000");
10530
10531        // Reconcile AR
10532        if !subledger.ar_invoices.is_empty() {
10533            let gl_balance = tracker
10534                .get_account_balance(
10535                    company_code,
10536                    datasynth_core::accounts::control_accounts::AR_CONTROL,
10537                )
10538                .map(|b| b.closing_balance)
10539                .unwrap_or_default();
10540            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10541            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10542        }
10543
10544        // Reconcile AP
10545        if !subledger.ap_invoices.is_empty() {
10546            let gl_balance = tracker
10547                .get_account_balance(
10548                    company_code,
10549                    datasynth_core::accounts::control_accounts::AP_CONTROL,
10550                )
10551                .map(|b| b.closing_balance)
10552                .unwrap_or_default();
10553            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10554            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10555        }
10556
10557        // Reconcile FA
10558        if !subledger.fa_records.is_empty() {
10559            let gl_asset_balance = tracker
10560                .get_account_balance(
10561                    company_code,
10562                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10563                )
10564                .map(|b| b.closing_balance)
10565                .unwrap_or_default();
10566            let gl_accum_depr_balance = tracker
10567                .get_account_balance(
10568                    company_code,
10569                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10570                )
10571                .map(|b| b.closing_balance)
10572                .unwrap_or_default();
10573            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10574                subledger.fa_records.iter().collect();
10575            let (asset_recon, depr_recon) = engine.reconcile_fa(
10576                company_code,
10577                end_date,
10578                gl_asset_balance,
10579                gl_accum_depr_balance,
10580                &fa_refs,
10581            );
10582            results.push(asset_recon);
10583            results.push(depr_recon);
10584        }
10585
10586        // Reconcile Inventory
10587        if !subledger.inventory_positions.is_empty() {
10588            let gl_balance = tracker
10589                .get_account_balance(
10590                    company_code,
10591                    datasynth_core::accounts::control_accounts::INVENTORY,
10592                )
10593                .map(|b| b.closing_balance)
10594                .unwrap_or_default();
10595            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10596                subledger.inventory_positions.iter().collect();
10597            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10598        }
10599
10600        stats.subledger_reconciliation_count = results.len();
10601        let passed = results.iter().filter(|r| r.is_balanced()).count();
10602        let failed = results.len() - passed;
10603        info!(
10604            "Subledger reconciliation: {} checks, {} passed, {} failed",
10605            results.len(),
10606            passed,
10607            failed
10608        );
10609        self.check_resources_with_log("post-subledger-reconciliation")?;
10610
10611        Ok(results)
10612    }
10613
10614    /// Generate the chart of accounts.
10615    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10616        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10617
10618        let coa_framework = self.resolve_coa_framework();
10619
10620        let mut gen = ChartOfAccountsGenerator::new(
10621            self.config.chart_of_accounts.complexity,
10622            self.config.global.industry,
10623            self.seed,
10624        )
10625        .with_coa_framework(coa_framework)
10626        // v5.7.0 — honour the opt-in industry-pack expansion flag.
10627        .with_expand_industry_subaccounts(
10628            self.config.chart_of_accounts.expand_industry_subaccounts,
10629        );
10630
10631        let mut built = gen.generate();
10632        // v4.4.1: propagate the accounting framework label from config
10633        // onto the CoA struct so SDK consumers can read it without
10634        // cross-referencing the config (they previously saw null).
10635        if self.config.accounting_standards.enabled {
10636            use datasynth_config::schema::AccountingFrameworkConfig;
10637            built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10638                match f {
10639                    AccountingFrameworkConfig::UsGaap => "us_gaap",
10640                    AccountingFrameworkConfig::Ifrs => "ifrs",
10641                    AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10642                    AccountingFrameworkConfig::GermanGaap => "german_gaap",
10643                    AccountingFrameworkConfig::DualReporting => "dual_reporting",
10644                }
10645                .to_string()
10646            });
10647        }
10648        // SP4.2 W8.2 + W7.1 — remap synthetic account numbers to corpus
10649        // ones first (W8.2), then enrich descriptions via the overlay (W7.1).
10650        // Applied before Arc::new so we only build one Arc (no clone needed).
10651        if let Some(ref cached) = self.cached_priors {
10652            if let Some(ref coa_prior) = cached.coa_semantic {
10653                use datasynth_generators::coa_generator::{
10654                    remap_account_numbers_to_prior, ChartOfAccountsGenerator,
10655                };
10656                // W8.2 — replace synthetic account numbers with corpus
10657                // ones so the W7.1 overlay fires at ~80% instead of ~16%.
10658                let mut rng =
10659                    rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_200));
10660                let remapped = remap_account_numbers_to_prior(&mut built, coa_prior, &mut rng);
10661                tracing::info!(
10662                    target: "datasynth_runtime::coa",
10663                    remapped,
10664                    total = built.accounts.len(),
10665                    "SP4.2 W8.2 — remapped synthetic account numbers to prior-matched corpus values"
10666                );
10667                // W7.1 — now overlay descriptions / class metadata for the
10668                // (now mostly corpus-numbered) accounts.
10669                let applied =
10670                    ChartOfAccountsGenerator::apply_coa_semantic_prior(&mut built, coa_prior);
10671                tracing::info!(
10672                    target: "datasynth_runtime::coa",
10673                    applied,
10674                    total = built.accounts.len(),
10675                    "SP4.2 W7.1 — overlaid real CoA semantic entries onto synthetic accounts"
10676                );
10677            }
10678            // SP6 — taxonomy overlay: run AFTER the semantic overlay so
10679            // taxonomy-templated accounts take precedence over verbatim
10680            // semantic descriptions.  Uses SyntheticExampleResolver because
10681            // the CoA is built before master-data pools are populated (so
10682            // vendor/customer names are not yet available).
10683            if let Some(tx) = cached.text_taxonomy.as_ref() {
10684                use datasynth_core::distributions::text_taxonomy::SyntheticExampleResolver;
10685                use datasynth_generators::coa_generator::overlay_coa_taxonomy;
10686                let mut resolver = SyntheticExampleResolver;
10687                let mut rng =
10688                    rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_201));
10689                overlay_coa_taxonomy(&mut built, tx, &mut resolver, &mut rng);
10690                tracing::info!(
10691                    target: "datasynth_runtime::coa",
10692                    total = built.accounts.len(),
10693                    "SP6 — overlaid text-taxonomy templates onto CoA descriptions"
10694                );
10695            }
10696        }
10697
10698        let coa = Arc::new(built);
10699        self.coa = Some(Arc::clone(&coa));
10700
10701        if let Some(pb) = pb {
10702            pb.finish_with_message("Chart of Accounts complete");
10703        }
10704
10705        Ok(coa)
10706    }
10707
10708    /// Generate master data entities.
10709    fn generate_master_data(&mut self) -> SynthResult<()> {
10710        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10711            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10712        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10713
10714        let total = self.config.companies.len() as u64 * 5; // 5 entity types
10715        let pb = self.create_progress_bar(total, "Generating Master Data");
10716
10717        // Resolve country pack once for all companies (uses primary company's country)
10718        let pack = self.primary_pack().clone();
10719
10720        // Capture config values needed inside the parallel closure
10721        let vendors_per_company = self.phase_config.vendors_per_company;
10722        let customers_per_company = self.phase_config.customers_per_company;
10723        let materials_per_company = self.phase_config.materials_per_company;
10724        let assets_per_company = self.phase_config.assets_per_company;
10725        let coa_framework = self.resolve_coa_framework();
10726
10727        // Generate all master data in parallel across companies.
10728        // Each company's data is independent, making this embarrassingly parallel.
10729        let per_company_results: Vec<_> = self
10730            .config
10731            .companies
10732            .par_iter()
10733            .enumerate()
10734            .map(|(i, company)| {
10735                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10736                let pack = pack.clone();
10737
10738                // Generate vendors (offset counter so IDs are globally unique across companies)
10739                let mut vendor_gen = VendorGenerator::new(company_seed);
10740                vendor_gen.set_country_pack(pack.clone());
10741                vendor_gen.set_coa_framework(coa_framework);
10742                vendor_gen.set_counter_offset(i * vendors_per_company);
10743                // v3.2.0+: user-supplied bank names (and future template
10744                // strings) flow through the shared provider.
10745                vendor_gen.set_template_provider(self.template_provider.clone());
10746                // Wire vendor network config when enabled
10747                if self.config.vendor_network.enabled {
10748                    let vn = &self.config.vendor_network;
10749                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10750                        enabled: true,
10751                        depth: vn.depth,
10752                        tier1_count: datasynth_generators::TierCountConfig::new(
10753                            vn.tier1.min,
10754                            vn.tier1.max,
10755                        ),
10756                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
10757                            vn.tier2_per_parent.min,
10758                            vn.tier2_per_parent.max,
10759                        ),
10760                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
10761                            vn.tier3_per_parent.min,
10762                            vn.tier3_per_parent.max,
10763                        ),
10764                        cluster_distribution: datasynth_generators::ClusterDistribution {
10765                            reliable_strategic: vn.clusters.reliable_strategic,
10766                            standard_operational: vn.clusters.standard_operational,
10767                            transactional: vn.clusters.transactional,
10768                            problematic: vn.clusters.problematic,
10769                        },
10770                        concentration_limits: datasynth_generators::ConcentrationLimits {
10771                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10772                            max_top5: vn.dependencies.top_5_concentration,
10773                        },
10774                        ..datasynth_generators::VendorNetworkConfig::default()
10775                    });
10776                }
10777                let vendor_pool =
10778                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10779
10780                // Generate customers (offset counter so IDs are globally unique across companies)
10781                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10782                customer_gen.set_country_pack(pack.clone());
10783                customer_gen.set_coa_framework(coa_framework);
10784                customer_gen.set_counter_offset(i * customers_per_company);
10785                // v3.2.0+: user-supplied customer names flow through the shared provider.
10786                customer_gen.set_template_provider(self.template_provider.clone());
10787                // Wire customer segmentation config when enabled
10788                if self.config.customer_segmentation.enabled {
10789                    let cs = &self.config.customer_segmentation;
10790                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10791                        enabled: true,
10792                        segment_distribution: datasynth_generators::SegmentDistribution {
10793                            enterprise: cs.value_segments.enterprise.customer_share,
10794                            mid_market: cs.value_segments.mid_market.customer_share,
10795                            smb: cs.value_segments.smb.customer_share,
10796                            consumer: cs.value_segments.consumer.customer_share,
10797                        },
10798                        referral_config: datasynth_generators::ReferralConfig {
10799                            enabled: cs.networks.referrals.enabled,
10800                            referral_rate: cs.networks.referrals.referral_rate,
10801                            ..Default::default()
10802                        },
10803                        hierarchy_config: datasynth_generators::HierarchyConfig {
10804                            enabled: cs.networks.corporate_hierarchies.enabled,
10805                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10806                            ..Default::default()
10807                        },
10808                        ..Default::default()
10809                    };
10810                    customer_gen.set_segmentation_config(seg_cfg);
10811                }
10812                let customer_pool = customer_gen.generate_customer_pool(
10813                    customers_per_company,
10814                    &company.code,
10815                    start_date,
10816                );
10817
10818                // Generate materials (offset counter so IDs are globally unique across companies)
10819                let mut material_gen = MaterialGenerator::new(company_seed + 200);
10820                material_gen.set_country_pack(pack.clone());
10821                material_gen.set_counter_offset(i * materials_per_company);
10822                // v3.2.1+: user-supplied material descriptions flow through shared provider
10823                material_gen.set_template_provider(self.template_provider.clone());
10824                let material_pool = material_gen.generate_material_pool(
10825                    materials_per_company,
10826                    &company.code,
10827                    start_date,
10828                );
10829
10830                // Generate fixed assets
10831                let mut asset_gen = AssetGenerator::new(company_seed + 300);
10832                // v3.2.1+: user-supplied asset descriptions flow through shared provider
10833                asset_gen.set_template_provider(self.template_provider.clone());
10834                let asset_pool = asset_gen.generate_asset_pool(
10835                    assets_per_company,
10836                    &company.code,
10837                    (start_date, end_date),
10838                );
10839
10840                // Generate employees
10841                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
10842                employee_gen.set_country_pack(pack);
10843                // v3.2.1+: user-supplied department names flow through shared provider
10844                employee_gen.set_template_provider(self.template_provider.clone());
10845                let employee_pool =
10846                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
10847
10848                // Generate employee change history (2-5 events per employee)
10849                let employee_change_history =
10850                    employee_gen.generate_all_change_history(&employee_pool, end_date);
10851
10852                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
10853                let employee_ids: Vec<String> = employee_pool
10854                    .employees
10855                    .iter()
10856                    .map(|e| e.employee_id.clone())
10857                    .collect();
10858                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
10859                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
10860
10861                // v5.1: profit centre hierarchy (two-level: top-level
10862                // segment / region / product-group nodes + sub-units).
10863                let mut pc_gen =
10864                    datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
10865                let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
10866
10867                (
10868                    vendor_pool.vendors,
10869                    customer_pool.customers,
10870                    material_pool.materials,
10871                    asset_pool.assets,
10872                    employee_pool.employees,
10873                    employee_change_history,
10874                    cost_centers,
10875                    profit_centers,
10876                )
10877            })
10878            .collect();
10879
10880        // Aggregate results from all companies
10881        for (
10882            vendors,
10883            customers,
10884            materials,
10885            assets,
10886            employees,
10887            change_history,
10888            cost_centers,
10889            profit_centers,
10890        ) in per_company_results
10891        {
10892            self.master_data.vendors.extend(vendors);
10893            self.master_data.customers.extend(customers);
10894            self.master_data.materials.extend(materials);
10895            self.master_data.assets.extend(assets);
10896            self.master_data.employees.extend(employees);
10897            self.master_data.cost_centers.extend(cost_centers);
10898            self.master_data.profit_centers.extend(profit_centers);
10899            self.master_data
10900                .employee_change_history
10901                .extend(change_history);
10902        }
10903
10904        // v3.3.0: one OrganizationalProfile per company. Cheap to
10905        // generate (derived from industry + company_code) so we
10906        // always emit when master data runs; no separate config flag.
10907        {
10908            use datasynth_core::models::IndustrySector;
10909            use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10910            let industry = match self.config.global.industry {
10911                IndustrySector::Manufacturing => "manufacturing",
10912                IndustrySector::Retail => "retail",
10913                IndustrySector::FinancialServices => "financial_services",
10914                IndustrySector::Technology => "technology",
10915                IndustrySector::Healthcare => "healthcare",
10916                _ => "other",
10917            };
10918            for (i, company) in self.config.companies.iter().enumerate() {
10919                let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10920                let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10921                let profile = profile_gen.generate(&company.code, industry);
10922                self.master_data.organizational_profiles.push(profile);
10923            }
10924        }
10925
10926        if let Some(pb) = &pb {
10927            pb.inc(total);
10928        }
10929        if let Some(pb) = pb {
10930            pb.finish_with_message("Master data generation complete");
10931        }
10932
10933        Ok(())
10934    }
10935
10936    /// Generate document flows (P2P and O2C).
10937    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10938        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10939            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10940
10941        // Generate P2P chains
10942        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
10943        let months = (self.config.global.period_months as usize).max(1);
10944        let p2p_count = self
10945            .phase_config
10946            .p2p_chains
10947            .min(self.master_data.vendors.len() * 2 * months);
10948        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10949
10950        // Convert P2P config from schema to generator config
10951        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10952        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10953        p2p_gen.set_country_pack(self.primary_pack().clone());
10954        // v3.4.1: wire temporal context so PO/GR/invoice/payment dates snap
10955        // to business days. No-op when `temporal_patterns.business_days.
10956        // enabled = false`.
10957        if let Some(ctx) = &self.temporal_context {
10958            p2p_gen.set_temporal_context(Arc::clone(ctx));
10959        }
10960
10961        for i in 0..p2p_count {
10962            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10963            let materials: Vec<&Material> = self
10964                .master_data
10965                .materials
10966                .iter()
10967                .skip(i % self.master_data.materials.len().max(1))
10968                .take(2.min(self.master_data.materials.len()))
10969                .collect();
10970
10971            if materials.is_empty() {
10972                continue;
10973            }
10974
10975            let company = &self.config.companies[i % self.config.companies.len()];
10976            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10977            let fiscal_period = po_date.month() as u8;
10978            let created_by = if self.master_data.employees.is_empty() {
10979                "SYSTEM"
10980            } else {
10981                self.master_data.employees[i % self.master_data.employees.len()]
10982                    .user_id
10983                    .as_str()
10984            };
10985
10986            let chain = p2p_gen.generate_chain(
10987                &company.code,
10988                vendor,
10989                &materials,
10990                po_date,
10991                start_date.year() as u16,
10992                fiscal_period,
10993                created_by,
10994            );
10995
10996            // Flatten documents
10997            flows.purchase_orders.push(chain.purchase_order.clone());
10998            flows.goods_receipts.extend(chain.goods_receipts.clone());
10999            if let Some(vi) = &chain.vendor_invoice {
11000                flows.vendor_invoices.push(vi.clone());
11001            }
11002            if let Some(payment) = &chain.payment {
11003                flows.payments.push(payment.clone());
11004            }
11005            for remainder in &chain.remainder_payments {
11006                flows.payments.push(remainder.clone());
11007            }
11008            flows.p2p_chains.push(chain);
11009
11010            if let Some(pb) = &pb {
11011                pb.inc(1);
11012            }
11013        }
11014
11015        if let Some(pb) = pb {
11016            pb.finish_with_message("P2P document flows complete");
11017        }
11018
11019        // Generate O2C chains
11020        // Cap at ~2 SOs per customer per month to keep order volume realistic
11021        let o2c_count = self
11022            .phase_config
11023            .o2c_chains
11024            .min(self.master_data.customers.len() * 2 * months);
11025        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
11026
11027        // Convert O2C config from schema to generator config
11028        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
11029        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
11030        o2c_gen.set_country_pack(self.primary_pack().clone());
11031        // v3.4.1: wire temporal context (no-op when business_days disabled).
11032        if let Some(ctx) = &self.temporal_context {
11033            o2c_gen.set_temporal_context(Arc::clone(ctx));
11034        }
11035
11036        for i in 0..o2c_count {
11037            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
11038            let materials: Vec<&Material> = self
11039                .master_data
11040                .materials
11041                .iter()
11042                .skip(i % self.master_data.materials.len().max(1))
11043                .take(2.min(self.master_data.materials.len()))
11044                .collect();
11045
11046            if materials.is_empty() {
11047                continue;
11048            }
11049
11050            let company = &self.config.companies[i % self.config.companies.len()];
11051            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
11052            let fiscal_period = so_date.month() as u8;
11053            let created_by = if self.master_data.employees.is_empty() {
11054                "SYSTEM"
11055            } else {
11056                self.master_data.employees[i % self.master_data.employees.len()]
11057                    .user_id
11058                    .as_str()
11059            };
11060
11061            let chain = o2c_gen.generate_chain(
11062                &company.code,
11063                customer,
11064                &materials,
11065                so_date,
11066                start_date.year() as u16,
11067                fiscal_period,
11068                created_by,
11069            );
11070
11071            // Flatten documents
11072            flows.sales_orders.push(chain.sales_order.clone());
11073            flows.deliveries.extend(chain.deliveries.clone());
11074            if let Some(ci) = &chain.customer_invoice {
11075                flows.customer_invoices.push(ci.clone());
11076            }
11077            if let Some(receipt) = &chain.customer_receipt {
11078                flows.payments.push(receipt.clone());
11079            }
11080            // Extract remainder receipts (follow-up to partial payments)
11081            for receipt in &chain.remainder_receipts {
11082                flows.payments.push(receipt.clone());
11083            }
11084            flows.o2c_chains.push(chain);
11085
11086            if let Some(pb) = &pb {
11087                pb.inc(1);
11088            }
11089        }
11090
11091        if let Some(pb) = pb {
11092            pb.finish_with_message("O2C document flows complete");
11093        }
11094
11095        // Collect all document cross-references from document headers.
11096        // Each document embeds references to its predecessor(s) via add_reference(); here we
11097        // denormalise them into a flat list for the document_references.json output file.
11098        {
11099            let mut refs = Vec::new();
11100            for doc in &flows.purchase_orders {
11101                refs.extend(doc.header.document_references.iter().cloned());
11102            }
11103            for doc in &flows.goods_receipts {
11104                refs.extend(doc.header.document_references.iter().cloned());
11105            }
11106            for doc in &flows.vendor_invoices {
11107                refs.extend(doc.header.document_references.iter().cloned());
11108            }
11109            for doc in &flows.sales_orders {
11110                refs.extend(doc.header.document_references.iter().cloned());
11111            }
11112            for doc in &flows.deliveries {
11113                refs.extend(doc.header.document_references.iter().cloned());
11114            }
11115            for doc in &flows.customer_invoices {
11116                refs.extend(doc.header.document_references.iter().cloned());
11117            }
11118            for doc in &flows.payments {
11119                refs.extend(doc.header.document_references.iter().cloned());
11120            }
11121            debug!(
11122                "Collected {} document cross-references from document headers",
11123                refs.len()
11124            );
11125            flows.document_references = refs;
11126        }
11127
11128        Ok(())
11129    }
11130
11131    /// Generate journal entries using parallel generation across multiple cores.
11132    fn generate_journal_entries(
11133        &mut self,
11134        coa: &Arc<ChartOfAccounts>,
11135    ) -> SynthResult<Vec<JournalEntry>> {
11136        use datasynth_core::traits::ParallelGenerator;
11137
11138        let total = self.calculate_total_transactions();
11139        let pb = self.create_progress_bar(total, "Generating Journal Entries");
11140
11141        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11142            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11143        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
11144
11145        let company_codes: Vec<String> = self
11146            .config
11147            .companies
11148            .iter()
11149            .map(|c| c.code.clone())
11150            .collect();
11151
11152        let mut generator = JournalEntryGenerator::new_with_params(
11153            self.config.transactions.clone(),
11154            Arc::clone(coa),
11155            company_codes,
11156            start_date,
11157            end_date,
11158            self.seed,
11159        );
11160        // Wire the `business_processes.*_weight` config through (phantom knob
11161        // until now — the JE generator hard-coded 0.35/0.30/0.20/0.10/0.05).
11162        let bp = &self.config.business_processes;
11163        generator.set_business_process_weights(
11164            bp.o2c_weight,
11165            bp.p2p_weight,
11166            bp.r2r_weight,
11167            bp.h2r_weight,
11168            bp.a2r_weight,
11169        );
11170        // v3.4.0: wire advanced distributions (mixture models + industry
11171        // profiles). No-op when `distributions.enabled = false` or
11172        // `distributions.amounts.enabled = false`, preserving v3.3.2
11173        // byte-identical output on default configs.
11174        generator
11175            .set_advanced_distributions(&self.config.distributions, self.seed + 400)
11176            .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
11177
11178        // SP3: load and wire industry priors when the config opts in via
11179        //   distributions.industry_profile.priors.enabled = true
11180        // When disabled (or when using the legacy bare-name form), this block
11181        // is a no-op and generation behavior is identical to v5.11.
11182        if let Some(profile) = &self.config.distributions.industry_profile {
11183            if let Some(priors_cfg) = profile.priors() {
11184                if priors_cfg.enabled {
11185                    use datasynth_config::schema::PriorsSource;
11186                    use datasynth_generators::priors_loader::LoadedPriors;
11187
11188                    let mut priors_rng =
11189                        rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(500));
11190                    let period_days = i64::from(self.config.global.period_months) * 30;
11191                    let industry_slug = profile.profile_type().slug();
11192
11193                    let loaded = match priors_cfg.source {
11194                        PriorsSource::Bundled => {
11195                            LoadedPriors::load_bundled(industry_slug, &mut priors_rng, period_days)
11196                                .map_err(|e| {
11197                                    SynthError::config(format!(
11198                                "SP3: failed to load bundled priors for '{industry_slug}': {e}"
11199                            ))
11200                                })?
11201                        }
11202                        PriorsSource::File => {
11203                            let path = priors_cfg.path.as_ref().ok_or_else(|| {
11204                                SynthError::config(
11205                                    "SP3: industry_profile.priors.path required when source = file"
11206                                        .to_string(),
11207                                )
11208                            })?;
11209                            LoadedPriors::load_from_path(
11210                                path,
11211                                &mut priors_rng,
11212                                period_days,
11213                                Some(industry_slug),
11214                            )
11215                            .map_err(|e| {
11216                                SynthError::config(format!(
11217                                    "SP3: failed to load priors from '{}': {e}",
11218                                    path.display()
11219                                ))
11220                            })?
11221                        }
11222                    };
11223
11224                    // SP3.12 — cache priors in Arc so document-flow generator
11225                    // can also apply lines-per-JE padding without re-loading.
11226                    let loaded = std::sync::Arc::new(loaded);
11227                    self.cached_priors = Some(loaded.clone());
11228                    generator.loaded_priors = Some((*loaded).clone());
11229
11230                    // SP3.4 — instantiate VelocityCalibrator when the config
11231                    // opts in.  Default target rates (R7/R9) are a sensible
11232                    // baseline; they can be derived from the loaded priors in
11233                    // a future hardening pass.
11234                    if priors_cfg.velocity_calibration {
11235                        use datasynth_generators::velocity_calibrator::VelocityCalibrator;
11236                        let mut targets = std::collections::HashMap::new();
11237                        targets.insert("R7".to_string(), 0.10);
11238                        targets.insert("R9".to_string(), 0.10);
11239                        let calibrator = VelocityCalibrator::new(targets, 10_000);
11240                        generator.velocity_calibrator = Some(calibrator);
11241                    }
11242                }
11243            }
11244        }
11245
11246        let generator = generator;
11247
11248        // Connect generated master data to ensure JEs reference real entities
11249        // Enable persona-based error injection for realistic human behavior
11250        // Pass fraud configuration for fraud injection
11251        let je_pack = self.primary_pack();
11252
11253        // Master-data CC / PC pools so JE.cost_center and
11254        // JE.profit_center join back to `cost_centers.id` and
11255        // `profit_centers.id` (closes the v5.9.0 linkage gap that
11256        // had `JE.cost_center = "CC1000"` while master used
11257        // `CC-1000-FIN` etc.).  Empty when no master is present —
11258        // the generator falls back to its hardcoded constants.
11259        let cc_pool: Vec<String> = self
11260            .master_data
11261            .cost_centers
11262            .iter()
11263            .map(|c| c.id.clone())
11264            .collect();
11265        let pc_pool: Vec<String> = self
11266            .master_data
11267            .profit_centers
11268            .iter()
11269            .map(|p| p.id.clone())
11270            .collect();
11271
11272        // Build a UserPool from the generated employee master so
11273        // JE.created_by lines join back to `employees.user_id`.  v5.9.0:
11274        // closes the third linkage gap (the previous behaviour had
11275        // JeGenerator generate its own UserPool internally with
11276        // ids disjoint from the employee master).
11277        let user_pool_from_employees =
11278            datasynth_core::models::UserPool::from_employees(&self.master_data.employees);
11279
11280        let mut generator = generator
11281            .with_master_data(
11282                &self.master_data.vendors,
11283                &self.master_data.customers,
11284                &self.master_data.materials,
11285            )
11286            .with_cost_center_pool(cc_pool)
11287            .with_profit_center_pool(pc_pool)
11288            .with_country_pack_names(je_pack)
11289            .with_user_pool(user_pool_from_employees)
11290            .with_country_pack_temporal(
11291                self.config.temporal_patterns.clone(),
11292                self.seed + 200,
11293                je_pack,
11294            )
11295            .with_persona_errors(true)
11296            .with_fraud_config(self.config.fraud.clone());
11297
11298        // Apply temporal drift if configured. v3.5.2+: also merge
11299        // `distributions.regime_changes` (regime events, economic
11300        // cycles, parameter drifts) into the same DriftConfig so both
11301        // knobs flow through the shared DriftController.
11302        let temporal_enabled = self.config.temporal.enabled;
11303        let regimes_enabled = self.config.distributions.regime_changes.enabled;
11304        if temporal_enabled || regimes_enabled {
11305            let mut drift_config = if temporal_enabled {
11306                self.config.temporal.to_core_config()
11307            } else {
11308                // regime-changes only: start from default (drift OFF),
11309                // apply_to flips `enabled = true`.
11310                datasynth_core::distributions::DriftConfig::default()
11311            };
11312            if regimes_enabled {
11313                self.config
11314                    .distributions
11315                    .regime_changes
11316                    .apply_to(&mut drift_config, start_date);
11317            }
11318            generator = generator.with_drift_config(drift_config, self.seed + 100);
11319        }
11320
11321        // Check memory limit at start
11322        self.check_memory_limit()?;
11323
11324        // Determine parallelism: use available cores, but cap at total entries
11325        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11326
11327        // Use parallel generation for datasets with 10K+ entries.
11328        // Below this threshold, the statistical properties of a single-seeded
11329        // generator (e.g. Benford compliance) are better preserved.
11330        let entries = if total >= 10_000 && num_threads > 1 {
11331            // Parallel path: split the generator across cores and generate in parallel.
11332            // Each sub-generator gets a unique seed for deterministic, independent generation.
11333            let sub_generators = generator.split(num_threads);
11334            let entries_per_thread = total as usize / num_threads;
11335            let remainder = total as usize % num_threads;
11336
11337            let batches: Vec<Vec<JournalEntry>> = sub_generators
11338                .into_par_iter()
11339                .enumerate()
11340                .map(|(i, mut gen)| {
11341                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11342                    gen.generate_batch(count)
11343                })
11344                .collect();
11345
11346            // Merge all batches into a single Vec
11347            let entries = JournalEntryGenerator::merge_results(batches);
11348
11349            if let Some(pb) = &pb {
11350                pb.inc(total);
11351            }
11352            entries
11353        } else {
11354            // Sequential path for small datasets (< 1000 entries)
11355            let mut entries = Vec::with_capacity(total as usize);
11356            for _ in 0..total {
11357                let entry = generator.generate();
11358                entries.push(entry);
11359                if let Some(pb) = &pb {
11360                    pb.inc(1);
11361                }
11362            }
11363            entries
11364        };
11365
11366        if let Some(pb) = pb {
11367            pb.finish_with_message("Journal entries complete");
11368        }
11369
11370        Ok(entries)
11371    }
11372
11373    /// Generate journal entries from document flows.
11374    ///
11375    /// This creates proper GL entries for each document in the P2P and O2C flows,
11376    /// ensuring that document activity is reflected in the general ledger.
11377    fn generate_jes_from_document_flows(
11378        &mut self,
11379        flows: &DocumentFlowSnapshot,
11380    ) -> SynthResult<Vec<JournalEntry>> {
11381        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11382        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11383
11384        let je_config = match self.resolve_coa_framework() {
11385            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11386            CoAFramework::GermanSkr04 => {
11387                let fa = datasynth_core::FrameworkAccounts::german_gaap();
11388                DocumentFlowJeConfig::from(&fa)
11389            }
11390            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11391        };
11392
11393        let populate_fec = je_config.populate_fec_fields;
11394        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11395
11396        // SP3.12 — propagate cached priors so document-flow JEs receive
11397        // the same lines-per-JE padding as standalone JEs.
11398        if let Some(ref priors) = self.cached_priors {
11399            generator.set_loaded_priors(priors.clone());
11400        }
11401
11402        // Master-data CC / PC pools so document-flow-derived JEs
11403        // (P2P / O2C postings) reference IDs that join back to the
11404        // cost-centers / profit-centers masters.  Same plumbing as
11405        // for `JeGenerator` above; falls back to hardcoded const
11406        // pools when masters are absent.
11407        let cc_pool: Vec<String> = self
11408            .master_data
11409            .cost_centers
11410            .iter()
11411            .map(|c| c.id.clone())
11412            .collect();
11413        let pc_pool: Vec<String> = self
11414            .master_data
11415            .profit_centers
11416            .iter()
11417            .map(|p| p.id.clone())
11418            .collect();
11419        if !cc_pool.is_empty() {
11420            generator.set_cost_center_pool(cc_pool);
11421        }
11422        if !pc_pool.is_empty() {
11423            generator.set_profit_center_pool(pc_pool);
11424        }
11425
11426        // Build auxiliary account lookup from vendor/customer master data so that
11427        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
11428        // PCG "4010001") instead of raw partner IDs.
11429        if populate_fec {
11430            let mut aux_lookup = std::collections::HashMap::new();
11431            for vendor in &self.master_data.vendors {
11432                if let Some(ref aux) = vendor.auxiliary_gl_account {
11433                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11434                }
11435            }
11436            for customer in &self.master_data.customers {
11437                if let Some(ref aux) = customer.auxiliary_gl_account {
11438                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11439                }
11440            }
11441            if !aux_lookup.is_empty() {
11442                generator.set_auxiliary_account_lookup(aux_lookup);
11443            }
11444        }
11445
11446        let mut entries = Vec::new();
11447
11448        // Generate JEs from P2P chains
11449        for chain in &flows.p2p_chains {
11450            let chain_entries = generator.generate_from_p2p_chain(chain);
11451            entries.extend(chain_entries);
11452            if let Some(pb) = &pb {
11453                pb.inc(1);
11454            }
11455        }
11456
11457        // Generate JEs from O2C chains
11458        for chain in &flows.o2c_chains {
11459            let chain_entries = generator.generate_from_o2c_chain(chain);
11460            entries.extend(chain_entries);
11461            if let Some(pb) = &pb {
11462                pb.inc(1);
11463            }
11464        }
11465
11466        if let Some(pb) = pb {
11467            pb.finish_with_message(format!(
11468                "Generated {} JEs from document flows",
11469                entries.len()
11470            ));
11471        }
11472
11473        Ok(entries)
11474    }
11475
11476    /// Generate journal entries from payroll runs.
11477    ///
11478    /// Creates one JE per payroll run:
11479    /// - DR Salaries & Wages (6100) for gross pay
11480    /// - CR Payroll Clearing (9100) for gross pay
11481    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11482        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11483
11484        let mut jes = Vec::with_capacity(payroll_runs.len());
11485
11486        for run in payroll_runs {
11487            let mut je = JournalEntry::new_simple(
11488                format!("JE-PAYROLL-{}", run.payroll_id),
11489                run.company_code.clone(),
11490                run.run_date,
11491                format!("Payroll {}", run.payroll_id),
11492            );
11493
11494            // Debit Salaries & Wages for gross pay
11495            je.add_line(JournalEntryLine {
11496                line_number: 1,
11497                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11498                debit_amount: run.total_gross,
11499                reference: Some(run.payroll_id.clone()),
11500                text: Some(format!(
11501                    "Payroll {} ({} employees)",
11502                    run.payroll_id, run.employee_count
11503                )),
11504                ..Default::default()
11505            });
11506
11507            // Credit Payroll Clearing for gross pay
11508            je.add_line(JournalEntryLine {
11509                line_number: 2,
11510                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11511                credit_amount: run.total_gross,
11512                reference: Some(run.payroll_id.clone()),
11513                ..Default::default()
11514            });
11515
11516            jes.push(je);
11517        }
11518
11519        jes
11520    }
11521
11522    /// Link document flows to subledger records.
11523    ///
11524    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
11525    /// ensuring subledger data is coherent with document flow data.
11526    fn link_document_flows_to_subledgers(
11527        &mut self,
11528        flows: &DocumentFlowSnapshot,
11529    ) -> SynthResult<SubledgerSnapshot> {
11530        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11531        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11532
11533        // Build vendor/customer name maps from master data for realistic subledger names
11534        let vendor_names: std::collections::HashMap<String, String> = self
11535            .master_data
11536            .vendors
11537            .iter()
11538            .map(|v| (v.vendor_id.clone(), v.name.clone()))
11539            .collect();
11540        let customer_names: std::collections::HashMap<String, String> = self
11541            .master_data
11542            .customers
11543            .iter()
11544            .map(|c| (c.customer_id.clone(), c.name.clone()))
11545            .collect();
11546
11547        let mut linker = DocumentFlowLinker::new()
11548            .with_vendor_names(vendor_names)
11549            .with_customer_names(customer_names);
11550
11551        // Convert vendor invoices to AP invoices
11552        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11553        if let Some(pb) = &pb {
11554            pb.inc(flows.vendor_invoices.len() as u64);
11555        }
11556
11557        // Convert customer invoices to AR invoices
11558        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11559        if let Some(pb) = &pb {
11560            pb.inc(flows.customer_invoices.len() as u64);
11561        }
11562
11563        if let Some(pb) = pb {
11564            pb.finish_with_message(format!(
11565                "Linked {} AP and {} AR invoices",
11566                ap_invoices.len(),
11567                ar_invoices.len()
11568            ));
11569        }
11570
11571        Ok(SubledgerSnapshot {
11572            ap_invoices,
11573            ar_invoices,
11574            fa_records: Vec::new(),
11575            inventory_positions: Vec::new(),
11576            inventory_movements: Vec::new(),
11577            // Aging reports are computed after payment settlement in phase_document_flows.
11578            ar_aging_reports: Vec::new(),
11579            ap_aging_reports: Vec::new(),
11580            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
11581            depreciation_runs: Vec::new(),
11582            inventory_valuations: Vec::new(),
11583            // Dunning runs and letters are populated in phase_document_flows after AR aging.
11584            dunning_runs: Vec::new(),
11585            dunning_letters: Vec::new(),
11586        })
11587    }
11588
11589    /// Generate OCPM events from document flows.
11590    ///
11591    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
11592    /// capturing the object-centric process perspective.
11593    #[allow(clippy::too_many_arguments)]
11594    fn generate_ocpm_events(
11595        &mut self,
11596        flows: &DocumentFlowSnapshot,
11597        sourcing: &SourcingSnapshot,
11598        hr: &HrSnapshot,
11599        manufacturing: &ManufacturingSnapshot,
11600        banking: &BankingSnapshot,
11601        audit: &AuditSnapshot,
11602        financial_reporting: &FinancialReportingSnapshot,
11603    ) -> SynthResult<OcpmSnapshot> {
11604        let total_chains = flows.p2p_chains.len()
11605            + flows.o2c_chains.len()
11606            + sourcing.sourcing_projects.len()
11607            + hr.payroll_runs.len()
11608            + manufacturing.production_orders.len()
11609            + banking.customers.len()
11610            + audit.engagements.len()
11611            + financial_reporting.bank_reconciliations.len();
11612        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11613
11614        // Create OCPM event log with standard types
11615        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11616        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11617
11618        // Configure the OCPM generator
11619        let ocpm_config = OcpmGeneratorConfig {
11620            generate_p2p: true,
11621            generate_o2c: true,
11622            generate_s2c: !sourcing.sourcing_projects.is_empty(),
11623            generate_h2r: !hr.payroll_runs.is_empty(),
11624            generate_mfg: !manufacturing.production_orders.is_empty(),
11625            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11626            generate_bank: !banking.customers.is_empty(),
11627            generate_audit: !audit.engagements.is_empty(),
11628            happy_path_rate: 0.75,
11629            exception_path_rate: 0.20,
11630            error_path_rate: 0.05,
11631            add_duration_variability: true,
11632            duration_std_dev_factor: 0.3,
11633        };
11634        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11635        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11636
11637        // Get available users for resource assignment
11638        let available_users: Vec<String> = self
11639            .master_data
11640            .employees
11641            .iter()
11642            .take(20)
11643            .map(|e| e.user_id.clone())
11644            .collect();
11645
11646        // Deterministic base date from config (avoids Utc::now() non-determinism)
11647        let fallback_date =
11648            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11649        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11650            .unwrap_or(fallback_date);
11651        let base_midnight = base_date
11652            .and_hms_opt(0, 0, 0)
11653            .expect("midnight is always valid");
11654        let base_datetime =
11655            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11656
11657        // Helper closure to add case results to event log
11658        let add_result = |event_log: &mut OcpmEventLog,
11659                          result: datasynth_ocpm::CaseGenerationResult| {
11660            for event in result.events {
11661                event_log.add_event(event);
11662            }
11663            for object in result.objects {
11664                event_log.add_object(object);
11665            }
11666            for relationship in result.relationships {
11667                event_log.add_relationship(relationship);
11668            }
11669            for corr in result.correlation_events {
11670                event_log.add_correlation_event(corr);
11671            }
11672            event_log.add_case(result.case_trace);
11673        };
11674
11675        // Generate events from P2P chains
11676        for chain in &flows.p2p_chains {
11677            let po = &chain.purchase_order;
11678            let documents = P2pDocuments::new(
11679                &po.header.document_id,
11680                &po.vendor_id,
11681                &po.header.company_code,
11682                po.total_net_amount,
11683                &po.header.currency,
11684                &ocpm_uuid_factory,
11685            )
11686            .with_goods_receipt(
11687                chain
11688                    .goods_receipts
11689                    .first()
11690                    .map(|gr| gr.header.document_id.as_str())
11691                    .unwrap_or(""),
11692                &ocpm_uuid_factory,
11693            )
11694            .with_invoice(
11695                chain
11696                    .vendor_invoice
11697                    .as_ref()
11698                    .map(|vi| vi.header.document_id.as_str())
11699                    .unwrap_or(""),
11700                &ocpm_uuid_factory,
11701            )
11702            .with_payment(
11703                chain
11704                    .payment
11705                    .as_ref()
11706                    .map(|p| p.header.document_id.as_str())
11707                    .unwrap_or(""),
11708                &ocpm_uuid_factory,
11709            );
11710
11711            let start_time =
11712                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
11713            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
11714            add_result(&mut event_log, result);
11715
11716            if let Some(pb) = &pb {
11717                pb.inc(1);
11718            }
11719        }
11720
11721        // Generate events from O2C chains
11722        for chain in &flows.o2c_chains {
11723            let so = &chain.sales_order;
11724            let documents = O2cDocuments::new(
11725                &so.header.document_id,
11726                &so.customer_id,
11727                &so.header.company_code,
11728                so.total_net_amount,
11729                &so.header.currency,
11730                &ocpm_uuid_factory,
11731            )
11732            .with_delivery(
11733                chain
11734                    .deliveries
11735                    .first()
11736                    .map(|d| d.header.document_id.as_str())
11737                    .unwrap_or(""),
11738                &ocpm_uuid_factory,
11739            )
11740            .with_invoice(
11741                chain
11742                    .customer_invoice
11743                    .as_ref()
11744                    .map(|ci| ci.header.document_id.as_str())
11745                    .unwrap_or(""),
11746                &ocpm_uuid_factory,
11747            )
11748            .with_receipt(
11749                chain
11750                    .customer_receipt
11751                    .as_ref()
11752                    .map(|r| r.header.document_id.as_str())
11753                    .unwrap_or(""),
11754                &ocpm_uuid_factory,
11755            );
11756
11757            let start_time =
11758                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
11759            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
11760            add_result(&mut event_log, result);
11761
11762            if let Some(pb) = &pb {
11763                pb.inc(1);
11764            }
11765        }
11766
11767        // Generate events from S2C sourcing projects
11768        for project in &sourcing.sourcing_projects {
11769            // Find vendor from contracts or qualifications
11770            let vendor_id = sourcing
11771                .contracts
11772                .iter()
11773                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11774                .map(|c| c.vendor_id.clone())
11775                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
11776                .or_else(|| {
11777                    self.master_data
11778                        .vendors
11779                        .first()
11780                        .map(|v| v.vendor_id.clone())
11781                })
11782                .unwrap_or_else(|| "V000".to_string());
11783            let mut docs = S2cDocuments::new(
11784                &project.project_id,
11785                &vendor_id,
11786                &project.company_code,
11787                project.estimated_annual_spend,
11788                &ocpm_uuid_factory,
11789            );
11790            // Link RFx if available
11791            if let Some(rfx) = sourcing
11792                .rfx_events
11793                .iter()
11794                .find(|r| r.sourcing_project_id == project.project_id)
11795            {
11796                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11797                // Link winning bid (status == Accepted)
11798                if let Some(bid) = sourcing.bids.iter().find(|b| {
11799                    b.rfx_id == rfx.rfx_id
11800                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11801                }) {
11802                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11803                }
11804            }
11805            // Link contract
11806            if let Some(contract) = sourcing
11807                .contracts
11808                .iter()
11809                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11810            {
11811                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
11812            }
11813            let start_time = base_datetime - chrono::Duration::days(90);
11814            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
11815            add_result(&mut event_log, result);
11816
11817            if let Some(pb) = &pb {
11818                pb.inc(1);
11819            }
11820        }
11821
11822        // Generate events from H2R payroll runs
11823        for run in &hr.payroll_runs {
11824            // Use first matching payroll line item's employee, or fallback
11825            let employee_id = hr
11826                .payroll_line_items
11827                .iter()
11828                .find(|li| li.payroll_id == run.payroll_id)
11829                .map(|li| li.employee_id.as_str())
11830                .unwrap_or("EMP000");
11831            let docs = H2rDocuments::new(
11832                &run.payroll_id,
11833                employee_id,
11834                &run.company_code,
11835                run.total_gross,
11836                &ocpm_uuid_factory,
11837            )
11838            .with_time_entries(
11839                hr.time_entries
11840                    .iter()
11841                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
11842                    .take(5)
11843                    .map(|t| t.entry_id.as_str())
11844                    .collect(),
11845            );
11846            let start_time = base_datetime - chrono::Duration::days(30);
11847            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
11848            add_result(&mut event_log, result);
11849
11850            if let Some(pb) = &pb {
11851                pb.inc(1);
11852            }
11853        }
11854
11855        // Generate events from MFG production orders
11856        for order in &manufacturing.production_orders {
11857            let mut docs = MfgDocuments::new(
11858                &order.order_id,
11859                &order.material_id,
11860                &order.company_code,
11861                order.planned_quantity,
11862                &ocpm_uuid_factory,
11863            )
11864            .with_operations(
11865                order
11866                    .operations
11867                    .iter()
11868                    .map(|o| format!("OP-{:04}", o.operation_number))
11869                    .collect::<Vec<_>>()
11870                    .iter()
11871                    .map(std::string::String::as_str)
11872                    .collect(),
11873            );
11874            // Link quality inspection if available (via reference_id matching order_id)
11875            if let Some(insp) = manufacturing
11876                .quality_inspections
11877                .iter()
11878                .find(|i| i.reference_id == order.order_id)
11879            {
11880                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
11881            }
11882            // Link cycle count if available (match by material_id in items)
11883            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
11884                cc.items
11885                    .iter()
11886                    .any(|item| item.material_id == order.material_id)
11887            }) {
11888                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
11889            }
11890            let start_time = base_datetime - chrono::Duration::days(60);
11891            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
11892            add_result(&mut event_log, result);
11893
11894            if let Some(pb) = &pb {
11895                pb.inc(1);
11896            }
11897        }
11898
11899        // Generate events from Banking customers
11900        for customer in &banking.customers {
11901            let customer_id_str = customer.customer_id.to_string();
11902            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
11903            // Link accounts (primary_owner_id matches customer_id)
11904            if let Some(account) = banking
11905                .accounts
11906                .iter()
11907                .find(|a| a.primary_owner_id == customer.customer_id)
11908            {
11909                let account_id_str = account.account_id.to_string();
11910                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
11911                // Link transactions for this account
11912                let txn_strs: Vec<String> = banking
11913                    .transactions
11914                    .iter()
11915                    .filter(|t| t.account_id == account.account_id)
11916                    .take(10)
11917                    .map(|t| t.transaction_id.to_string())
11918                    .collect();
11919                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
11920                let txn_amounts: Vec<rust_decimal::Decimal> = banking
11921                    .transactions
11922                    .iter()
11923                    .filter(|t| t.account_id == account.account_id)
11924                    .take(10)
11925                    .map(|t| t.amount)
11926                    .collect();
11927                if !txn_ids.is_empty() {
11928                    docs = docs.with_transactions(txn_ids, txn_amounts);
11929                }
11930            }
11931            let start_time = base_datetime - chrono::Duration::days(180);
11932            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
11933            add_result(&mut event_log, result);
11934
11935            if let Some(pb) = &pb {
11936                pb.inc(1);
11937            }
11938        }
11939
11940        // Generate events from Audit engagements
11941        for engagement in &audit.engagements {
11942            let engagement_id_str = engagement.engagement_id.to_string();
11943            let docs = AuditDocuments::new(
11944                &engagement_id_str,
11945                &engagement.client_entity_id,
11946                &ocpm_uuid_factory,
11947            )
11948            .with_workpapers(
11949                audit
11950                    .workpapers
11951                    .iter()
11952                    .filter(|w| w.engagement_id == engagement.engagement_id)
11953                    .take(10)
11954                    .map(|w| w.workpaper_id.to_string())
11955                    .collect::<Vec<_>>()
11956                    .iter()
11957                    .map(std::string::String::as_str)
11958                    .collect(),
11959            )
11960            .with_evidence(
11961                audit
11962                    .evidence
11963                    .iter()
11964                    .filter(|e| e.engagement_id == engagement.engagement_id)
11965                    .take(10)
11966                    .map(|e| e.evidence_id.to_string())
11967                    .collect::<Vec<_>>()
11968                    .iter()
11969                    .map(std::string::String::as_str)
11970                    .collect(),
11971            )
11972            .with_risks(
11973                audit
11974                    .risk_assessments
11975                    .iter()
11976                    .filter(|r| r.engagement_id == engagement.engagement_id)
11977                    .take(5)
11978                    .map(|r| r.risk_id.to_string())
11979                    .collect::<Vec<_>>()
11980                    .iter()
11981                    .map(std::string::String::as_str)
11982                    .collect(),
11983            )
11984            .with_findings(
11985                audit
11986                    .findings
11987                    .iter()
11988                    .filter(|f| f.engagement_id == engagement.engagement_id)
11989                    .take(5)
11990                    .map(|f| f.finding_id.to_string())
11991                    .collect::<Vec<_>>()
11992                    .iter()
11993                    .map(std::string::String::as_str)
11994                    .collect(),
11995            )
11996            .with_judgments(
11997                audit
11998                    .judgments
11999                    .iter()
12000                    .filter(|j| j.engagement_id == engagement.engagement_id)
12001                    .take(5)
12002                    .map(|j| j.judgment_id.to_string())
12003                    .collect::<Vec<_>>()
12004                    .iter()
12005                    .map(std::string::String::as_str)
12006                    .collect(),
12007            );
12008            let start_time = base_datetime - chrono::Duration::days(120);
12009            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
12010            add_result(&mut event_log, result);
12011
12012            if let Some(pb) = &pb {
12013                pb.inc(1);
12014            }
12015        }
12016
12017        // Generate events from Bank Reconciliations
12018        for recon in &financial_reporting.bank_reconciliations {
12019            let docs = BankReconDocuments::new(
12020                &recon.reconciliation_id,
12021                &recon.bank_account_id,
12022                &recon.company_code,
12023                recon.bank_ending_balance,
12024                &ocpm_uuid_factory,
12025            )
12026            .with_statement_lines(
12027                recon
12028                    .statement_lines
12029                    .iter()
12030                    .take(20)
12031                    .map(|l| l.line_id.as_str())
12032                    .collect(),
12033            )
12034            .with_reconciling_items(
12035                recon
12036                    .reconciling_items
12037                    .iter()
12038                    .take(10)
12039                    .map(|i| i.item_id.as_str())
12040                    .collect(),
12041            );
12042            let start_time = base_datetime - chrono::Duration::days(30);
12043            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
12044            add_result(&mut event_log, result);
12045
12046            if let Some(pb) = &pb {
12047                pb.inc(1);
12048            }
12049        }
12050
12051        // Compute process variants
12052        event_log.compute_variants();
12053
12054        let summary = event_log.summary();
12055
12056        if let Some(pb) = pb {
12057            pb.finish_with_message(format!(
12058                "Generated {} OCPM events, {} objects",
12059                summary.event_count, summary.object_count
12060            ));
12061        }
12062
12063        Ok(OcpmSnapshot {
12064            event_count: summary.event_count,
12065            object_count: summary.object_count,
12066            case_count: summary.case_count,
12067            event_log: Some(event_log),
12068        })
12069    }
12070
12071    /// Inject anomalies into journal entries.
12072    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
12073        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
12074
12075        // Read anomaly rates from config instead of using hardcoded values.
12076        // Priority: anomaly_injection config > fraud config > default 0.02
12077        let total_rate = if self.config.anomaly_injection.enabled {
12078            self.config.anomaly_injection.rates.total_rate
12079        } else if self.config.fraud.enabled {
12080            self.config.fraud.fraud_rate
12081        } else {
12082            0.02
12083        };
12084
12085        let fraud_rate = if self.config.anomaly_injection.enabled {
12086            self.config.anomaly_injection.rates.fraud_rate
12087        } else {
12088            AnomalyRateConfig::default().fraud_rate
12089        };
12090
12091        let error_rate = if self.config.anomaly_injection.enabled {
12092            self.config.anomaly_injection.rates.error_rate
12093        } else {
12094            AnomalyRateConfig::default().error_rate
12095        };
12096
12097        let process_issue_rate = if self.config.anomaly_injection.enabled {
12098            self.config.anomaly_injection.rates.process_rate
12099        } else {
12100            AnomalyRateConfig::default().process_issue_rate
12101        };
12102
12103        let anomaly_config = AnomalyInjectorConfig {
12104            rates: AnomalyRateConfig {
12105                total_rate,
12106                fraud_rate,
12107                error_rate,
12108                process_issue_rate,
12109                ..Default::default()
12110            },
12111            seed: self.seed + 5000,
12112            ..Default::default()
12113        };
12114
12115        let mut injector = AnomalyInjector::new(anomaly_config);
12116        let result = injector.process_entries(entries);
12117
12118        // Central concentration abstraction (#143, Phase 1): run the post-process
12119        // pipeline AFTER per-entry strategies. The pipeline merges the SOTA-12
12120        // tagger + new passes (trading-partner pool, Phase-2 account substitution)
12121        // through a single integration point — see
12122        // docs/superpowers/specs/2026-05-23-concentration-pass-INDEX.md.
12123        //
12124        // Back-compat: the legacy `anomaly_injection.source_conditional_rarity_rate`
12125        // key remains honored. If `concentration.source_conditional_rarity` is also
12126        // set in the same config, the unified DSL field wins.
12127        let sota12_tagged: usize = {
12128            use datasynth_config::schema::{
12129                ConcentrationConfig, SourceConditionalRarityPassConfig,
12130            };
12131            use datasynth_generators::concentration::ConcentrationPipeline;
12132
12133            // Decide effective ConcentrationConfig: start from user config, then
12134            // back-fill from the legacy SOTA-12 key if the unified DSL didn't set it.
12135            let mut effective: ConcentrationConfig = self.config.concentration.clone();
12136            if effective.source_conditional_rarity.is_none() {
12137                if let Some(rate) = self.config.anomaly_injection.source_conditional_rarity_rate {
12138                    effective.enabled = true;
12139                    effective.source_conditional_rarity = Some(SourceConditionalRarityPassConfig {
12140                        rate,
12141                        min_surprise: None,
12142                        min_per_source_lines: None,
12143                    });
12144                }
12145            }
12146
12147            if !effective.enabled {
12148                0
12149            } else {
12150                let pipeline = ConcentrationPipeline::from_config(&effective).map_err(|e| {
12151                    SynthError::generation(format!(
12152                        "ConcentrationPipeline construction failed: {e}"
12153                    ))
12154                })?;
12155                if !pipeline.is_active() {
12156                    0
12157                } else {
12158                    // Per-pipeline seed disjoint from every other generator stream.
12159                    const CONCENTRATION_SEED_OFFSET: u64 = 0xC0_C3_E1_47_10_43_77_3B;
12160                    let stats =
12161                        pipeline.run(entries, self.seed.wrapping_add(CONCENTRATION_SEED_OFFSET));
12162                    stats
12163                        .iter()
12164                        .filter(|s| s.pass == "source_conditional_rarity")
12165                        .map(|s| s.entries_modified)
12166                        .sum()
12167                }
12168            }
12169        };
12170
12171        if let Some(pb) = &pb {
12172            pb.inc(entries.len() as u64);
12173            pb.finish_with_message("Anomaly injection complete");
12174        }
12175
12176        let mut by_type = HashMap::new();
12177        for label in &result.labels {
12178            *by_type
12179                .entry(format!("{:?}", label.anomaly_type))
12180                .or_insert(0) += 1;
12181        }
12182        if sota12_tagged > 0 {
12183            *by_type
12184                .entry("SourceConditionalRarity".to_string())
12185                .or_insert(0) += sota12_tagged;
12186        }
12187
12188        Ok(AnomalyLabels {
12189            labels: result.labels,
12190            summary: Some(result.summary),
12191            by_type,
12192        })
12193    }
12194
12195    /// Validate journal entries using running balance tracker.
12196    ///
12197    /// Applies all entries to the balance tracker and validates:
12198    /// - Each entry is internally balanced (debits = credits)
12199    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
12200    ///
12201    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
12202    /// excluded from balance validation as they may be intentionally unbalanced.
12203    fn validate_journal_entries(
12204        &mut self,
12205        entries: &[JournalEntry],
12206    ) -> SynthResult<BalanceValidationResult> {
12207        // Filter out entries with human errors as they may be intentionally unbalanced
12208        let clean_entries: Vec<&JournalEntry> = entries
12209            .iter()
12210            .filter(|e| {
12211                e.header
12212                    .header_text
12213                    .as_ref()
12214                    .map(|t| !t.contains("[HUMAN_ERROR:"))
12215                    .unwrap_or(true)
12216            })
12217            .collect();
12218
12219        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
12220
12221        // Configure tracker to not fail on errors (collect them instead)
12222        let config = BalanceTrackerConfig {
12223            validate_on_each_entry: false,   // We'll validate at the end
12224            track_history: false,            // Skip history for performance
12225            fail_on_validation_error: false, // Collect errors, don't fail
12226            ..Default::default()
12227        };
12228        let validation_currency = self
12229            .config
12230            .companies
12231            .first()
12232            .map(|c| c.currency.clone())
12233            .unwrap_or_else(|| "USD".to_string());
12234
12235        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
12236
12237        // Apply clean entries (without human errors)
12238        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
12239        let errors = tracker.apply_entries(&clean_refs);
12240
12241        if let Some(pb) = &pb {
12242            pb.inc(entries.len() as u64);
12243        }
12244
12245        // Check if any entries were unbalanced
12246        // Note: When fail_on_validation_error is false, errors are stored in tracker
12247        let has_unbalanced = tracker
12248            .get_validation_errors()
12249            .iter()
12250            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
12251
12252        // Validate balance sheet for each company
12253        // Include both returned errors and collected validation errors
12254        let mut all_errors = errors;
12255        all_errors.extend(tracker.get_validation_errors().iter().cloned());
12256        let company_codes: Vec<String> = self
12257            .config
12258            .companies
12259            .iter()
12260            .map(|c| c.code.clone())
12261            .collect();
12262
12263        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12264            .map(|d| d + chrono::Months::new(self.config.global.period_months))
12265            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12266
12267        for company_code in &company_codes {
12268            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
12269                all_errors.push(e);
12270            }
12271        }
12272
12273        // Get statistics after all mutable operations are done
12274        let stats = tracker.get_statistics();
12275
12276        // Determine if balanced overall
12277        let is_balanced = all_errors.is_empty();
12278
12279        if let Some(pb) = pb {
12280            let msg = if is_balanced {
12281                "Balance validation passed"
12282            } else {
12283                "Balance validation completed with errors"
12284            };
12285            pb.finish_with_message(msg);
12286        }
12287
12288        Ok(BalanceValidationResult {
12289            validated: true,
12290            is_balanced,
12291            entries_processed: stats.entries_processed,
12292            total_debits: stats.total_debits,
12293            total_credits: stats.total_credits,
12294            accounts_tracked: stats.accounts_tracked,
12295            companies_tracked: stats.companies_tracked,
12296            validation_errors: all_errors,
12297            has_unbalanced_entries: has_unbalanced,
12298        })
12299    }
12300
12301    /// Inject data quality variations into journal entries.
12302    ///
12303    /// Applies typos, missing values, and format variations to make
12304    /// the synthetic data more realistic for testing data cleaning pipelines.
12305    fn inject_data_quality(
12306        &mut self,
12307        entries: &mut [JournalEntry],
12308    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
12309        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
12310
12311        // Build config from user-specified schema settings when data_quality is enabled;
12312        // otherwise fall back to the low-rate minimal() preset.
12313        let config = if self.config.data_quality.enabled {
12314            let dq = &self.config.data_quality;
12315            // Propagate per-field rates and protected fields from the schema
12316            // so users can dial in real-production NULL profiles per field
12317            // (e.g. CostCenter 96.5% NULL, Invoice_Reference 100% NULL).
12318            let field_rates = dq.missing_values.field_rates.clone();
12319            let mut required_fields: std::collections::HashSet<String> =
12320                dq.missing_values.protected_fields.iter().cloned().collect();
12321            // Always preserve audit-critical identifiers regardless of
12322            // user config — losing these breaks downstream joins.
12323            for f in [
12324                "document_id",
12325                "company_code",
12326                "posting_date",
12327                "fiscal_year",
12328                "fiscal_period",
12329                "gl_account",
12330                "line_number",
12331                "transaction_id",
12332            ] {
12333                required_fields.insert(f.to_string());
12334            }
12335            DataQualityConfig {
12336                enable_missing_values: dq.missing_values.enabled,
12337                missing_values: datasynth_generators::MissingValueConfig {
12338                    global_rate: dq.effective_missing_rate(),
12339                    field_rates,
12340                    required_fields,
12341                    ..Default::default()
12342                },
12343                enable_format_variations: dq.format_variations.enabled,
12344                format_variations: datasynth_generators::FormatVariationConfig {
12345                    date_variation_rate: dq.format_variations.dates.rate,
12346                    amount_variation_rate: dq.format_variations.amounts.rate,
12347                    identifier_variation_rate: dq.format_variations.identifiers.rate,
12348                    ..Default::default()
12349                },
12350                enable_duplicates: dq.duplicates.enabled,
12351                duplicates: datasynth_generators::DuplicateConfig {
12352                    duplicate_rate: dq.effective_duplicate_rate(),
12353                    ..Default::default()
12354                },
12355                enable_typos: dq.typos.enabled,
12356                typos: datasynth_generators::TypoConfig {
12357                    char_error_rate: dq.effective_typo_rate(),
12358                    ..Default::default()
12359                },
12360                enable_encoding_issues: dq.encoding_issues.enabled,
12361                encoding_issue_rate: dq.encoding_issues.rate,
12362                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
12363                track_statistics: true,
12364            }
12365        } else {
12366            DataQualityConfig::minimal()
12367        };
12368        let mut injector = DataQualityInjector::new(config);
12369
12370        // Wire country pack for locale-aware format baselines
12371        injector.set_country_pack(self.primary_pack().clone());
12372
12373        // Build context for missing value decisions
12374        let context = HashMap::new();
12375
12376        for entry in entries.iter_mut() {
12377            // Process header_text field (common target for typos)
12378            if let Some(text) = &entry.header.header_text {
12379                let processed = injector.process_text_field(
12380                    "header_text",
12381                    text,
12382                    &entry.header.document_id.to_string(),
12383                    &context,
12384                );
12385                match processed {
12386                    Some(new_text) if new_text != *text => {
12387                        entry.header.header_text = Some(new_text);
12388                    }
12389                    None => {
12390                        entry.header.header_text = None; // Missing value
12391                    }
12392                    _ => {}
12393                }
12394            }
12395
12396            // Process reference field
12397            if let Some(ref_text) = &entry.header.reference {
12398                let processed = injector.process_text_field(
12399                    "reference",
12400                    ref_text,
12401                    &entry.header.document_id.to_string(),
12402                    &context,
12403                );
12404                match processed {
12405                    Some(new_text) if new_text != *ref_text => {
12406                        entry.header.reference = Some(new_text);
12407                    }
12408                    None => {
12409                        entry.header.reference = None;
12410                    }
12411                    _ => {}
12412                }
12413            }
12414
12415            // Process user_persona field (potential for typos in user IDs)
12416            let user_persona = entry.header.user_persona.clone();
12417            if let Some(processed) = injector.process_text_field(
12418                "user_persona",
12419                &user_persona,
12420                &entry.header.document_id.to_string(),
12421                &context,
12422            ) {
12423                if processed != user_persona {
12424                    entry.header.user_persona = processed;
12425                }
12426            }
12427
12428            // Process line items
12429            for line in &mut entry.lines {
12430                // Process line description if present
12431                if let Some(ref text) = line.line_text {
12432                    let processed = injector.process_text_field(
12433                        "line_text",
12434                        text,
12435                        &entry.header.document_id.to_string(),
12436                        &context,
12437                    );
12438                    match processed {
12439                        Some(new_text) if new_text != *text => {
12440                            line.line_text = Some(new_text);
12441                        }
12442                        None => {
12443                            line.line_text = None;
12444                        }
12445                        _ => {}
12446                    }
12447                }
12448
12449                // Process cost_center if present
12450                if let Some(cc) = &line.cost_center {
12451                    let processed = injector.process_text_field(
12452                        "cost_center",
12453                        cc,
12454                        &entry.header.document_id.to_string(),
12455                        &context,
12456                    );
12457                    match processed {
12458                        Some(new_cc) if new_cc != *cc => {
12459                            line.cost_center = Some(new_cc);
12460                        }
12461                        None => {
12462                            line.cost_center = None;
12463                        }
12464                        _ => {}
12465                    }
12466                }
12467
12468                // Extended field coverage (v5.6+): apply NULL injection to
12469                // every Option<String> on the line so users can match
12470                // arbitrary real-production NULL profiles via
12471                // `data_quality.missing_values.field_rates`.
12472                //
12473                // Macro-free helper: process_field returns the new value
12474                // ({Some, None, unchanged}) and we apply it back.
12475                macro_rules! process_opt_field {
12476                    ($field_name:expr, $opt:expr) => {
12477                        if let Some(val) = $opt.as_ref() {
12478                            match injector.process_text_field(
12479                                $field_name,
12480                                val,
12481                                &entry.header.document_id.to_string(),
12482                                &context,
12483                            ) {
12484                                Some(new_val) if new_val != *val => {
12485                                    *$opt = Some(new_val);
12486                                }
12487                                None => {
12488                                    *$opt = None;
12489                                }
12490                                _ => {}
12491                            }
12492                        }
12493                    };
12494                }
12495
12496                process_opt_field!("profit_center", &mut line.profit_center);
12497                process_opt_field!("assignment", &mut line.assignment);
12498                process_opt_field!("tax_code", &mut line.tax_code);
12499                process_opt_field!("account_description", &mut line.account_description);
12500                process_opt_field!(
12501                    "auxiliary_account_number",
12502                    &mut line.auxiliary_account_number
12503                );
12504                process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12505                process_opt_field!("lettrage", &mut line.lettrage);
12506            }
12507
12508            if let Some(pb) = &pb {
12509                pb.inc(1);
12510            }
12511        }
12512
12513        if let Some(pb) = pb {
12514            pb.finish_with_message("Data quality injection complete");
12515        }
12516
12517        let quality_issues = injector.issues().to_vec();
12518        Ok((injector.stats().clone(), quality_issues))
12519    }
12520
12521    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
12522    ///
12523    /// Creates complete audit documentation for each company in the configuration,
12524    /// following ISA standards:
12525    /// - ISA 210/220: Engagement acceptance and terms
12526    /// - ISA 230: Audit documentation (workpapers)
12527    /// - ISA 265: Control deficiencies (findings)
12528    /// - ISA 315/330: Risk assessment and response
12529    /// - ISA 500: Audit evidence
12530    /// - ISA 200: Professional judgment
12531    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12532        // Check if FSM-driven audit generation is enabled
12533        let use_fsm = self
12534            .config
12535            .audit
12536            .fsm
12537            .as_ref()
12538            .map(|f| f.enabled)
12539            .unwrap_or(false);
12540
12541        if use_fsm {
12542            return self.generate_audit_data_with_fsm(entries);
12543        }
12544
12545        // --- Legacy (non-FSM) audit generation follows ---
12546        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12547            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12548        let fiscal_year = start_date.year() as u16;
12549        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12550
12551        // Calculate rough total revenue from entries for materiality
12552        let total_revenue: rust_decimal::Decimal = entries
12553            .iter()
12554            .flat_map(|e| e.lines.iter())
12555            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12556            .map(|l| l.credit_amount)
12557            .sum();
12558
12559        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
12560        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12561
12562        let mut snapshot = AuditSnapshot::default();
12563
12564        // Initialize generators
12565        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12566        // v3.3.2: thread the user-facing audit schema config into the
12567        // engagement generator (team size range).
12568        engagement_gen.set_team_config(&self.config.audit.team);
12569
12570        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12571        // v3.3.2: thread workpaper + review workflow schema config into
12572        // the workpaper generator (per-section count range + review
12573        // delay ranges).
12574        workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12575        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12576        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12577        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12578        // v3.2.1+: user-supplied finding titles + narratives flow through shared provider
12579        finding_gen.set_template_provider(self.template_provider.clone());
12580        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12581        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12582        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12583        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12584        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12585        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12586        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12587
12588        // Get list of accounts from CoA for risk assessment
12589        let accounts: Vec<String> = self
12590            .coa
12591            .as_ref()
12592            .map(|coa| {
12593                coa.get_postable_accounts()
12594                    .iter()
12595                    .map(|acc| acc.account_code().to_string())
12596                    .collect()
12597            })
12598            .unwrap_or_default();
12599
12600        // Generate engagements for each company
12601        for (i, company) in self.config.companies.iter().enumerate() {
12602            // Calculate company-specific revenue (proportional to volume weight)
12603            let company_revenue = total_revenue
12604                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12605
12606            // Generate engagements for this company
12607            let engagements_for_company =
12608                self.phase_config.audit_engagements / self.config.companies.len().max(1);
12609            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12610                1
12611            } else {
12612                0
12613            };
12614
12615            for _eng_idx in 0..(engagements_for_company + extra) {
12616                // v3.3.2: draw engagement type from the user-configured
12617                // distribution instead of always using the default
12618                // (AnnualAudit). Falls back to the default when all
12619                // probabilities are zero.
12620                let eng_type =
12621                    engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12622
12623                // Generate the engagement
12624                let mut engagement = engagement_gen.generate_engagement(
12625                    &company.code,
12626                    &company.name,
12627                    fiscal_year,
12628                    period_end,
12629                    company_revenue,
12630                    Some(eng_type),
12631                );
12632
12633                // Replace synthetic team IDs with real employee IDs from master data
12634                if !self.master_data.employees.is_empty() {
12635                    let emp_count = self.master_data.employees.len();
12636                    // Use employee IDs deterministically based on engagement index
12637                    let base = (i * 10 + _eng_idx) % emp_count;
12638                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
12639                        .employee_id
12640                        .clone();
12641                    engagement.engagement_manager_id = self.master_data.employees
12642                        [(base + 1) % emp_count]
12643                        .employee_id
12644                        .clone();
12645                    let real_team: Vec<String> = engagement
12646                        .team_member_ids
12647                        .iter()
12648                        .enumerate()
12649                        .map(|(j, _)| {
12650                            self.master_data.employees[(base + 2 + j) % emp_count]
12651                                .employee_id
12652                                .clone()
12653                        })
12654                        .collect();
12655                    engagement.team_member_ids = real_team;
12656                }
12657
12658                if let Some(pb) = &pb {
12659                    pb.inc(1);
12660                }
12661
12662                // Get team members from the engagement
12663                let team_members: Vec<String> = engagement.team_member_ids.clone();
12664
12665                // Generate workpapers for the engagement.
12666                // v3.3.2: honor `audit.generate_workpapers` — when false,
12667                // workpapers (and dependent evidence) are skipped while
12668                // the engagement itself, risk assessments, findings, etc.
12669                // still generate normally.
12670                let workpapers = if self.config.audit.generate_workpapers {
12671                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
12672                } else {
12673                    Vec::new()
12674                };
12675
12676                for wp in &workpapers {
12677                    if let Some(pb) = &pb {
12678                        pb.inc(1);
12679                    }
12680
12681                    // Generate evidence for each workpaper
12682                    let evidence = evidence_gen.generate_evidence_for_workpaper(
12683                        wp,
12684                        &team_members,
12685                        wp.preparer_date,
12686                    );
12687
12688                    for _ in &evidence {
12689                        if let Some(pb) = &pb {
12690                            pb.inc(1);
12691                        }
12692                    }
12693
12694                    snapshot.evidence.extend(evidence);
12695                }
12696
12697                // Generate risk assessments for the engagement
12698                let risks =
12699                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
12700
12701                for _ in &risks {
12702                    if let Some(pb) = &pb {
12703                        pb.inc(1);
12704                    }
12705                }
12706                snapshot.risk_assessments.extend(risks);
12707
12708                // Generate findings for the engagement
12709                let findings = finding_gen.generate_findings_for_engagement(
12710                    &engagement,
12711                    &workpapers,
12712                    &team_members,
12713                );
12714
12715                for _ in &findings {
12716                    if let Some(pb) = &pb {
12717                        pb.inc(1);
12718                    }
12719                }
12720                snapshot.findings.extend(findings);
12721
12722                // Generate professional judgments for the engagement
12723                let judgments =
12724                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
12725
12726                for _ in &judgments {
12727                    if let Some(pb) = &pb {
12728                        pb.inc(1);
12729                    }
12730                }
12731                snapshot.judgments.extend(judgments);
12732
12733                // ISA 505: External confirmations and responses
12734                let (confs, resps) =
12735                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12736                snapshot.confirmations.extend(confs);
12737                snapshot.confirmation_responses.extend(resps);
12738
12739                // ISA 330: Procedure steps per workpaper
12740                let team_pairs: Vec<(String, String)> = team_members
12741                    .iter()
12742                    .map(|id| {
12743                        let name = self
12744                            .master_data
12745                            .employees
12746                            .iter()
12747                            .find(|e| e.employee_id == *id)
12748                            .map(|e| e.display_name.clone())
12749                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12750                        (id.clone(), name)
12751                    })
12752                    .collect();
12753                for wp in &workpapers {
12754                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12755                    snapshot.procedure_steps.extend(steps);
12756                }
12757
12758                // ISA 530: Samples per workpaper
12759                for wp in &workpapers {
12760                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12761                        snapshot.samples.push(sample);
12762                    }
12763                }
12764
12765                // ISA 520: Analytical procedures
12766                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12767                snapshot.analytical_results.extend(analytical);
12768
12769                // ISA 610: Internal audit function and reports
12770                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
12771                snapshot.ia_functions.push(ia_func);
12772                snapshot.ia_reports.extend(ia_reports);
12773
12774                // ISA 550: Related parties and transactions
12775                let vendor_names: Vec<String> = self
12776                    .master_data
12777                    .vendors
12778                    .iter()
12779                    .map(|v| v.name.clone())
12780                    .collect();
12781                let customer_names: Vec<String> = self
12782                    .master_data
12783                    .customers
12784                    .iter()
12785                    .map(|c| c.name.clone())
12786                    .collect();
12787                let (parties, rp_txns) =
12788                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
12789                snapshot.related_parties.extend(parties);
12790                snapshot.related_party_transactions.extend(rp_txns);
12791
12792                // Add workpapers after findings since findings need them
12793                snapshot.workpapers.extend(workpapers);
12794
12795                // Generate audit scope record for this engagement (one per engagement)
12796                {
12797                    let scope_id = format!(
12798                        "SCOPE-{}-{}",
12799                        engagement.engagement_id.simple(),
12800                        &engagement.client_entity_id
12801                    );
12802                    let scope = datasynth_core::models::audit::AuditScope::new(
12803                        scope_id.clone(),
12804                        engagement.engagement_id.to_string(),
12805                        engagement.client_entity_id.clone(),
12806                        engagement.materiality,
12807                    );
12808                    // Wire scope_id back to engagement
12809                    let mut eng = engagement;
12810                    eng.scope_id = Some(scope_id);
12811                    snapshot.audit_scopes.push(scope);
12812                    snapshot.engagements.push(eng);
12813                }
12814            }
12815        }
12816
12817        // ----------------------------------------------------------------
12818        // ISA 600: Group audit — component auditors, plan, instructions, reports
12819        // ----------------------------------------------------------------
12820        if self.config.companies.len() > 1 {
12821            // Use materiality from the first engagement if available, otherwise
12822            // derive a reasonable figure from total revenue.
12823            let group_materiality = snapshot
12824                .engagements
12825                .first()
12826                .map(|e| e.materiality)
12827                .unwrap_or_else(|| {
12828                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
12829                    total_revenue * pct
12830                });
12831
12832            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
12833            let group_engagement_id = snapshot
12834                .engagements
12835                .first()
12836                .map(|e| e.engagement_id.to_string())
12837                .unwrap_or_else(|| "GROUP-ENG".to_string());
12838
12839            let component_snapshot = component_gen.generate(
12840                &self.config.companies,
12841                group_materiality,
12842                &group_engagement_id,
12843                period_end,
12844            );
12845
12846            snapshot.component_auditors = component_snapshot.component_auditors;
12847            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
12848            snapshot.component_instructions = component_snapshot.component_instructions;
12849            snapshot.component_reports = component_snapshot.component_reports;
12850
12851            info!(
12852                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
12853                snapshot.component_auditors.len(),
12854                snapshot.component_instructions.len(),
12855                snapshot.component_reports.len(),
12856            );
12857        }
12858
12859        // ----------------------------------------------------------------
12860        // ISA 210: Engagement letters — one per engagement
12861        // ----------------------------------------------------------------
12862        {
12863            let applicable_framework = self
12864                .config
12865                .accounting_standards
12866                .framework
12867                .as_ref()
12868                .map(|f| format!("{f:?}"))
12869                .unwrap_or_else(|| "IFRS".to_string());
12870
12871            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
12872            let entity_count = self.config.companies.len();
12873
12874            for engagement in &snapshot.engagements {
12875                let company = self
12876                    .config
12877                    .companies
12878                    .iter()
12879                    .find(|c| c.code == engagement.client_entity_id);
12880                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
12881                let letter_date = engagement.planning_start;
12882                let letter = letter_gen.generate(
12883                    &engagement.engagement_id.to_string(),
12884                    &engagement.client_name,
12885                    entity_count,
12886                    engagement.period_end_date,
12887                    currency,
12888                    &applicable_framework,
12889                    letter_date,
12890                );
12891                snapshot.engagement_letters.push(letter);
12892            }
12893
12894            info!(
12895                "ISA 210 engagement letters: {} generated",
12896                snapshot.engagement_letters.len()
12897            );
12898        }
12899
12900        // ----------------------------------------------------------------
12901        // v3.3.0: Legal documents per engagement (WI: LegalDocumentGenerator)
12902        // ----------------------------------------------------------------
12903        if self.phase_config.generate_legal_documents {
12904            use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
12905            let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
12906            for engagement in &snapshot.engagements {
12907                // Build an employee name list for signatory drawing —
12908                // prefer employees from the engaged entity, fall back to
12909                // all employees.
12910                let employee_names: Vec<String> = self
12911                    .master_data
12912                    .employees
12913                    .iter()
12914                    .filter(|e| e.company_code == engagement.client_entity_id)
12915                    .map(|e| e.display_name.clone())
12916                    .collect();
12917                let names_to_use = if !employee_names.is_empty() {
12918                    employee_names
12919                } else {
12920                    self.master_data
12921                        .employees
12922                        .iter()
12923                        .take(10)
12924                        .map(|e| e.display_name.clone())
12925                        .collect()
12926                };
12927                let docs = legal_gen.generate(
12928                    &engagement.client_entity_id,
12929                    engagement.fiscal_year as i32,
12930                    &names_to_use,
12931                );
12932                snapshot.legal_documents.extend(docs);
12933            }
12934            info!(
12935                "v3.3.0 legal documents: {} emitted across {} engagements",
12936                snapshot.legal_documents.len(),
12937                snapshot.engagements.len()
12938            );
12939        }
12940
12941        // ----------------------------------------------------------------
12942        // v3.3.0: IT general controls — access logs + change records
12943        //
12944        // `ItControlsGenerator` runs one pass per company (not per
12945        // engagement) so employee sets and system catalogs stay
12946        // coherent. We derive the period from the earliest engagement's
12947        // planning_start through the latest engagement's period_end_date
12948        // for each company.
12949        // ----------------------------------------------------------------
12950        if self.phase_config.generate_it_controls {
12951            use datasynth_generators::it_controls_generator::ItControlsGenerator;
12952            use std::collections::HashMap;
12953            let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
12954
12955            // Group engagements by company to produce one IT-controls
12956            // window per entity.
12957            let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
12958                HashMap::new();
12959            for engagement in &snapshot.engagements {
12960                let entry = by_company
12961                    .entry(engagement.client_entity_id.clone())
12962                    .or_insert((engagement.planning_start, engagement.period_end_date));
12963                if engagement.planning_start < entry.0 {
12964                    entry.0 = engagement.planning_start;
12965                }
12966                if engagement.period_end_date > entry.1 {
12967                    entry.1 = engagement.period_end_date;
12968                }
12969            }
12970
12971            // Standard system catalog — populated from known ERP / app
12972            // names. Keeps the generator's data shape stable when the
12973            // user hasn't configured IT-system naming separately.
12974            let systems: Vec<String> = vec![
12975                "SAP ECC",
12976                "SAP S/4 HANA",
12977                "Oracle EBS",
12978                "Workday",
12979                "NetSuite",
12980                "Active Directory",
12981                "SharePoint",
12982                "Salesforce",
12983                "ServiceNow",
12984                "Jira",
12985                "GitHub Enterprise",
12986                "AWS Console",
12987                "Okta",
12988            ]
12989            .into_iter()
12990            .map(String::from)
12991            .collect();
12992
12993            for (company_code, (start, end)) in by_company {
12994                let emps: Vec<(String, String)> = self
12995                    .master_data
12996                    .employees
12997                    .iter()
12998                    .filter(|e| e.company_code == company_code)
12999                    .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13000                    .collect();
13001                if emps.is_empty() {
13002                    continue;
13003                }
13004                // Compute period in months, rounded up to the nearest
13005                // whole month (min 1).
13006                let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
13007                let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
13008                let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
13009                snapshot.it_controls_access_logs.extend(access_logs);
13010                snapshot.it_controls_change_records.extend(change_records);
13011            }
13012
13013            info!(
13014                "v3.3.0 IT controls: {} access logs, {} change records",
13015                snapshot.it_controls_access_logs.len(),
13016                snapshot.it_controls_change_records.len()
13017            );
13018        }
13019
13020        // ----------------------------------------------------------------
13021        // ISA 560 / IAS 10: Subsequent events
13022        // ----------------------------------------------------------------
13023        {
13024            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
13025            let entity_codes: Vec<String> = self
13026                .config
13027                .companies
13028                .iter()
13029                .map(|c| c.code.clone())
13030                .collect();
13031            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
13032            info!(
13033                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
13034                subsequent.len(),
13035                subsequent
13036                    .iter()
13037                    .filter(|e| matches!(
13038                        e.classification,
13039                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
13040                    ))
13041                    .count(),
13042                subsequent
13043                    .iter()
13044                    .filter(|e| matches!(
13045                        e.classification,
13046                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
13047                    ))
13048                    .count(),
13049            );
13050            snapshot.subsequent_events = subsequent;
13051        }
13052
13053        // ----------------------------------------------------------------
13054        // ISA 402: Service organization controls
13055        // ----------------------------------------------------------------
13056        {
13057            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
13058            let entity_codes: Vec<String> = self
13059                .config
13060                .companies
13061                .iter()
13062                .map(|c| c.code.clone())
13063                .collect();
13064            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
13065            info!(
13066                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
13067                soc_snapshot.service_organizations.len(),
13068                soc_snapshot.soc_reports.len(),
13069                soc_snapshot.user_entity_controls.len(),
13070            );
13071            snapshot.service_organizations = soc_snapshot.service_organizations;
13072            snapshot.soc_reports = soc_snapshot.soc_reports;
13073            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
13074        }
13075
13076        // ----------------------------------------------------------------
13077        // ISA 570: Going concern assessments
13078        // ----------------------------------------------------------------
13079        {
13080            use datasynth_generators::audit::going_concern_generator::{
13081                GoingConcernGenerator, GoingConcernInput,
13082            };
13083            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
13084            let entity_codes: Vec<String> = self
13085                .config
13086                .companies
13087                .iter()
13088                .map(|c| c.code.clone())
13089                .collect();
13090            // Assessment date = period end + 75 days (typical sign-off window).
13091            let assessment_date = period_end + chrono::Duration::days(75);
13092            let period_label = format!("FY{}", period_end.year());
13093
13094            // Build financial inputs from actual journal entries.
13095            //
13096            // We derive approximate P&L, working capital, and operating cash flow
13097            // by aggregating GL account balances from the journal entry population.
13098            // Account ranges used (standard chart):
13099            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
13100            //   Expenses:        6xxx (debit-normal)
13101            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
13102            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
13103            //   Operating CF:    net income adjusted for D&A (rough proxy)
13104            let gc_inputs: Vec<GoingConcernInput> = self
13105                .config
13106                .companies
13107                .iter()
13108                .map(|company| {
13109                    let code = &company.code;
13110                    let mut revenue = rust_decimal::Decimal::ZERO;
13111                    let mut expenses = rust_decimal::Decimal::ZERO;
13112                    let mut current_assets = rust_decimal::Decimal::ZERO;
13113                    let mut current_liabs = rust_decimal::Decimal::ZERO;
13114                    let mut total_debt = rust_decimal::Decimal::ZERO;
13115
13116                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
13117                        for line in &je.lines {
13118                            let acct = line.gl_account.as_str();
13119                            let net = line.debit_amount - line.credit_amount;
13120                            if acct.starts_with('4') {
13121                                // Revenue accounts: credit-normal, so negative net = revenue earned
13122                                revenue -= net;
13123                            } else if acct.starts_with('6') {
13124                                // Expense accounts: debit-normal
13125                                expenses += net;
13126                            }
13127                            // Balance sheet accounts for working capital
13128                            if acct.starts_with('1') {
13129                                // Current asset accounts (1000–1499)
13130                                if let Ok(n) = acct.parse::<u32>() {
13131                                    if (1000..=1499).contains(&n) {
13132                                        current_assets += net;
13133                                    }
13134                                }
13135                            } else if acct.starts_with('2') {
13136                                if let Ok(n) = acct.parse::<u32>() {
13137                                    if (2000..=2499).contains(&n) {
13138                                        // Current liabilities
13139                                        current_liabs -= net; // credit-normal
13140                                    } else if (2500..=2999).contains(&n) {
13141                                        // Long-term debt
13142                                        total_debt -= net;
13143                                    }
13144                                }
13145                            }
13146                        }
13147                    }
13148
13149                    let net_income = revenue - expenses;
13150                    let working_capital = current_assets - current_liabs;
13151                    // Rough operating CF proxy: net income (full accrual CF calculation
13152                    // is done separately in the cash flow statement generator)
13153                    let operating_cash_flow = net_income;
13154
13155                    GoingConcernInput {
13156                        entity_code: code.clone(),
13157                        net_income,
13158                        working_capital,
13159                        operating_cash_flow,
13160                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
13161                        assessment_date,
13162                    }
13163                })
13164                .collect();
13165
13166            let assessments = if gc_inputs.is_empty() {
13167                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
13168            } else {
13169                gc_gen.generate_for_entities_with_inputs(
13170                    &entity_codes,
13171                    &gc_inputs,
13172                    assessment_date,
13173                    &period_label,
13174                )
13175            };
13176            info!(
13177                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
13178                assessments.len(),
13179                assessments.iter().filter(|a| matches!(
13180                    a.auditor_conclusion,
13181                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
13182                )).count(),
13183                assessments.iter().filter(|a| matches!(
13184                    a.auditor_conclusion,
13185                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
13186                )).count(),
13187                assessments.iter().filter(|a| matches!(
13188                    a.auditor_conclusion,
13189                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
13190                )).count(),
13191            );
13192            snapshot.going_concern_assessments = assessments;
13193        }
13194
13195        // ----------------------------------------------------------------
13196        // ISA 540: Accounting estimates
13197        // ----------------------------------------------------------------
13198        {
13199            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
13200            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
13201            let entity_codes: Vec<String> = self
13202                .config
13203                .companies
13204                .iter()
13205                .map(|c| c.code.clone())
13206                .collect();
13207            let estimates = est_gen.generate_for_entities(&entity_codes);
13208            info!(
13209                "ISA 540 accounting estimates: {} estimates across {} entities \
13210                 ({} with retrospective reviews, {} with auditor point estimates)",
13211                estimates.len(),
13212                entity_codes.len(),
13213                estimates
13214                    .iter()
13215                    .filter(|e| e.retrospective_review.is_some())
13216                    .count(),
13217                estimates
13218                    .iter()
13219                    .filter(|e| e.auditor_point_estimate.is_some())
13220                    .count(),
13221            );
13222            snapshot.accounting_estimates = estimates;
13223        }
13224
13225        // ----------------------------------------------------------------
13226        // ISA 700/701/705/706: Audit opinions (one per engagement)
13227        // ----------------------------------------------------------------
13228        {
13229            use datasynth_generators::audit::audit_opinion_generator::{
13230                AuditOpinionGenerator, AuditOpinionInput,
13231            };
13232
13233            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
13234
13235            // Build inputs — one per engagement, linking findings and going concern.
13236            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
13237                .engagements
13238                .iter()
13239                .map(|eng| {
13240                    // Collect findings for this engagement.
13241                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13242                        .findings
13243                        .iter()
13244                        .filter(|f| f.engagement_id == eng.engagement_id)
13245                        .cloned()
13246                        .collect();
13247
13248                    // Going concern for this entity.
13249                    let gc = snapshot
13250                        .going_concern_assessments
13251                        .iter()
13252                        .find(|g| g.entity_code == eng.client_entity_id)
13253                        .cloned();
13254
13255                    // Component reports relevant to this engagement.
13256                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
13257                        snapshot.component_reports.clone();
13258
13259                    let auditor = self
13260                        .master_data
13261                        .employees
13262                        .first()
13263                        .map(|e| e.display_name.clone())
13264                        .unwrap_or_else(|| "Global Audit LLP".into());
13265
13266                    let partner = self
13267                        .master_data
13268                        .employees
13269                        .get(1)
13270                        .map(|e| e.display_name.clone())
13271                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
13272
13273                    AuditOpinionInput {
13274                        entity_code: eng.client_entity_id.clone(),
13275                        entity_name: eng.client_name.clone(),
13276                        engagement_id: eng.engagement_id,
13277                        period_end: eng.period_end_date,
13278                        findings: eng_findings,
13279                        going_concern: gc,
13280                        component_reports: comp_reports,
13281                        // Mark as US-listed when audit standards include PCAOB.
13282                        is_us_listed: {
13283                            let fw = &self.config.audit_standards.isa_compliance.framework;
13284                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
13285                        },
13286                        auditor_name: auditor,
13287                        engagement_partner: partner,
13288                    }
13289                })
13290                .collect();
13291
13292            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
13293
13294            for go in &generated_opinions {
13295                snapshot
13296                    .key_audit_matters
13297                    .extend(go.key_audit_matters.clone());
13298            }
13299            snapshot.audit_opinions = generated_opinions
13300                .into_iter()
13301                .map(|go| go.opinion)
13302                .collect();
13303
13304            info!(
13305                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
13306                snapshot.audit_opinions.len(),
13307                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
13308                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
13309                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
13310                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
13311            );
13312        }
13313
13314        // ----------------------------------------------------------------
13315        // SOX 302 / 404 assessments
13316        // ----------------------------------------------------------------
13317        {
13318            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
13319
13320            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
13321
13322            for (i, company) in self.config.companies.iter().enumerate() {
13323                // Collect findings for this company's engagements.
13324                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
13325                    .engagements
13326                    .iter()
13327                    .filter(|e| e.client_entity_id == company.code)
13328                    .map(|e| e.engagement_id)
13329                    .collect();
13330
13331                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13332                    .findings
13333                    .iter()
13334                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
13335                    .cloned()
13336                    .collect();
13337
13338                // Derive executive names from employee list.
13339                let emp_count = self.master_data.employees.len();
13340                let ceo_name = if emp_count > 0 {
13341                    self.master_data.employees[i % emp_count]
13342                        .display_name
13343                        .clone()
13344                } else {
13345                    format!("CEO of {}", company.name)
13346                };
13347                let cfo_name = if emp_count > 1 {
13348                    self.master_data.employees[(i + 1) % emp_count]
13349                        .display_name
13350                        .clone()
13351                } else {
13352                    format!("CFO of {}", company.name)
13353                };
13354
13355                // Use engagement materiality if available.
13356                let materiality = snapshot
13357                    .engagements
13358                    .iter()
13359                    .find(|e| e.client_entity_id == company.code)
13360                    .map(|e| e.materiality)
13361                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
13362
13363                let input = SoxGeneratorInput {
13364                    company_code: company.code.clone(),
13365                    company_name: company.name.clone(),
13366                    fiscal_year,
13367                    period_end,
13368                    findings: company_findings,
13369                    ceo_name,
13370                    cfo_name,
13371                    materiality_threshold: materiality,
13372                    revenue_percent: rust_decimal::Decimal::from(100),
13373                    assets_percent: rust_decimal::Decimal::from(100),
13374                    significant_accounts: vec![
13375                        "Revenue".into(),
13376                        "Accounts Receivable".into(),
13377                        "Inventory".into(),
13378                        "Fixed Assets".into(),
13379                        "Accounts Payable".into(),
13380                    ],
13381                };
13382
13383                let (certs, assessment) = sox_gen.generate(&input);
13384                snapshot.sox_302_certifications.extend(certs);
13385                snapshot.sox_404_assessments.push(assessment);
13386            }
13387
13388            info!(
13389                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
13390                snapshot.sox_302_certifications.len(),
13391                snapshot.sox_404_assessments.len(),
13392                snapshot
13393                    .sox_404_assessments
13394                    .iter()
13395                    .filter(|a| a.icfr_effective)
13396                    .count(),
13397                snapshot
13398                    .sox_404_assessments
13399                    .iter()
13400                    .filter(|a| !a.icfr_effective)
13401                    .count(),
13402            );
13403        }
13404
13405        // ----------------------------------------------------------------
13406        // ISA 320: Materiality calculations (one per entity)
13407        // ----------------------------------------------------------------
13408        {
13409            use datasynth_generators::audit::materiality_generator::{
13410                MaterialityGenerator, MaterialityInput,
13411            };
13412
13413            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13414
13415            // Compute per-company financials from JEs.
13416            // Asset accounts start with '1', revenue with '4',
13417            // expense accounts with '5' or '6'.
13418            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13419
13420            for company in &self.config.companies {
13421                let company_code = company.code.clone();
13422
13423                // Revenue: credit-side entries on 4xxx accounts
13424                let company_revenue: rust_decimal::Decimal = entries
13425                    .iter()
13426                    .filter(|e| e.company_code() == company_code)
13427                    .flat_map(|e| e.lines.iter())
13428                    .filter(|l| l.account_code.starts_with('4'))
13429                    .map(|l| l.credit_amount)
13430                    .sum();
13431
13432                // Total assets: debit balances on 1xxx accounts
13433                let total_assets: rust_decimal::Decimal = entries
13434                    .iter()
13435                    .filter(|e| e.company_code() == company_code)
13436                    .flat_map(|e| e.lines.iter())
13437                    .filter(|l| l.account_code.starts_with('1'))
13438                    .map(|l| l.debit_amount)
13439                    .sum();
13440
13441                // Expenses: debit-side entries on 5xxx/6xxx accounts
13442                let total_expenses: rust_decimal::Decimal = entries
13443                    .iter()
13444                    .filter(|e| e.company_code() == company_code)
13445                    .flat_map(|e| e.lines.iter())
13446                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13447                    .map(|l| l.debit_amount)
13448                    .sum();
13449
13450                // Equity: gross credit postings on 3xxx accounts (debits are not netted)
13451                let equity: rust_decimal::Decimal = entries
13452                    .iter()
13453                    .filter(|e| e.company_code() == company_code)
13454                    .flat_map(|e| e.lines.iter())
13455                    .filter(|l| l.account_code.starts_with('3'))
13456                    .map(|l| l.credit_amount)
13457                    .sum();
13458
13459                let pretax_income = company_revenue - total_expenses;
13460
13461                // If the company has no revenue postings, estimate every figure from its volume-weighted share of total_revenue
13462                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13463                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
13464                        .unwrap_or(rust_decimal::Decimal::ONE);
13465                    (
13466                        total_revenue * w,
13467                        total_revenue * w * rust_decimal::Decimal::from(3),
13468                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
13469                        total_revenue * w * rust_decimal::Decimal::from(2),
13470                    )
13471                } else {
13472                    (company_revenue, total_assets, pretax_income, equity)
13473                };
13474
13475                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
13476
13477                materiality_inputs.push(MaterialityInput {
13478                    entity_code: company_code,
13479                    period: format!("FY{}", fiscal_year),
13480                    revenue: rev,
13481                    pretax_income: pti,
13482                    total_assets: assets,
13483                    equity: eq,
13484                    gross_profit,
13485                });
13486            }
13487
13488            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13489
13490            info!(
13491                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13492                 {} total assets, {} equity benchmarks)",
13493                snapshot.materiality_calculations.len(),
13494                snapshot
13495                    .materiality_calculations
13496                    .iter()
13497                    .filter(|m| matches!(
13498                        m.benchmark,
13499                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13500                    ))
13501                    .count(),
13502                snapshot
13503                    .materiality_calculations
13504                    .iter()
13505                    .filter(|m| matches!(
13506                        m.benchmark,
13507                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13508                    ))
13509                    .count(),
13510                snapshot
13511                    .materiality_calculations
13512                    .iter()
13513                    .filter(|m| matches!(
13514                        m.benchmark,
13515                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13516                    ))
13517                    .count(),
13518                snapshot
13519                    .materiality_calculations
13520                    .iter()
13521                    .filter(|m| matches!(
13522                        m.benchmark,
13523                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13524                    ))
13525                    .count(),
13526            );
13527        }
13528
13529        // ----------------------------------------------------------------
13530        // ISA 315: Combined Risk Assessments (per entity, per account area)
13531        // ----------------------------------------------------------------
13532        {
13533            use datasynth_generators::audit::cra_generator::CraGenerator;
13534
13535            let mut cra_gen = CraGenerator::new(self.seed + 8315);
13536
13537            // Build entity → scope_id map from already-generated scopes
13538            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13539                .audit_scopes
13540                .iter()
13541                .map(|s| (s.entity_code.clone(), s.id.clone()))
13542                .collect();
13543
13544            for company in &self.config.companies {
13545                let cras = cra_gen.generate_for_entity(&company.code, None);
13546                let scope_id = entity_scope_map.get(&company.code).cloned();
13547                let cras_with_scope: Vec<_> = cras
13548                    .into_iter()
13549                    .map(|mut cra| {
13550                        cra.scope_id = scope_id.clone();
13551                        cra
13552                    })
13553                    .collect();
13554                snapshot.combined_risk_assessments.extend(cras_with_scope);
13555            }
13556
13557            let significant_count = snapshot
13558                .combined_risk_assessments
13559                .iter()
13560                .filter(|c| c.significant_risk)
13561                .count();
13562            let high_cra_count = snapshot
13563                .combined_risk_assessments
13564                .iter()
13565                .filter(|c| {
13566                    matches!(
13567                        c.combined_risk,
13568                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13569                    )
13570                })
13571                .count();
13572
13573            info!(
13574                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13575                snapshot.combined_risk_assessments.len(),
13576                significant_count,
13577                high_cra_count,
13578            );
13579        }
13580
13581        // ----------------------------------------------------------------
13582        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
13583        // ----------------------------------------------------------------
13584        {
13585            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13586
13587            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13588
13589            // Group CRAs by entity and use per-entity tolerable error from materiality
13590            for company in &self.config.companies {
13591                let entity_code = company.code.clone();
13592
13593                // Find tolerable error for this entity (= performance materiality)
13594                let tolerable_error = snapshot
13595                    .materiality_calculations
13596                    .iter()
13597                    .find(|m| m.entity_code == entity_code)
13598                    .map(|m| m.tolerable_error);
13599
13600                // Collect CRAs for this entity
13601                let entity_cras: Vec<_> = snapshot
13602                    .combined_risk_assessments
13603                    .iter()
13604                    .filter(|c| c.entity_code == entity_code)
13605                    .cloned()
13606                    .collect();
13607
13608                if !entity_cras.is_empty() {
13609                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13610                    snapshot.sampling_plans.extend(plans);
13611                    snapshot.sampled_items.extend(items);
13612                }
13613            }
13614
13615            let misstatement_count = snapshot
13616                .sampled_items
13617                .iter()
13618                .filter(|i| i.misstatement_found)
13619                .count();
13620
13621            info!(
13622                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13623                snapshot.sampling_plans.len(),
13624                snapshot.sampled_items.len(),
13625                misstatement_count,
13626            );
13627        }
13628
13629        // ----------------------------------------------------------------
13630        // ISA 315: Significant Classes of Transactions (SCOTS)
13631        // ----------------------------------------------------------------
13632        {
13633            use datasynth_generators::audit::scots_generator::{
13634                ScotsGenerator, ScotsGeneratorConfig,
13635            };
13636
13637            let ic_enabled = self.config.intercompany.enabled;
13638
13639            let config = ScotsGeneratorConfig {
13640                intercompany_enabled: ic_enabled,
13641                ..ScotsGeneratorConfig::default()
13642            };
13643            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
13644
13645            for company in &self.config.companies {
13646                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
13647                snapshot
13648                    .significant_transaction_classes
13649                    .extend(entity_scots);
13650            }
13651
13652            let estimation_count = snapshot
13653                .significant_transaction_classes
13654                .iter()
13655                .filter(|s| {
13656                    matches!(
13657                        s.transaction_type,
13658                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
13659                    )
13660                })
13661                .count();
13662
13663            info!(
13664                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
13665                snapshot.significant_transaction_classes.len(),
13666                estimation_count,
13667            );
13668        }
13669
13670        // ----------------------------------------------------------------
13671        // ISA 520: Unusual Item Markers
13672        // ----------------------------------------------------------------
13673        {
13674            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
13675
13676            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
13677            let entity_codes: Vec<String> = self
13678                .config
13679                .companies
13680                .iter()
13681                .map(|c| c.code.clone())
13682                .collect();
13683            let unusual_flags =
13684                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
13685            info!(
13686                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
13687                unusual_flags.len(),
13688                unusual_flags
13689                    .iter()
13690                    .filter(|f| matches!(
13691                        f.severity,
13692                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
13693                    ))
13694                    .count(),
13695                unusual_flags
13696                    .iter()
13697                    .filter(|f| matches!(
13698                        f.severity,
13699                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
13700                    ))
13701                    .count(),
13702                unusual_flags
13703                    .iter()
13704                    .filter(|f| matches!(
13705                        f.severity,
13706                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
13707                    ))
13708                    .count(),
13709            );
13710            snapshot.unusual_items = unusual_flags;
13711        }
13712
13713        // ----------------------------------------------------------------
13714        // ISA 520: Analytical Relationships
13715        // ----------------------------------------------------------------
13716        {
13717            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
13718
13719            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
13720            let entity_codes: Vec<String> = self
13721                .config
13722                .companies
13723                .iter()
13724                .map(|c| c.code.clone())
13725                .collect();
13726            let current_period_label = format!("FY{fiscal_year}");
13727            let prior_period_label = format!("FY{}", fiscal_year - 1);
13728            let analytical_rels = ar_gen.generate_for_entities(
13729                &entity_codes,
13730                entries,
13731                &current_period_label,
13732                &prior_period_label,
13733            );
13734            let out_of_range = analytical_rels
13735                .iter()
13736                .filter(|r| !r.within_expected_range)
13737                .count();
13738            info!(
13739                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13740                analytical_rels.len(),
13741                out_of_range,
13742            );
13743            snapshot.analytical_relationships = analytical_rels;
13744        }
13745
13746        if let Some(pb) = pb {
13747            pb.finish_with_message(format!(
13748                "Audit data: {} engagements, {} workpapers, {} evidence, \
13749                 {} confirmations, {} procedure steps, {} samples, \
13750                 {} analytical, {} IA funcs, {} related parties, \
13751                 {} component auditors, {} letters, {} subsequent events, \
13752                 {} service orgs, {} going concern, {} accounting estimates, \
13753                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13754                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13755                 {} unusual items, {} analytical relationships",
13756                snapshot.engagements.len(),
13757                snapshot.workpapers.len(),
13758                snapshot.evidence.len(),
13759                snapshot.confirmations.len(),
13760                snapshot.procedure_steps.len(),
13761                snapshot.samples.len(),
13762                snapshot.analytical_results.len(),
13763                snapshot.ia_functions.len(),
13764                snapshot.related_parties.len(),
13765                snapshot.component_auditors.len(),
13766                snapshot.engagement_letters.len(),
13767                snapshot.subsequent_events.len(),
13768                snapshot.service_organizations.len(),
13769                snapshot.going_concern_assessments.len(),
13770                snapshot.accounting_estimates.len(),
13771                snapshot.audit_opinions.len(),
13772                snapshot.key_audit_matters.len(),
13773                snapshot.sox_302_certifications.len(),
13774                snapshot.sox_404_assessments.len(),
13775                snapshot.materiality_calculations.len(),
13776                snapshot.combined_risk_assessments.len(),
13777                snapshot.sampling_plans.len(),
13778                snapshot.significant_transaction_classes.len(),
13779                snapshot.unusual_items.len(),
13780                snapshot.analytical_relationships.len(),
13781            ));
13782        }
13783
13784        // ----------------------------------------------------------------
13785        // PCAOB-ISA cross-reference mappings
13786        // ----------------------------------------------------------------
13787        // Always include the standard PCAOB-ISA mappings when audit generation is
13788        // enabled. These are static reference data (no randomness required) so we
13789        // call standard_mappings() directly.
13790        {
13791            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13792            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13793            debug!(
13794                "PCAOB-ISA mappings generated: {} mappings",
13795                snapshot.isa_pcaob_mappings.len()
13796            );
13797        }
13798
13799        // ----------------------------------------------------------------
13800        // ISA standard reference entries
13801        // ----------------------------------------------------------------
13802        // Emit flat ISA standard reference data (number, title, series) so
13803        // consumers get a machine-readable listing of all 34 ISA standards in
13804        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
13805        {
13806            use datasynth_standards::audit::isa_reference::IsaStandard;
13807            snapshot.isa_mappings = IsaStandard::standard_entries();
13808            debug!(
13809                "ISA standard entries generated: {} standards",
13810                snapshot.isa_mappings.len()
13811            );
13812        }
13813
13814        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
13815        // For each RPT, find the chronologically closest JE for the engagement's entity.
13816        {
13817            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
13818                .engagements
13819                .iter()
13820                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
13821                .collect();
13822
13823            for rpt in &mut snapshot.related_party_transactions {
13824                if rpt.journal_entry_id.is_some() {
13825                    continue; // already set
13826                }
13827                let entity = engagement_by_id
13828                    .get(&rpt.engagement_id.to_string())
13829                    .copied()
13830                    .unwrap_or("");
13831
13832                // Find closest JE by date in the entity's company
13833                let best_je = entries
13834                    .iter()
13835                    .filter(|je| je.header.company_code == entity)
13836                    .min_by_key(|je| {
13837                        (je.header.posting_date - rpt.transaction_date)
13838                            .num_days()
13839                            .abs()
13840                    });
13841
13842                if let Some(je) = best_je {
13843                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
13844                }
13845            }
13846
13847            let linked = snapshot
13848                .related_party_transactions
13849                .iter()
13850                .filter(|t| t.journal_entry_id.is_some())
13851                .count();
13852            debug!(
13853                "Linked {}/{} related party transactions to journal entries",
13854                linked,
13855                snapshot.related_party_transactions.len()
13856            );
13857        }
13858
13859        // --- ISA 700 / 701 / 705 / 706: audit opinion + key audit matters.
13860        // One opinion per engagement, derived from that engagement's findings,
13861        // going-concern assessment, and any component-auditor reports. Fills
13862        // `audit_opinions` + a flattened `key_audit_matters` for downstream
13863        // export.
13864        if !snapshot.engagements.is_empty() {
13865            use datasynth_generators::audit_opinion_generator::{
13866                AuditOpinionGenerator, AuditOpinionInput,
13867            };
13868
13869            let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
13870            let inputs: Vec<AuditOpinionInput> = snapshot
13871                .engagements
13872                .iter()
13873                .map(|eng| {
13874                    let findings = snapshot
13875                        .findings
13876                        .iter()
13877                        .filter(|f| f.engagement_id == eng.engagement_id)
13878                        .cloned()
13879                        .collect();
13880                    let going_concern = snapshot
13881                        .going_concern_assessments
13882                        .iter()
13883                        .find(|gc| gc.entity_code == eng.client_entity_id)
13884                        .cloned();
13885                    // ComponentAuditorReport doesn't carry an engagement id, but
13886                    // component scope is keyed by `entity_code`, so filter on that.
13887                    let component_reports = snapshot
13888                        .component_reports
13889                        .iter()
13890                        .filter(|r| r.entity_code == eng.client_entity_id)
13891                        .cloned()
13892                        .collect();
13893
13894                    AuditOpinionInput {
13895                        entity_code: eng.client_entity_id.clone(),
13896                        entity_name: eng.client_name.clone(),
13897                        engagement_id: eng.engagement_id,
13898                        period_end: eng.period_end_date,
13899                        findings,
13900                        going_concern,
13901                        component_reports,
13902                        is_us_listed: matches!(
13903                            eng.engagement_type,
13904                            datasynth_core::audit::EngagementType::IntegratedAudit
13905                                | datasynth_core::audit::EngagementType::Sox404
13906                        ),
13907                        auditor_name: "DataSynth Audit LLP".to_string(),
13908                        engagement_partner: "Engagement Partner".to_string(),
13909                    }
13910                })
13911                .collect();
13912
13913            let generated = opinion_gen.generate_batch(&inputs);
13914            for g in generated {
13915                snapshot.key_audit_matters.extend(g.key_audit_matters);
13916                snapshot.audit_opinions.push(g.opinion);
13917            }
13918            debug!(
13919                "Generated {} audit opinions with {} key audit matters",
13920                snapshot.audit_opinions.len(),
13921                snapshot.key_audit_matters.len()
13922            );
13923        }
13924
13925        Ok(snapshot)
13926    }
13927
13928    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
13929    ///
13930    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
13931    /// from the current orchestrator state, runs the FSM engine, and maps the
13932    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
13933    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
13934    fn generate_audit_data_with_fsm(
13935        &mut self,
13936        entries: &[JournalEntry],
13937    ) -> SynthResult<AuditSnapshot> {
13938        use datasynth_audit_fsm::{
13939            context::EngagementContext,
13940            engine::AuditFsmEngine,
13941            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
13942        };
13943        use rand::SeedableRng;
13944        use rand_chacha::ChaCha8Rng;
13945
13946        info!("Audit FSM: generating audit data via FSM engine");
13947
13948        let fsm_config = self
13949            .config
13950            .audit
13951            .fsm
13952            .as_ref()
13953            .expect("FSM config must be present when FSM is enabled");
13954
13955        // 1. Load blueprint from config string.
13956        let bwp = match fsm_config.blueprint.as_str() {
13957            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
13958            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
13959            _ => {
13960                warn!(
13961                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
13962                    fsm_config.blueprint
13963                );
13964                BlueprintWithPreconditions::load_builtin_fsa()
13965            }
13966        }
13967        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
13968
13969        // 2. Load overlay from config string.
13970        let overlay = match fsm_config.overlay.as_str() {
13971            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
13972            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
13973            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
13974            _ => {
13975                warn!(
13976                    "Unknown FSM overlay '{}', falling back to builtin:default",
13977                    fsm_config.overlay
13978                );
13979                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
13980            }
13981        }
13982        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
13983
13984        // 3. Build EngagementContext from orchestrator state.
13985        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13986            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
13987        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
13988
13989        // Determine the engagement entity early so we can filter JEs.
13990        let company = self.config.companies.first();
13991        let company_code = company
13992            .map(|c| c.code.clone())
13993            .unwrap_or_else(|| "UNKNOWN".to_string());
13994        let company_name = company
13995            .map(|c| c.name.clone())
13996            .unwrap_or_else(|| "Unknown Company".to_string());
13997        let currency = company
13998            .map(|c| c.currency.clone())
13999            .unwrap_or_else(|| "USD".to_string());
14000
14001        // Filter JEs to the engagement entity for single-company coherence.
14002        let entity_entries: Vec<_> = entries
14003            .iter()
14004            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
14005            .cloned()
14006            .collect();
14007        let entries = &entity_entries; // Shadow the parameter for remaining usage
14008
14009        // Financial aggregates from journal entries.
14010        let total_revenue: rust_decimal::Decimal = entries
14011            .iter()
14012            .flat_map(|e| e.lines.iter())
14013            .filter(|l| l.account_code.starts_with('4'))
14014            .map(|l| l.credit_amount - l.debit_amount)
14015            .sum();
14016
14017        let total_assets: rust_decimal::Decimal = entries
14018            .iter()
14019            .flat_map(|e| e.lines.iter())
14020            .filter(|l| l.account_code.starts_with('1'))
14021            .map(|l| l.debit_amount - l.credit_amount)
14022            .sum();
14023
14024        let total_expenses: rust_decimal::Decimal = entries
14025            .iter()
14026            .flat_map(|e| e.lines.iter())
14027            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
14028            .map(|l| l.debit_amount)
14029            .sum();
14030
14031        let equity: rust_decimal::Decimal = entries
14032            .iter()
14033            .flat_map(|e| e.lines.iter())
14034            .filter(|l| l.account_code.starts_with('3'))
14035            .map(|l| l.credit_amount - l.debit_amount)
14036            .sum();
14037
14038        let total_debt: rust_decimal::Decimal = entries
14039            .iter()
14040            .flat_map(|e| e.lines.iter())
14041            .filter(|l| l.account_code.starts_with('2'))
14042            .map(|l| l.credit_amount - l.debit_amount)
14043            .sum();
14044
14045        let pretax_income = total_revenue - total_expenses;
14046
14047        let cogs: rust_decimal::Decimal = entries
14048            .iter()
14049            .flat_map(|e| e.lines.iter())
14050            .filter(|l| l.account_code.starts_with('5'))
14051            .map(|l| l.debit_amount)
14052            .sum();
14053        let gross_profit = total_revenue - cogs;
14054
14055        let current_assets: rust_decimal::Decimal = entries
14056            .iter()
14057            .flat_map(|e| e.lines.iter())
14058            .filter(|l| {
14059                l.account_code.starts_with("10")
14060                    || l.account_code.starts_with("11")
14061                    || l.account_code.starts_with("12")
14062                    || l.account_code.starts_with("13")
14063            })
14064            .map(|l| l.debit_amount - l.credit_amount)
14065            .sum();
14066        let current_liabilities: rust_decimal::Decimal = entries
14067            .iter()
14068            .flat_map(|e| e.lines.iter())
14069            .filter(|l| {
14070                l.account_code.starts_with("20")
14071                    || l.account_code.starts_with("21")
14072                    || l.account_code.starts_with("22")
14073            })
14074            .map(|l| l.credit_amount - l.debit_amount)
14075            .sum();
14076        let working_capital = current_assets - current_liabilities;
14077
14078        let depreciation: rust_decimal::Decimal = entries
14079            .iter()
14080            .flat_map(|e| e.lines.iter())
14081            .filter(|l| l.account_code.starts_with("60"))
14082            .map(|l| l.debit_amount)
14083            .sum();
14084        let operating_cash_flow = pretax_income + depreciation;
14085
14086        // GL accounts for reference data.
14087        let accounts: Vec<String> = self
14088            .coa
14089            .as_ref()
14090            .map(|coa| {
14091                coa.get_postable_accounts()
14092                    .iter()
14093                    .map(|acc| acc.account_code().to_string())
14094                    .collect()
14095            })
14096            .unwrap_or_default();
14097
14098        // Team member IDs and display names from master data.
14099        let team_member_ids: Vec<String> = self
14100            .master_data
14101            .employees
14102            .iter()
14103            .take(8) // Cap team size
14104            .map(|e| e.employee_id.clone())
14105            .collect();
14106        let team_member_pairs: Vec<(String, String)> = self
14107            .master_data
14108            .employees
14109            .iter()
14110            .take(8)
14111            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
14112            .collect();
14113
14114        let vendor_names: Vec<String> = self
14115            .master_data
14116            .vendors
14117            .iter()
14118            .map(|v| v.name.clone())
14119            .collect();
14120        let customer_names: Vec<String> = self
14121            .master_data
14122            .customers
14123            .iter()
14124            .map(|c| c.name.clone())
14125            .collect();
14126
14127        let entity_codes: Vec<String> = self
14128            .config
14129            .companies
14130            .iter()
14131            .map(|c| c.code.clone())
14132            .collect();
14133
14134        // Journal entry IDs for evidence tracing (sample up to 50).
14135        let journal_entry_ids: Vec<String> = entries
14136            .iter()
14137            .take(50)
14138            .map(|e| e.header.document_id.to_string())
14139            .collect();
14140
14141        // Account balances for risk weighting (aggregate debit - credit per account).
14142        let mut account_balances = std::collections::HashMap::<String, f64>::new();
14143        for entry in entries {
14144            for line in &entry.lines {
14145                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
14146                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
14147                *account_balances
14148                    .entry(line.account_code.clone())
14149                    .or_insert(0.0) += debit_f64 - credit_f64;
14150            }
14151        }
14152
14153        // Internal control IDs and anomaly refs are populated by the
14154        // caller when available; here we default to empty because the
14155        // orchestrator state may not have generated controls/anomalies
14156        // yet at this point in the pipeline.
14157        let control_ids: Vec<String> = Vec::new();
14158        let anomaly_refs: Vec<String> = Vec::new();
14159
14160        let mut context = EngagementContext {
14161            company_code,
14162            company_name,
14163            fiscal_year: start_date.year(),
14164            currency,
14165            total_revenue,
14166            total_assets,
14167            engagement_start: start_date,
14168            report_date: period_end,
14169            pretax_income,
14170            equity,
14171            gross_profit,
14172            working_capital,
14173            operating_cash_flow,
14174            total_debt,
14175            team_member_ids,
14176            team_member_pairs,
14177            accounts,
14178            vendor_names,
14179            customer_names,
14180            journal_entry_ids,
14181            account_balances,
14182            control_ids,
14183            anomaly_refs,
14184            journal_entries: entries.to_vec(),
14185            is_us_listed: false,
14186            entity_codes,
14187            auditor_firm_name: "DataSynth Audit LLP".into(),
14188            accounting_framework: self
14189                .config
14190                .accounting_standards
14191                .framework
14192                .map(|f| match f {
14193                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
14194                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
14195                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
14196                        "French GAAP"
14197                    }
14198                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
14199                        "German GAAP"
14200                    }
14201                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
14202                        "Dual Reporting"
14203                    }
14204                })
14205                .unwrap_or("IFRS")
14206                .into(),
14207        };
14208
14209        // 4. Create and run the FSM engine.
14210        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
14211        let rng = ChaCha8Rng::seed_from_u64(seed);
14212        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
14213
14214        let mut result = engine
14215            .run_engagement(&context)
14216            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
14217
14218        info!(
14219            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
14220             {} phases completed, duration {:.1}h",
14221            result.event_log.len(),
14222            result.artifacts.total_artifacts(),
14223            result.anomalies.len(),
14224            result.phases_completed.len(),
14225            result.total_duration_hours,
14226        );
14227
14228        // 4b. Populate financial data in the artifact bag for downstream consumers.
14229        let tb_entity = context.company_code.clone();
14230        let tb_fy = context.fiscal_year;
14231        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
14232        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
14233            entries,
14234            &tb_entity,
14235            tb_fy,
14236            self.coa.as_ref().map(|c| c.as_ref()),
14237        );
14238
14239        // 5. Map ArtifactBag fields to AuditSnapshot.
14240        let bag = result.artifacts;
14241        let mut snapshot = AuditSnapshot {
14242            engagements: bag.engagements,
14243            engagement_letters: bag.engagement_letters,
14244            materiality_calculations: bag.materiality_calculations,
14245            risk_assessments: bag.risk_assessments,
14246            combined_risk_assessments: bag.combined_risk_assessments,
14247            workpapers: bag.workpapers,
14248            evidence: bag.evidence,
14249            findings: bag.findings,
14250            judgments: bag.judgments,
14251            sampling_plans: bag.sampling_plans,
14252            sampled_items: bag.sampled_items,
14253            analytical_results: bag.analytical_results,
14254            going_concern_assessments: bag.going_concern_assessments,
14255            subsequent_events: bag.subsequent_events,
14256            audit_opinions: bag.audit_opinions,
14257            key_audit_matters: bag.key_audit_matters,
14258            procedure_steps: bag.procedure_steps,
14259            samples: bag.samples,
14260            confirmations: bag.confirmations,
14261            confirmation_responses: bag.confirmation_responses,
14262            // Store the event trail for downstream export.
14263            fsm_event_trail: Some(result.event_log),
14264            // Fields not produced by the FSM engine remain at their defaults.
14265            ..Default::default()
14266        };
14267
14268        // 6. Add static reference data (same as legacy path).
14269        {
14270            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14271            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14272        }
14273        {
14274            use datasynth_standards::audit::isa_reference::IsaStandard;
14275            snapshot.isa_mappings = IsaStandard::standard_entries();
14276        }
14277
14278        info!(
14279            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
14280             {} risk assessments, {} findings, {} materiality calcs",
14281            snapshot.engagements.len(),
14282            snapshot.workpapers.len(),
14283            snapshot.evidence.len(),
14284            snapshot.risk_assessments.len(),
14285            snapshot.findings.len(),
14286            snapshot.materiality_calculations.len(),
14287        );
14288
14289        Ok(snapshot)
14290    }
14291
14292    /// Export journal entries as graph data for ML training and network reconstruction.
14293    ///
14294    /// Builds a transaction graph where:
14295    /// - Nodes are GL accounts
14296    /// - Edges are money flows from credit to debit accounts
14297    /// - Edge attributes include amount, date, business process, anomaly flags
14298    fn export_graphs(
14299        &mut self,
14300        entries: &[JournalEntry],
14301        _coa: &Arc<ChartOfAccounts>,
14302        stats: &mut EnhancedGenerationStatistics,
14303    ) -> SynthResult<GraphExportSnapshot> {
14304        let pb = self.create_progress_bar(100, "Exporting Graphs");
14305
14306        let mut snapshot = GraphExportSnapshot::default();
14307
14308        // Get output directory
14309        let output_dir = self
14310            .output_path
14311            .clone()
14312            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14313        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14314
14315        // Process each graph type configuration
14316        for graph_type in &self.config.graph_export.graph_types {
14317            if let Some(pb) = &pb {
14318                pb.inc(10);
14319            }
14320
14321            // Build transaction graph
14322            let graph_config = TransactionGraphConfig {
14323                include_vendors: false,
14324                include_customers: false,
14325                create_debit_credit_edges: true,
14326                include_document_nodes: graph_type.include_document_nodes,
14327                min_edge_weight: graph_type.min_edge_weight,
14328                aggregate_parallel_edges: graph_type.aggregate_edges,
14329                framework: None,
14330            };
14331
14332            let mut builder = TransactionGraphBuilder::new(graph_config);
14333            builder.add_journal_entries(entries);
14334            let graph = builder.build();
14335
14336            // Update stats
14337            stats.graph_node_count += graph.node_count();
14338            stats.graph_edge_count += graph.edge_count();
14339
14340            if let Some(pb) = &pb {
14341                pb.inc(40);
14342            }
14343
14344            // Export to each configured format
14345            for format in &self.config.graph_export.formats {
14346                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
14347
14348                // Create output directory
14349                if let Err(e) = std::fs::create_dir_all(&format_dir) {
14350                    warn!("Failed to create graph output directory: {}", e);
14351                    continue;
14352                }
14353
14354                match format {
14355                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
14356                        let pyg_config = PyGExportConfig {
14357                            common: datasynth_graph::CommonExportConfig {
14358                                export_node_features: true,
14359                                export_edge_features: true,
14360                                export_node_labels: true,
14361                                export_edge_labels: true,
14362                                export_masks: true,
14363                                train_ratio: self.config.graph_export.train_ratio,
14364                                val_ratio: self.config.graph_export.validation_ratio,
14365                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14366                            },
14367                            one_hot_categoricals: false,
14368                        };
14369
14370                        let exporter = PyGExporter::new(pyg_config);
14371                        match exporter.export(&graph, &format_dir) {
14372                            Ok(metadata) => {
14373                                snapshot.exports.insert(
14374                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
14375                                    GraphExportInfo {
14376                                        name: graph_type.name.clone(),
14377                                        format: "pytorch_geometric".to_string(),
14378                                        output_path: format_dir.clone(),
14379                                        node_count: metadata.num_nodes,
14380                                        edge_count: metadata.num_edges,
14381                                    },
14382                                );
14383                                snapshot.graph_count += 1;
14384                            }
14385                            Err(e) => {
14386                                warn!("Failed to export PyTorch Geometric graph: {}", e);
14387                            }
14388                        }
14389                    }
14390                    datasynth_config::schema::GraphExportFormat::Neo4j => {
14391                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
14392
14393                        let neo4j_config = Neo4jExportConfig {
14394                            export_node_properties: true,
14395                            export_edge_properties: true,
14396                            export_features: true,
14397                            generate_cypher: true,
14398                            generate_admin_import: true,
14399                            database_name: "synth".to_string(),
14400                            cypher_batch_size: 1000,
14401                        };
14402
14403                        let exporter = Neo4jExporter::new(neo4j_config);
14404                        match exporter.export(&graph, &format_dir) {
14405                            Ok(metadata) => {
14406                                snapshot.exports.insert(
14407                                    format!("{}_{}", graph_type.name, "neo4j"),
14408                                    GraphExportInfo {
14409                                        name: graph_type.name.clone(),
14410                                        format: "neo4j".to_string(),
14411                                        output_path: format_dir.clone(),
14412                                        node_count: metadata.num_nodes,
14413                                        edge_count: metadata.num_edges,
14414                                    },
14415                                );
14416                                snapshot.graph_count += 1;
14417                            }
14418                            Err(e) => {
14419                                warn!("Failed to export Neo4j graph: {}", e);
14420                            }
14421                        }
14422                    }
14423                    datasynth_config::schema::GraphExportFormat::Dgl => {
14424                        use datasynth_graph::{DGLExportConfig, DGLExporter};
14425
14426                        let dgl_config = DGLExportConfig {
14427                            common: datasynth_graph::CommonExportConfig {
14428                                export_node_features: true,
14429                                export_edge_features: true,
14430                                export_node_labels: true,
14431                                export_edge_labels: true,
14432                                export_masks: true,
14433                                train_ratio: self.config.graph_export.train_ratio,
14434                                val_ratio: self.config.graph_export.validation_ratio,
14435                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14436                            },
14437                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
14438                            include_pickle_script: true, // DGL ecosystem standard helper
14439                        };
14440
14441                        let exporter = DGLExporter::new(dgl_config);
14442                        match exporter.export(&graph, &format_dir) {
14443                            Ok(metadata) => {
14444                                snapshot.exports.insert(
14445                                    format!("{}_{}", graph_type.name, "dgl"),
14446                                    GraphExportInfo {
14447                                        name: graph_type.name.clone(),
14448                                        format: "dgl".to_string(),
14449                                        output_path: format_dir.clone(),
14450                                        node_count: metadata.common.num_nodes,
14451                                        edge_count: metadata.common.num_edges,
14452                                    },
14453                                );
14454                                snapshot.graph_count += 1;
14455                            }
14456                            Err(e) => {
14457                                warn!("Failed to export DGL graph: {}", e);
14458                            }
14459                        }
14460                    }
14461                    datasynth_config::schema::GraphExportFormat::RustGraph => {
14462                        use datasynth_graph::{
14463                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14464                        };
14465
14466                        let rustgraph_config = RustGraphExportConfig {
14467                            include_features: true,
14468                            include_temporal: true,
14469                            include_labels: true,
14470                            source_name: "datasynth".to_string(),
14471                            batch_id: None,
14472                            output_format: RustGraphOutputFormat::JsonLines,
14473                            export_node_properties: true,
14474                            export_edge_properties: true,
14475                            pretty_print: false,
14476                        };
14477
14478                        let exporter = RustGraphExporter::new(rustgraph_config);
14479                        match exporter.export(&graph, &format_dir) {
14480                            Ok(metadata) => {
14481                                snapshot.exports.insert(
14482                                    format!("{}_{}", graph_type.name, "rustgraph"),
14483                                    GraphExportInfo {
14484                                        name: graph_type.name.clone(),
14485                                        format: "rustgraph".to_string(),
14486                                        output_path: format_dir.clone(),
14487                                        node_count: metadata.num_nodes,
14488                                        edge_count: metadata.num_edges,
14489                                    },
14490                                );
14491                                snapshot.graph_count += 1;
14492                            }
14493                            Err(e) => {
14494                                warn!("Failed to export RustGraph: {}", e);
14495                            }
14496                        }
14497                    }
14498                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14499                        // Hypergraph export is handled separately in Phase 10b
14500                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14501                    }
14502                }
14503            }
14504
14505            if let Some(pb) = &pb {
14506                pb.inc(40);
14507            }
14508        }
14509
14510        stats.graph_export_count = snapshot.graph_count;
14511        snapshot.exported = snapshot.graph_count > 0;
14512
14513        if let Some(pb) = pb {
14514            pb.finish_with_message(format!(
14515                "Graphs exported: {} graphs ({} nodes, {} edges)",
14516                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14517            ));
14518        }
14519
14520        Ok(snapshot)
14521    }
14522
14523    /// Build additional graph types (banking, approval, entity) when relevant data
14524    /// is available. These run as a late phase because the data they need (banking
14525    /// snapshot, intercompany snapshot) is only generated after the main graph
14526    /// export phase.
14527    fn build_additional_graphs(
14528        &self,
14529        banking: &BankingSnapshot,
14530        intercompany: &IntercompanySnapshot,
14531        entries: &[JournalEntry],
14532        stats: &mut EnhancedGenerationStatistics,
14533    ) {
14534        let output_dir = self
14535            .output_path
14536            .clone()
14537            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14538        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14539
14540        // Banking graph: build when banking customers and transactions exist
14541        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14542            info!("Phase 10c: Building banking network graph");
14543            let config = BankingGraphConfig::default();
14544            let mut builder = BankingGraphBuilder::new(config);
14545            builder.add_customers(&banking.customers);
14546            builder.add_accounts(&banking.accounts, &banking.customers);
14547            builder.add_transactions(&banking.transactions);
14548            let graph = builder.build();
14549
14550            let node_count = graph.node_count();
14551            let edge_count = graph.edge_count();
14552            stats.graph_node_count += node_count;
14553            stats.graph_edge_count += edge_count;
14554
14555            // Export as PyG if configured
14556            for format in &self.config.graph_export.formats {
14557                if matches!(
14558                    format,
14559                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14560                ) {
14561                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14562                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14563                        warn!("Failed to create banking graph output dir: {}", e);
14564                        continue;
14565                    }
14566                    let pyg_config = PyGExportConfig::default();
14567                    let exporter = PyGExporter::new(pyg_config);
14568                    if let Err(e) = exporter.export(&graph, &format_dir) {
14569                        warn!("Failed to export banking graph as PyG: {}", e);
14570                    } else {
14571                        info!(
14572                            "Banking network graph exported: {} nodes, {} edges",
14573                            node_count, edge_count
14574                        );
14575                    }
14576                }
14577            }
14578        }
14579
14580        // Approval graph: build from journal entry approval workflows
14581        let approval_entries: Vec<_> = entries
14582            .iter()
14583            .filter(|je| je.header.approval_workflow.is_some())
14584            .collect();
14585
14586        if !approval_entries.is_empty() {
14587            info!(
14588                "Phase 10c: Building approval network graph ({} entries with approvals)",
14589                approval_entries.len()
14590            );
14591            let config = ApprovalGraphConfig::default();
14592            let mut builder = ApprovalGraphBuilder::new(config);
14593
14594            for je in &approval_entries {
14595                if let Some(ref wf) = je.header.approval_workflow {
14596                    for action in &wf.actions {
14597                        let record = datasynth_core::models::ApprovalRecord {
14598                            approval_id: format!(
14599                                "APR-{}-{}",
14600                                je.header.document_id, action.approval_level
14601                            ),
14602                            document_number: je.header.document_id.to_string(),
14603                            document_type: "JE".to_string(),
14604                            company_code: je.company_code().to_string(),
14605                            requester_id: wf.preparer_id.clone(),
14606                            requester_name: Some(wf.preparer_name.clone()),
14607                            approver_id: action.actor_id.clone(),
14608                            approver_name: action.actor_name.clone(),
14609                            approval_date: je.posting_date(),
14610                            action: format!("{:?}", action.action),
14611                            amount: wf.amount,
14612                            approval_limit: None,
14613                            comments: action.comments.clone(),
14614                            delegation_from: None,
14615                            is_auto_approved: false,
14616                        };
14617                        builder.add_approval(&record);
14618                    }
14619                }
14620            }
14621
14622            let graph = builder.build();
14623            let node_count = graph.node_count();
14624            let edge_count = graph.edge_count();
14625            stats.graph_node_count += node_count;
14626            stats.graph_edge_count += edge_count;
14627
14628            // Export as PyG if configured
14629            for format in &self.config.graph_export.formats {
14630                if matches!(
14631                    format,
14632                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14633                ) {
14634                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14635                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14636                        warn!("Failed to create approval graph output dir: {}", e);
14637                        continue;
14638                    }
14639                    let pyg_config = PyGExportConfig::default();
14640                    let exporter = PyGExporter::new(pyg_config);
14641                    if let Err(e) = exporter.export(&graph, &format_dir) {
14642                        warn!("Failed to export approval graph as PyG: {}", e);
14643                    } else {
14644                        info!(
14645                            "Approval network graph exported: {} nodes, {} edges",
14646                            node_count, edge_count
14647                        );
14648                    }
14649                }
14650            }
14651        }
14652
14653        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
14654        if self.config.companies.len() >= 2 {
14655            info!(
14656                "Phase 10c: Building entity relationship graph ({} companies)",
14657                self.config.companies.len()
14658            );
14659
14660            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14661                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
14662
14663            // Map CompanyConfig → Company objects
14664            let parent_code = &self.config.companies[0].code;
14665            let mut companies: Vec<datasynth_core::models::Company> =
14666                Vec::with_capacity(self.config.companies.len());
14667
14668            // First company is the parent
14669            let first = &self.config.companies[0];
14670            companies.push(datasynth_core::models::Company::parent(
14671                &first.code,
14672                &first.name,
14673                &first.country,
14674                &first.currency,
14675            ));
14676
14677            // Remaining companies are subsidiaries (100% owned by parent)
14678            for cc in self.config.companies.iter().skip(1) {
14679                companies.push(datasynth_core::models::Company::subsidiary(
14680                    &cc.code,
14681                    &cc.name,
14682                    &cc.country,
14683                    &cc.currency,
14684                    parent_code,
14685                    rust_decimal::Decimal::from(100),
14686                ));
14687            }
14688
14689            // Build IntercompanyRelationship records (same logic as phase_intercompany)
14690            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
14691                self.config
14692                    .companies
14693                    .iter()
14694                    .skip(1)
14695                    .enumerate()
14696                    .map(|(i, cc)| {
14697                        let mut rel =
14698                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
14699                                format!("REL{:03}", i + 1),
14700                                parent_code.clone(),
14701                                cc.code.clone(),
14702                                rust_decimal::Decimal::from(100),
14703                                start_date,
14704                            );
14705                        rel.functional_currency = cc.currency.clone();
14706                        rel
14707                    })
14708                    .collect();
14709
14710            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
14711            builder.add_companies(&companies);
14712            builder.add_ownership_relationships(&relationships);
14713
14714            // Thread IC matched-pair transaction edges into the entity graph
14715            for pair in &intercompany.matched_pairs {
14716                builder.add_intercompany_edge(
14717                    &pair.seller_company,
14718                    &pair.buyer_company,
14719                    pair.amount,
14720                    &format!("{:?}", pair.transaction_type),
14721                );
14722            }
14723
14724            let graph = builder.build();
14725            let node_count = graph.node_count();
14726            let edge_count = graph.edge_count();
14727            stats.graph_node_count += node_count;
14728            stats.graph_edge_count += edge_count;
14729
14730            // Export as PyG if configured
14731            for format in &self.config.graph_export.formats {
14732                if matches!(
14733                    format,
14734                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14735                ) {
14736                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14737                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14738                        warn!("Failed to create entity graph output dir: {}", e);
14739                        continue;
14740                    }
14741                    let pyg_config = PyGExportConfig::default();
14742                    let exporter = PyGExporter::new(pyg_config);
14743                    if let Err(e) = exporter.export(&graph, &format_dir) {
14744                        warn!("Failed to export entity graph as PyG: {}", e);
14745                    } else {
14746                        info!(
14747                            "Entity relationship graph exported: {} nodes, {} edges",
14748                            node_count, edge_count
14749                        );
14750                    }
14751                }
14752            }
14753        } else {
14754            debug!(
14755                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14756                self.config.companies.len()
14757            );
14758        }
14759    }
14760
14761    /// Export a multi-layer hypergraph for RustGraph integration.
14762    ///
14763    /// Builds a 3-layer hypergraph:
14764    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
14765    /// - Layer 2: Process Events (all process family document flows + OCPM events)
14766    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
14767    #[allow(clippy::too_many_arguments)]
14768    fn export_hypergraph(
14769        &self,
14770        coa: &Arc<ChartOfAccounts>,
14771        entries: &[JournalEntry],
14772        document_flows: &DocumentFlowSnapshot,
14773        sourcing: &SourcingSnapshot,
14774        hr: &HrSnapshot,
14775        manufacturing: &ManufacturingSnapshot,
14776        banking: &BankingSnapshot,
14777        audit: &AuditSnapshot,
14778        financial_reporting: &FinancialReportingSnapshot,
14779        ocpm: &OcpmSnapshot,
14780        compliance: &ComplianceRegulationsSnapshot,
14781        stats: &mut EnhancedGenerationStatistics,
14782    ) -> SynthResult<HypergraphExportInfo> {
14783        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
14784        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
14785        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
14786        use datasynth_graph::models::hypergraph::AggregationStrategy;
14787
14788        let hg_settings = &self.config.graph_export.hypergraph;
14789
14790        // Parse aggregation strategy from config string
14791        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
14792            "truncate" => AggregationStrategy::Truncate,
14793            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
14794            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
14795            "importance_sample" => AggregationStrategy::ImportanceSample,
14796            _ => AggregationStrategy::PoolByCounterparty,
14797        };
14798
14799        let builder_config = HypergraphConfig {
14800            max_nodes: hg_settings.max_nodes,
14801            aggregation_strategy,
14802            include_coso: hg_settings.governance_layer.include_coso,
14803            include_controls: hg_settings.governance_layer.include_controls,
14804            include_sox: hg_settings.governance_layer.include_sox,
14805            include_vendors: hg_settings.governance_layer.include_vendors,
14806            include_customers: hg_settings.governance_layer.include_customers,
14807            include_employees: hg_settings.governance_layer.include_employees,
14808            include_p2p: hg_settings.process_layer.include_p2p,
14809            include_o2c: hg_settings.process_layer.include_o2c,
14810            include_s2c: hg_settings.process_layer.include_s2c,
14811            include_h2r: hg_settings.process_layer.include_h2r,
14812            include_mfg: hg_settings.process_layer.include_mfg,
14813            include_bank: hg_settings.process_layer.include_bank,
14814            include_audit: hg_settings.process_layer.include_audit,
14815            include_r2r: hg_settings.process_layer.include_r2r,
14816            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
14817            docs_per_counterparty_threshold: hg_settings
14818                .process_layer
14819                .docs_per_counterparty_threshold,
14820            include_accounts: hg_settings.accounting_layer.include_accounts,
14821            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
14822            include_cross_layer_edges: hg_settings.cross_layer.enabled,
14823            include_compliance: self.config.compliance_regulations.enabled,
14824            include_tax: true,
14825            include_treasury: true,
14826            include_esg: true,
14827            include_project: true,
14828            include_intercompany: true,
14829            include_temporal_events: true,
14830        };
14831
14832        let mut builder = HypergraphBuilder::new(builder_config);
14833
14834        // Layer 1: Governance & Controls
14835        builder.add_coso_framework();
14836
14837        // Add controls if available (generated during JE generation)
14838        // Controls are generated per-company; we use the standard set
14839        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
14840            let controls = InternalControl::standard_controls();
14841            builder.add_controls(&controls);
14842        }
14843
14844        // Add master data
14845        builder.add_vendors(&self.master_data.vendors);
14846        builder.add_customers(&self.master_data.customers);
14847        builder.add_employees(&self.master_data.employees);
14848
14849        // Layer 2: Process Events (all process families)
14850        builder.add_p2p_documents(
14851            &document_flows.purchase_orders,
14852            &document_flows.goods_receipts,
14853            &document_flows.vendor_invoices,
14854            &document_flows.payments,
14855        );
14856        builder.add_o2c_documents(
14857            &document_flows.sales_orders,
14858            &document_flows.deliveries,
14859            &document_flows.customer_invoices,
14860        );
14861        builder.add_s2c_documents(
14862            &sourcing.sourcing_projects,
14863            &sourcing.qualifications,
14864            &sourcing.rfx_events,
14865            &sourcing.bids,
14866            &sourcing.bid_evaluations,
14867            &sourcing.contracts,
14868        );
14869        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
14870        builder.add_mfg_documents(
14871            &manufacturing.production_orders,
14872            &manufacturing.quality_inspections,
14873            &manufacturing.cycle_counts,
14874        );
14875        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
14876        builder.add_audit_documents(
14877            &audit.engagements,
14878            &audit.workpapers,
14879            &audit.findings,
14880            &audit.evidence,
14881            &audit.risk_assessments,
14882            &audit.judgments,
14883            &audit.materiality_calculations,
14884            &audit.audit_opinions,
14885            &audit.going_concern_assessments,
14886        );
14887        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
14888
14889        // OCPM events as hyperedges
14890        if let Some(ref event_log) = ocpm.event_log {
14891            builder.add_ocpm_events(event_log);
14892        }
14893
14894        // Compliance regulations as cross-layer nodes
14895        if self.config.compliance_regulations.enabled
14896            && hg_settings.governance_layer.include_controls
14897        {
14898            // Reconstruct ComplianceStandard objects from the registry
14899            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14900            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
14901                .standard_records
14902                .iter()
14903                .filter_map(|r| {
14904                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
14905                    registry.get(&sid).cloned()
14906                })
14907                .collect();
14908
14909            builder.add_compliance_regulations(
14910                &standards,
14911                &compliance.findings,
14912                &compliance.filings,
14913            );
14914        }
14915
14916        // Layer 3: Accounting Network
14917        builder.add_accounts(coa);
14918        builder.add_journal_entries_as_hyperedges(entries);
14919
14920        // Build the hypergraph
14921        let hypergraph = builder.build();
14922
14923        // Export
14924        let output_dir = self
14925            .output_path
14926            .clone()
14927            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14928        let hg_dir = output_dir
14929            .join(&self.config.graph_export.output_subdirectory)
14930            .join(&hg_settings.output_subdirectory);
14931
14932        // Branch on output format
14933        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
14934            "unified" => {
14935                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14936                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14937                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
14938                })?;
14939                (
14940                    metadata.num_nodes,
14941                    metadata.num_edges,
14942                    metadata.num_hyperedges,
14943                )
14944            }
14945            _ => {
14946                // "native" or any unrecognized format → use existing exporter
14947                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
14948                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14949                    SynthError::generation(format!("Hypergraph export failed: {e}"))
14950                })?;
14951                (
14952                    metadata.num_nodes,
14953                    metadata.num_edges,
14954                    metadata.num_hyperedges,
14955                )
14956            }
14957        };
14958
14959        // Stream to RustGraph ingest endpoint if configured
14960        #[cfg(feature = "streaming")]
14961        if let Some(ref target_url) = hg_settings.stream_target {
14962            use crate::stream_client::{StreamClient, StreamConfig};
14963            use std::io::Write as _;
14964
14965            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
14966            let stream_config = StreamConfig {
14967                target_url: target_url.clone(),
14968                batch_size: hg_settings.stream_batch_size,
14969                api_key,
14970                ..StreamConfig::default()
14971            };
14972
14973            match StreamClient::new(stream_config) {
14974                Ok(mut client) => {
14975                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14976                    match exporter.export_to_writer(&hypergraph, &mut client) {
14977                        Ok(_) => {
14978                            if let Err(e) = client.flush() {
14979                                warn!("Failed to flush stream client: {}", e);
14980                            } else {
14981                                info!("Streamed {} records to {}", client.total_sent(), target_url);
14982                            }
14983                        }
14984                        Err(e) => {
14985                            warn!("Streaming export failed: {}", e);
14986                        }
14987                    }
14988                }
14989                Err(e) => {
14990                    warn!("Failed to create stream client: {}", e);
14991                }
14992            }
14993        }
14994
14995        // Update stats
14996        stats.graph_node_count += num_nodes;
14997        stats.graph_edge_count += num_edges;
14998        stats.graph_export_count += 1;
14999
15000        Ok(HypergraphExportInfo {
15001            node_count: num_nodes,
15002            edge_count: num_edges,
15003            hyperedge_count: num_hyperedges,
15004            output_path: hg_dir,
15005        })
15006    }
15007
15008    /// Generate banking KYC/AML data.
15009    ///
15010    /// Creates banking customers, accounts, and transactions with AML typology injection.
15011    /// Uses the BankingOrchestrator from synth-banking crate.
15012    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
15013        let pb = self.create_progress_bar(100, "Generating Banking Data");
15014
15015        // Build the banking orchestrator from config
15016        let orchestrator = BankingOrchestratorBuilder::new()
15017            .config(self.config.banking.clone())
15018            .seed(self.seed + 9000)
15019            .country_pack(self.primary_pack().clone())
15020            .build();
15021
15022        if let Some(pb) = &pb {
15023            pb.inc(10);
15024        }
15025
15026        // Generate the banking data
15027        let result = orchestrator.generate();
15028
15029        if let Some(pb) = &pb {
15030            pb.inc(90);
15031            pb.finish_with_message(format!(
15032                "Banking: {} customers, {} transactions",
15033                result.customers.len(),
15034                result.transactions.len()
15035            ));
15036        }
15037
15038        // Cross-reference banking customers with core master data so that
15039        // banking customer names align with the enterprise customer list.
15040        // We rotate through core customers, overlaying their name and country
15041        // onto the generated banking customers where possible.
15042        let mut banking_customers = result.customers;
15043        let core_customers = &self.master_data.customers;
15044        if !core_customers.is_empty() {
15045            for (i, bc) in banking_customers.iter_mut().enumerate() {
15046                let core = &core_customers[i % core_customers.len()];
15047                bc.name = CustomerName::business(&core.name);
15048                bc.residence_country = core.country.clone();
15049                bc.enterprise_customer_id = Some(core.customer_id.clone());
15050            }
15051            debug!(
15052                "Cross-referenced {} banking customers with {} core customers",
15053                banking_customers.len(),
15054                core_customers.len()
15055            );
15056        }
15057
15058        Ok(BankingSnapshot {
15059            customers: banking_customers,
15060            accounts: result.accounts,
15061            transactions: result.transactions,
15062            transaction_labels: result.transaction_labels,
15063            customer_labels: result.customer_labels,
15064            account_labels: result.account_labels,
15065            relationship_labels: result.relationship_labels,
15066            narratives: result.narratives,
15067            suspicious_count: result.stats.suspicious_count,
15068            scenario_count: result.scenarios.len(),
15069        })
15070    }
15071
15072    /// Calculate total transactions to generate.
15073    fn calculate_total_transactions(&self) -> u64 {
15074        let months = self.config.global.period_months as f64;
15075        self.config
15076            .companies
15077            .iter()
15078            .map(|c| {
15079                let annual = c.annual_transaction_volume.count() as f64;
15080                let weighted = annual * c.volume_weight;
15081                (weighted * months / 12.0) as u64
15082            })
15083            .sum()
15084    }
15085
15086    /// Create a progress bar if progress display is enabled.
15087    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
15088        if !self.phase_config.show_progress {
15089            return None;
15090        }
15091
15092        let pb = if let Some(mp) = &self.multi_progress {
15093            mp.add(ProgressBar::new(total))
15094        } else {
15095            ProgressBar::new(total)
15096        };
15097
15098        pb.set_style(
15099            ProgressStyle::default_bar()
15100                .template(&format!(
15101                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
15102                ))
15103                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
15104                .progress_chars("#>-"),
15105        );
15106
15107        Some(pb)
15108    }
15109
15110    /// Get the generated chart of accounts.
15111    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
15112        self.coa.clone()
15113    }
15114
15115    /// Get the generated master data.
15116    pub fn get_master_data(&self) -> &MasterDataSnapshot {
15117        &self.master_data
15118    }
15119
15120    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
15121    fn phase_compliance_regulations(
15122        &mut self,
15123        _stats: &mut EnhancedGenerationStatistics,
15124    ) -> SynthResult<ComplianceRegulationsSnapshot> {
15125        if !self.phase_config.generate_compliance_regulations {
15126            return Ok(ComplianceRegulationsSnapshot::default());
15127        }
15128
15129        info!("Phase: Generating Compliance Regulations Data");
15130
15131        let cr_config = &self.config.compliance_regulations;
15132
15133        // Determine jurisdictions: from config or inferred from companies
15134        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
15135            self.config
15136                .companies
15137                .iter()
15138                .map(|c| c.country.clone())
15139                .collect::<std::collections::HashSet<_>>()
15140                .into_iter()
15141                .collect()
15142        } else {
15143            cr_config.jurisdictions.clone()
15144        };
15145
15146        // Determine reference date
15147        let fallback_date =
15148            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
15149        let reference_date = cr_config
15150            .reference_date
15151            .as_ref()
15152            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
15153            .unwrap_or_else(|| {
15154                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15155                    .unwrap_or(fallback_date)
15156            });
15157
15158        // Generate standards registry data
15159        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
15160        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
15161        let cross_reference_records = reg_gen.generate_cross_reference_records();
15162        let jurisdiction_records =
15163            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
15164
15165        info!(
15166            "  Standards: {} records, {} cross-references, {} jurisdictions",
15167            standard_records.len(),
15168            cross_reference_records.len(),
15169            jurisdiction_records.len()
15170        );
15171
15172        // Generate audit procedures (if enabled)
15173        let audit_procedures = if cr_config.audit_procedures.enabled {
15174            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
15175                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
15176                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
15177                confidence_level: cr_config.audit_procedures.confidence_level,
15178                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
15179            };
15180            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
15181                self.seed + 9000,
15182                proc_config,
15183            );
15184            let registry = reg_gen.registry();
15185            let mut all_procs = Vec::new();
15186            for jurisdiction in &jurisdictions {
15187                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
15188                all_procs.extend(procs);
15189            }
15190            info!("  Audit procedures: {}", all_procs.len());
15191            all_procs
15192        } else {
15193            Vec::new()
15194        };
15195
15196        // Generate compliance findings (if enabled)
15197        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
15198            let finding_config =
15199                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
15200                    finding_rate: cr_config.findings.finding_rate,
15201                    material_weakness_rate: cr_config.findings.material_weakness_rate,
15202                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
15203                    generate_remediation: cr_config.findings.generate_remediation,
15204                };
15205            let mut finding_gen =
15206                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
15207                    self.seed + 9100,
15208                    finding_config,
15209                );
15210            let mut all_findings = Vec::new();
15211            for company in &self.config.companies {
15212                let company_findings =
15213                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
15214                all_findings.extend(company_findings);
15215            }
15216            info!("  Compliance findings: {}", all_findings.len());
15217            all_findings
15218        } else {
15219            Vec::new()
15220        };
15221
15222        // Generate regulatory filings (if enabled)
15223        let filings = if cr_config.filings.enabled {
15224            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
15225                filing_types: cr_config.filings.filing_types.clone(),
15226                generate_status_progression: cr_config.filings.generate_status_progression,
15227            };
15228            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
15229                self.seed + 9200,
15230                filing_config,
15231            );
15232            let company_codes: Vec<String> = self
15233                .config
15234                .companies
15235                .iter()
15236                .map(|c| c.code.clone())
15237                .collect();
15238            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15239                .unwrap_or(fallback_date);
15240            let filings = filing_gen.generate_filings(
15241                &company_codes,
15242                &jurisdictions,
15243                start_date,
15244                self.config.global.period_months,
15245            );
15246            info!("  Regulatory filings: {}", filings.len());
15247            filings
15248        } else {
15249            Vec::new()
15250        };
15251
15252        // Build compliance graph (if enabled)
15253        let compliance_graph = if cr_config.graph.enabled {
15254            let graph_config = datasynth_graph::ComplianceGraphConfig {
15255                include_standard_nodes: cr_config.graph.include_compliance_nodes,
15256                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
15257                include_cross_references: cr_config.graph.include_cross_references,
15258                include_supersession_edges: cr_config.graph.include_supersession_edges,
15259                include_account_links: cr_config.graph.include_account_links,
15260                include_control_links: cr_config.graph.include_control_links,
15261                include_company_links: cr_config.graph.include_company_links,
15262            };
15263            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
15264
15265            // Add standard nodes
15266            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
15267                .iter()
15268                .map(|r| datasynth_graph::StandardNodeInput {
15269                    standard_id: r.standard_id.clone(),
15270                    title: r.title.clone(),
15271                    category: r.category.clone(),
15272                    domain: r.domain.clone(),
15273                    is_active: r.is_active,
15274                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
15275                    applicable_account_types: r.applicable_account_types.clone(),
15276                    applicable_processes: r.applicable_processes.clone(),
15277                })
15278                .collect();
15279            builder.add_standards(&standard_inputs);
15280
15281            // Add jurisdiction nodes
15282            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
15283                jurisdiction_records
15284                    .iter()
15285                    .map(|r| datasynth_graph::JurisdictionNodeInput {
15286                        country_code: r.country_code.clone(),
15287                        country_name: r.country_name.clone(),
15288                        framework: r.accounting_framework.clone(),
15289                        standard_count: r.standard_count,
15290                        tax_rate: r.statutory_tax_rate,
15291                    })
15292                    .collect();
15293            builder.add_jurisdictions(&jurisdiction_inputs);
15294
15295            // Add cross-reference edges
15296            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
15297                cross_reference_records
15298                    .iter()
15299                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
15300                        from_standard: r.from_standard.clone(),
15301                        to_standard: r.to_standard.clone(),
15302                        relationship: r.relationship.clone(),
15303                        convergence_level: r.convergence_level,
15304                    })
15305                    .collect();
15306            builder.add_cross_references(&xref_inputs);
15307
15308            // Add jurisdiction→standard mappings
15309            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
15310                .iter()
15311                .map(|r| datasynth_graph::JurisdictionMappingInput {
15312                    country_code: r.jurisdiction.clone(),
15313                    standard_id: r.standard_id.clone(),
15314                })
15315                .collect();
15316            builder.add_jurisdiction_mappings(&mapping_inputs);
15317
15318            // Add procedure nodes
15319            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
15320                .iter()
15321                .map(|p| datasynth_graph::ProcedureNodeInput {
15322                    procedure_id: p.procedure_id.clone(),
15323                    standard_id: p.standard_id.clone(),
15324                    procedure_type: p.procedure_type.clone(),
15325                    sample_size: p.sample_size,
15326                    confidence_level: p.confidence_level,
15327                })
15328                .collect();
15329            builder.add_procedures(&proc_inputs);
15330
15331            // Add finding nodes
15332            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
15333                .iter()
15334                .map(|f| datasynth_graph::FindingNodeInput {
15335                    finding_id: f.finding_id.to_string(),
15336                    standard_id: f
15337                        .related_standards
15338                        .first()
15339                        .map(|s| s.as_str().to_string())
15340                        .unwrap_or_default(),
15341                    severity: f.severity.to_string(),
15342                    deficiency_level: f.deficiency_level.to_string(),
15343                    severity_score: f.deficiency_level.severity_score(),
15344                    control_id: f.control_id.clone(),
15345                    affected_accounts: f.affected_accounts.clone(),
15346                })
15347                .collect();
15348            builder.add_findings(&finding_inputs);
15349
15350            // Cross-domain: link standards to accounts from chart of accounts
15351            if cr_config.graph.include_account_links {
15352                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15353                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
15354                for std_record in &standard_records {
15355                    if let Some(std_obj) =
15356                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
15357                            &std_record.standard_id,
15358                        ))
15359                    {
15360                        for acct_type in &std_obj.applicable_account_types {
15361                            account_links.push(datasynth_graph::AccountLinkInput {
15362                                standard_id: std_record.standard_id.clone(),
15363                                account_code: acct_type.clone(),
15364                                account_name: acct_type.clone(),
15365                            });
15366                        }
15367                    }
15368                }
15369                builder.add_account_links(&account_links);
15370            }
15371
15372            // Cross-domain: link standards to internal controls
15373            if cr_config.graph.include_control_links {
15374                let mut control_links = Vec::new();
15375                // SOX/PCAOB standards link to all controls
15376                let sox_like_ids: Vec<String> = standard_records
15377                    .iter()
15378                    .filter(|r| {
15379                        r.standard_id.starts_with("SOX")
15380                            || r.standard_id.starts_with("PCAOB-AS-2201")
15381                    })
15382                    .map(|r| r.standard_id.clone())
15383                    .collect();
15384                // Get control IDs from config (C001-C060 standard controls)
15385                let control_ids = [
15386                    ("C001", "Cash Controls"),
15387                    ("C002", "Large Transaction Approval"),
15388                    ("C010", "PO Approval"),
15389                    ("C011", "Three-Way Match"),
15390                    ("C020", "Revenue Recognition"),
15391                    ("C021", "Credit Check"),
15392                    ("C030", "Manual JE Approval"),
15393                    ("C031", "Period Close Review"),
15394                    ("C032", "Account Reconciliation"),
15395                    ("C040", "Payroll Processing"),
15396                    ("C050", "Fixed Asset Capitalization"),
15397                    ("C060", "Intercompany Elimination"),
15398                ];
15399                for sox_id in &sox_like_ids {
15400                    for (ctrl_id, ctrl_name) in &control_ids {
15401                        control_links.push(datasynth_graph::ControlLinkInput {
15402                            standard_id: sox_id.clone(),
15403                            control_id: ctrl_id.to_string(),
15404                            control_name: ctrl_name.to_string(),
15405                        });
15406                    }
15407                }
15408                builder.add_control_links(&control_links);
15409            }
15410
15411            // Cross-domain: filing nodes with company links
15412            if cr_config.graph.include_company_links {
15413                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15414                    .iter()
15415                    .enumerate()
15416                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
15417                        filing_id: format!("F{:04}", i + 1),
15418                        filing_type: f.filing_type.to_string(),
15419                        company_code: f.company_code.clone(),
15420                        jurisdiction: f.jurisdiction.clone(),
15421                        status: format!("{:?}", f.status),
15422                    })
15423                    .collect();
15424                builder.add_filings(&filing_inputs);
15425            }
15426
15427            let graph = builder.build();
15428            info!(
15429                "  Compliance graph: {} nodes, {} edges",
15430                graph.nodes.len(),
15431                graph.edges.len()
15432            );
15433            Some(graph)
15434        } else {
15435            None
15436        };
15437
15438        self.check_resources_with_log("post-compliance-regulations")?;
15439
15440        Ok(ComplianceRegulationsSnapshot {
15441            standard_records,
15442            cross_reference_records,
15443            jurisdiction_records,
15444            audit_procedures,
15445            findings,
15446            filings,
15447            compliance_graph,
15448        })
15449    }
15450
15451    /// Build a lineage graph describing config → phase → output relationships.
15452    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15453        use super::lineage::LineageGraphBuilder;
15454
15455        let mut builder = LineageGraphBuilder::new();
15456
15457        // Config sections
15458        builder.add_config_section("config:global", "Global Config");
15459        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15460        builder.add_config_section("config:transactions", "Transaction Config");
15461
15462        // Generator phases
15463        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15464        builder.add_generator_phase("phase:je", "Journal Entry Generation");
15465
15466        // Config → phase edges
15467        builder.configured_by("phase:coa", "config:chart_of_accounts");
15468        builder.configured_by("phase:je", "config:transactions");
15469
15470        // Output files
15471        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15472        builder.produced_by("output:je", "phase:je");
15473
15474        // Optional phases based on config
15475        if self.phase_config.generate_master_data {
15476            builder.add_config_section("config:master_data", "Master Data Config");
15477            builder.add_generator_phase("phase:master_data", "Master Data Generation");
15478            builder.configured_by("phase:master_data", "config:master_data");
15479            builder.input_to("phase:master_data", "phase:je");
15480        }
15481
15482        if self.phase_config.generate_document_flows {
15483            builder.add_config_section("config:document_flows", "Document Flow Config");
15484            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15485            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15486            builder.configured_by("phase:p2p", "config:document_flows");
15487            builder.configured_by("phase:o2c", "config:document_flows");
15488
15489            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15490            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15491            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15492            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15493            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15494
15495            builder.produced_by("output:po", "phase:p2p");
15496            builder.produced_by("output:gr", "phase:p2p");
15497            builder.produced_by("output:vi", "phase:p2p");
15498            builder.produced_by("output:so", "phase:o2c");
15499            builder.produced_by("output:ci", "phase:o2c");
15500        }
15501
15502        if self.phase_config.inject_anomalies {
15503            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15504            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15505            builder.configured_by("phase:anomaly", "config:fraud");
15506            builder.add_output_file(
15507                "output:labels",
15508                "Anomaly Labels",
15509                "labels/anomaly_labels.csv",
15510            );
15511            builder.produced_by("output:labels", "phase:anomaly");
15512        }
15513
15514        if self.phase_config.generate_audit {
15515            builder.add_config_section("config:audit", "Audit Config");
15516            builder.add_generator_phase("phase:audit", "Audit Data Generation");
15517            builder.configured_by("phase:audit", "config:audit");
15518        }
15519
15520        if self.phase_config.generate_banking {
15521            builder.add_config_section("config:banking", "Banking Config");
15522            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15523            builder.configured_by("phase:banking", "config:banking");
15524        }
15525
15526        if self.config.llm.enabled {
15527            builder.add_config_section("config:llm", "LLM Enrichment Config");
15528            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15529            builder.configured_by("phase:llm_enrichment", "config:llm");
15530        }
15531
15532        if self.config.diffusion.enabled {
15533            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15534            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15535            builder.configured_by("phase:diffusion", "config:diffusion");
15536        }
15537
15538        if self.config.causal.enabled {
15539            builder.add_config_section("config:causal", "Causal Generation Config");
15540            builder.add_generator_phase("phase:causal", "Causal Overlay");
15541            builder.configured_by("phase:causal", "config:causal");
15542        }
15543
15544        builder.build()
15545    }
15546
15547    // -----------------------------------------------------------------------
15548    // Trial-balance helpers used to replace hardcoded proxy values
15549    // -----------------------------------------------------------------------
15550
15551    /// Compute total revenue for a company from its journal entries.
15552    ///
15553    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
15554    /// net credits on all revenue-account lines filtered to `company_code`.
15555    fn compute_company_revenue(
15556        entries: &[JournalEntry],
15557        company_code: &str,
15558    ) -> rust_decimal::Decimal {
15559        use rust_decimal::Decimal;
15560        let mut revenue = Decimal::ZERO;
15561        for je in entries {
15562            if je.header.company_code != company_code {
15563                continue;
15564            }
15565            for line in &je.lines {
15566                if line.gl_account.starts_with('4') {
15567                    // Revenue is credit-normal
15568                    revenue += line.credit_amount - line.debit_amount;
15569                }
15570            }
15571        }
15572        revenue.max(Decimal::ZERO)
15573    }
15574
15575    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
15576    ///
15577    /// Asset accounts start with "1"; liability accounts start with "2".
15578    fn compute_entity_net_assets(
15579        entries: &[JournalEntry],
15580        entity_code: &str,
15581    ) -> rust_decimal::Decimal {
15582        use rust_decimal::Decimal;
15583        let mut asset_net = Decimal::ZERO;
15584        let mut liability_net = Decimal::ZERO;
15585        for je in entries {
15586            if je.header.company_code != entity_code {
15587                continue;
15588            }
15589            for line in &je.lines {
15590                if line.gl_account.starts_with('1') {
15591                    asset_net += line.debit_amount - line.credit_amount;
15592                } else if line.gl_account.starts_with('2') {
15593                    liability_net += line.credit_amount - line.debit_amount;
15594                }
15595            }
15596        }
15597        asset_net - liability_net
15598    }
15599
15600    /// v3.5.1+: Run the statistical validation suite configured in
15601    /// `distributions.validation.tests` over the final amount
15602    /// distribution.  Collects every non-zero line-level amount (debit +
15603    /// credit) and hands it to the runners in
15604    /// `datasynth_core::distributions::validation`.
15605    ///
15606    /// Returns `Ok(None)` when validation is disabled (the default).
15607    /// When `reporting.fail_on_error = true` and any test fails, returns
15608    /// `Err` with a concise message; otherwise attaches the report to
15609    /// the result and lets callers inspect it.
15610    fn phase_statistical_validation(
15611        &self,
15612        entries: &[JournalEntry],
15613    ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15614        use datasynth_config::schema::StatisticalTestConfig;
15615        use datasynth_core::distributions::{
15616            run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15617            run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15618        };
15619        use rust_decimal::prelude::ToPrimitive;
15620
15621        let cfg = &self.config.distributions.validation;
15622        if !cfg.enabled {
15623            return Ok(None);
15624        }
15625
15626        // Collect per-line positive amounts (debit + credit is zero on the
15627        // non-posting side, so this naturally picks the magnitude).
15628        let amounts: Vec<rust_decimal::Decimal> = entries
15629            .iter()
15630            .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15631            .filter(|a| *a > rust_decimal::Decimal::ZERO)
15632            .collect();
15633
15634        // v4.1.0+ paired (amount, line_count) per entry for correlation
15635        // checks. Amount per entry is the debit-side total (= credit-side
15636        // total for a balanced entry).
15637        let paired_amount_linecount: Vec<(f64, f64)> = entries
15638            .iter()
15639            .filter_map(|je| {
15640                let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
15641                if amt > rust_decimal::Decimal::ZERO {
15642                    amt.to_f64().map(|a| (a, je.lines.len() as f64))
15643                } else {
15644                    None
15645                }
15646            })
15647            .collect();
15648
15649        let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
15650        for test_cfg in &cfg.tests {
15651            match test_cfg {
15652                StatisticalTestConfig::BenfordFirstDigit {
15653                    threshold_mad,
15654                    warning_mad,
15655                } => {
15656                    results.push(run_benford_first_digit(
15657                        &amounts,
15658                        *threshold_mad,
15659                        *warning_mad,
15660                    ));
15661                }
15662                StatisticalTestConfig::ChiSquared { bins, significance } => {
15663                    results.push(run_chi_squared(&amounts, *bins, *significance));
15664                }
15665                StatisticalTestConfig::DistributionFit {
15666                    target: _,
15667                    ks_significance,
15668                    method: _,
15669                } => {
15670                    // v3.5.1+: log-uniformity KS check. Target-specific
15671                    // fits against Normal / Exponential land in v4.1.1+.
15672                    results.push(run_ks_uniform_log(&amounts, *ks_significance));
15673                }
15674                StatisticalTestConfig::AndersonDarling {
15675                    target: _,
15676                    significance,
15677                } => {
15678                    // v4.1.0+: A*² statistic against log-normal on the
15679                    // log-scale. Other targets follow the same pattern.
15680                    results.push(run_anderson_darling(&amounts, *significance));
15681                }
15682                StatisticalTestConfig::CorrelationCheck {
15683                    expected_correlations,
15684                } => {
15685                    // v4.1.0+: (amount, line_count) is tracked today.
15686                    // Other pairs resolve to Skipped pending richer
15687                    // per-entry attribute collection.
15688                    if expected_correlations.is_empty() {
15689                        results.push(StatisticalTestResult {
15690                            name: "correlation_check".to_string(),
15691                            outcome: TestOutcome::Skipped,
15692                            statistic: 0.0,
15693                            threshold: 0.0,
15694                            message: "no expected correlations declared".to_string(),
15695                        });
15696                    } else {
15697                        for ec in expected_correlations {
15698                            let pair_key = format!("{}_{}", ec.field1, ec.field2);
15699                            let is_amount_linecount = (ec.field1 == "amount"
15700                                && ec.field2 == "line_count")
15701                                || (ec.field1 == "line_count" && ec.field2 == "amount");
15702                            if is_amount_linecount {
15703                                let xs: Vec<f64> =
15704                                    paired_amount_linecount.iter().map(|(a, _)| *a).collect();
15705                                let ys: Vec<f64> =
15706                                    paired_amount_linecount.iter().map(|(_, l)| *l).collect();
15707                                results.push(run_correlation_check(
15708                                    &pair_key,
15709                                    &xs,
15710                                    &ys,
15711                                    ec.expected_r,
15712                                    ec.tolerance,
15713                                ));
15714                            } else {
15715                                results.push(StatisticalTestResult {
15716                                    name: format!("correlation_check_{pair_key}"),
15717                                    outcome: TestOutcome::Skipped,
15718                                    statistic: 0.0,
15719                                    threshold: ec.tolerance,
15720                                    message: format!(
15721                                        "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
15722                                        ec.field1, ec.field2
15723                                    ),
15724                                });
15725                            }
15726                        }
15727                    }
15728                }
15729            }
15730        }
15731
15732        let report = StatisticalValidationReport {
15733            sample_count: amounts.len(),
15734            results,
15735        };
15736
15737        if cfg.reporting.fail_on_error && !report.all_passed() {
15738            let failed = report.failed_names().join(", ");
15739            return Err(SynthError::validation(format!(
15740                "statistical validation failed: {failed}"
15741            )));
15742        }
15743
15744        Ok(Some(report))
15745    }
15746
15747    /// v3.3.0: analytics-metadata phase.
15748    ///
15749    /// Runs AFTER all JE-adding phases (including Phase 20b's
15750    /// fraud-bias sweep). Four sub-generators fire in sequence, each
15751    /// gated by an individual `analytics_metadata.<flag>` toggle:
15752    ///
15753    /// 1. `PriorYearGenerator` — prior-year comparatives derived from
15754    ///    current-period account balances.
15755    /// 2. `IndustryBenchmarkGenerator` — industry benchmarks for the
15756    ///    configured `global.industry`.
15757    /// 3. `ManagementReportGenerator` — management-report artefacts.
15758    /// 4. `DriftEventGenerator` — post-generation drift-event labels.
15759    fn phase_analytics_metadata(
15760        &mut self,
15761        entries: &[JournalEntry],
15762    ) -> SynthResult<AnalyticsMetadataSnapshot> {
15763        use datasynth_generators::drift_event_generator::DriftEventGenerator;
15764        use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15765        use datasynth_generators::management_report_generator::ManagementReportGenerator;
15766        use datasynth_generators::prior_year_generator::PriorYearGenerator;
15767        use std::collections::BTreeMap;
15768
15769        let mut snap = AnalyticsMetadataSnapshot::default();
15770
15771        if !self.phase_config.generate_analytics_metadata {
15772            return Ok(snap);
15773        }
15774
15775        let cfg = &self.config.analytics_metadata;
15776        let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15777            .map(|d| d.year())
15778            .unwrap_or(2025);
15779
15780        // ---- 1. Prior-year comparatives ----
15781        if cfg.prior_year {
15782            let mut gen = PriorYearGenerator::new(self.seed + 9100);
15783            for company in &self.config.companies {
15784                // Aggregate current-period balances per account code +
15785                // account name from the entries slice.
15786                let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
15787                    BTreeMap::new();
15788                for je in entries {
15789                    if je.header.company_code != company.code {
15790                        continue;
15791                    }
15792                    for line in &je.lines {
15793                        let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
15794                            (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
15795                        });
15796                        entry.1 += line.debit_amount - line.credit_amount;
15797                    }
15798                }
15799                let current: Vec<(String, String, rust_decimal::Decimal)> = balances
15800                    .into_iter()
15801                    .filter(|(_, (_, bal))| !bal.is_zero())
15802                    .map(|(code, (name, bal))| (code, name, bal))
15803                    .collect();
15804                if !current.is_empty() {
15805                    let comparatives =
15806                        gen.generate_comparatives(&company.code, fiscal_year, &current);
15807                    snap.prior_year_comparatives.extend(comparatives);
15808                }
15809            }
15810            info!(
15811                "v3.3.0 analytics: {} prior-year comparatives across {} companies",
15812                snap.prior_year_comparatives.len(),
15813                self.config.companies.len()
15814            );
15815        }
15816
15817        // ---- 2. Industry benchmarks ----
15818        if cfg.industry_benchmark {
15819            use datasynth_core::models::IndustrySector;
15820            let industry = match self.config.global.industry {
15821                IndustrySector::Manufacturing => "manufacturing",
15822                IndustrySector::Retail => "retail",
15823                IndustrySector::FinancialServices => "financial_services",
15824                IndustrySector::Technology => "technology",
15825                IndustrySector::Healthcare => "healthcare",
15826                _ => "other",
15827            };
15828            let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
15829            let benchmarks = gen.generate(industry, fiscal_year);
15830            info!(
15831                "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
15832                benchmarks.len()
15833            );
15834            snap.industry_benchmarks = benchmarks;
15835        }
15836
15837        // ---- 3. Management reports ----
15838        if cfg.management_reports {
15839            let mut gen = ManagementReportGenerator::new(self.seed + 9300);
15840            let period_months = self.config.global.period_months;
15841            for company in &self.config.companies {
15842                let reports =
15843                    gen.generate_reports(&company.code, fiscal_year as u32, period_months);
15844                snap.management_reports.extend(reports);
15845            }
15846            info!(
15847                "v3.3.0 analytics: {} management reports across {} companies",
15848                snap.management_reports.len(),
15849                self.config.companies.len()
15850            );
15851        }
15852
15853        // ---- 4. Drift-event labels ----
15854        if cfg.drift_events {
15855            let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
15856                .expect("hardcoded NaiveDate 2025-01-01 is valid");
15857            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15858                .unwrap_or(fallback_start);
15859            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
15860            let mut gen = DriftEventGenerator::new(self.seed + 9400);
15861            let drifts = gen.generate_standalone_drifts(start_date, end_date);
15862            info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
15863            snap.drift_events = drifts;
15864        }
15865        // `entries` parameter reserved for future JE-aware drift detection
15866        let _ = entries;
15867
15868        Ok(snap)
15869    }
15870}
15871
15872/// Get the directory name for a graph export format.
15873fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
15874    match format {
15875        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
15876        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
15877        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
15878        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
15879        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
15880    }
15881}
15882
15883/// Aggregate journal entry lines into per-account trial balance rows.
15884///
15885/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
15886/// debit/credit totals and a net balance (debit minus credit).
15887fn compute_trial_balance_entries(
15888    entries: &[JournalEntry],
15889    entity_code: &str,
15890    fiscal_year: i32,
15891    coa: Option<&ChartOfAccounts>,
15892) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
15893    use std::collections::BTreeMap;
15894
15895    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
15896        BTreeMap::new();
15897
15898    for je in entries {
15899        for line in &je.lines {
15900            let entry = balances.entry(line.account_code.clone()).or_default();
15901            entry.0 += line.debit_amount;
15902            entry.1 += line.credit_amount;
15903        }
15904    }
15905
15906    balances
15907        .into_iter()
15908        .map(
15909            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
15910                account_description: coa
15911                    .and_then(|c| c.get_account(&account_code))
15912                    .map(|a| a.description().to_string())
15913                    .unwrap_or_else(|| account_code.clone()),
15914                account_code,
15915                debit_balance: debit,
15916                credit_balance: credit,
15917                net_balance: debit - credit,
15918                entity_code: entity_code.to_string(),
15919                period: format!("FY{}", fiscal_year),
15920            },
15921        )
15922        .collect()
15923}
15924
15925#[cfg(test)]
15926mod tests {
15927    use super::*;
15928    use datasynth_config::schema::*;
15929
15930    fn create_test_config() -> GeneratorConfig {
15931        GeneratorConfig {
15932            global: GlobalConfig {
15933                industry: IndustrySector::Manufacturing,
15934                start_date: "2024-01-01".to_string(),
15935                period_months: 1,
15936                seed: Some(42),
15937                parallel: false,
15938                group_currency: "USD".to_string(),
15939                presentation_currency: None,
15940                worker_threads: 0,
15941                memory_limit_mb: 0,
15942                fiscal_year_months: None,
15943            },
15944            companies: vec![CompanyConfig {
15945                code: "1000".to_string(),
15946                name: "Test Company".to_string(),
15947                currency: "USD".to_string(),
15948                functional_currency: None,
15949                country: "US".to_string(),
15950                annual_transaction_volume: TransactionVolume::TenK,
15951                volume_weight: 1.0,
15952                fiscal_year_variant: "K4".to_string(),
15953            }],
15954            chart_of_accounts: ChartOfAccountsConfig {
15955                complexity: CoAComplexity::Small,
15956                industry_specific: true,
15957                custom_accounts: None,
15958                min_hierarchy_depth: 2,
15959                max_hierarchy_depth: 4,
15960                expand_industry_subaccounts: false,
15961            },
15962            transactions: TransactionConfig::default(),
15963            output: OutputConfig::default(),
15964            fraud: FraudConfig::default(),
15965            internal_controls: InternalControlsConfig::default(),
15966            business_processes: BusinessProcessConfig::default(),
15967            user_personas: UserPersonaConfig::default(),
15968            templates: TemplateConfig::default(),
15969            approval: ApprovalConfig::default(),
15970            departments: DepartmentConfig::default(),
15971            master_data: MasterDataConfig::default(),
15972            document_flows: DocumentFlowConfig::default(),
15973            intercompany: IntercompanyConfig::default(),
15974            balance: BalanceConfig::default(),
15975            ocpm: OcpmConfig::default(),
15976            audit: AuditGenerationConfig::default(),
15977            banking: datasynth_banking::BankingConfig::default(),
15978            data_quality: DataQualitySchemaConfig::default(),
15979            scenario: ScenarioConfig::default(),
15980            temporal: TemporalDriftConfig::default(),
15981            graph_export: GraphExportConfig::default(),
15982            streaming: StreamingSchemaConfig::default(),
15983            rate_limit: RateLimitSchemaConfig::default(),
15984            temporal_attributes: TemporalAttributeSchemaConfig::default(),
15985            relationships: RelationshipSchemaConfig::default(),
15986            accounting_standards: AccountingStandardsConfig::default(),
15987            audit_standards: AuditStandardsConfig::default(),
15988            distributions: Default::default(),
15989            temporal_patterns: Default::default(),
15990            vendor_network: VendorNetworkSchemaConfig::default(),
15991            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
15992            relationship_strength: RelationshipStrengthSchemaConfig::default(),
15993            cross_process_links: CrossProcessLinksSchemaConfig::default(),
15994            organizational_events: OrganizationalEventsSchemaConfig::default(),
15995            behavioral_drift: BehavioralDriftSchemaConfig::default(),
15996            market_drift: MarketDriftSchemaConfig::default(),
15997            drift_labeling: DriftLabelingSchemaConfig::default(),
15998            anomaly_injection: Default::default(),
15999            industry_specific: Default::default(),
16000            fingerprint_privacy: Default::default(),
16001            quality_gates: Default::default(),
16002            compliance: Default::default(),
16003            webhooks: Default::default(),
16004            llm: Default::default(),
16005            diffusion: Default::default(),
16006            causal: Default::default(),
16007            source_to_pay: Default::default(),
16008            financial_reporting: Default::default(),
16009            hr: Default::default(),
16010            manufacturing: Default::default(),
16011            sales_quotes: Default::default(),
16012            tax: Default::default(),
16013            treasury: Default::default(),
16014            project_accounting: Default::default(),
16015            esg: Default::default(),
16016            country_packs: None,
16017            scenarios: Default::default(),
16018            session: Default::default(),
16019            compliance_regulations: Default::default(),
16020            analytics_metadata: Default::default(),
16021            concentration: Default::default(),
16022        }
16023    }
16024
16025    #[test]
16026    fn test_enhanced_orchestrator_creation() {
16027        let config = create_test_config();
16028        let orchestrator = EnhancedOrchestrator::with_defaults(config);
16029        assert!(orchestrator.is_ok());
16030    }
16031
16032    #[test]
16033    fn test_minimal_generation() {
16034        let config = create_test_config();
16035        let phase_config = PhaseConfig {
16036            generate_master_data: false,
16037            generate_document_flows: false,
16038            generate_journal_entries: true,
16039            inject_anomalies: false,
16040            show_progress: false,
16041            ..Default::default()
16042        };
16043
16044        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16045        let result = orchestrator.generate();
16046
16047        assert!(result.is_ok());
16048        let result = result.unwrap();
16049        assert!(!result.journal_entries.is_empty());
16050    }
16051
16052    #[test]
16053    fn test_master_data_generation() {
16054        let config = create_test_config();
16055        let phase_config = PhaseConfig {
16056            generate_master_data: true,
16057            generate_document_flows: false,
16058            generate_journal_entries: false,
16059            inject_anomalies: false,
16060            show_progress: false,
16061            vendors_per_company: 5,
16062            customers_per_company: 5,
16063            materials_per_company: 10,
16064            assets_per_company: 5,
16065            employees_per_company: 10,
16066            ..Default::default()
16067        };
16068
16069        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16070        let result = orchestrator.generate().unwrap();
16071
16072        assert!(!result.master_data.vendors.is_empty());
16073        assert!(!result.master_data.customers.is_empty());
16074        assert!(!result.master_data.materials.is_empty());
16075    }
16076
16077    #[test]
16078    fn test_document_flow_generation() {
16079        let config = create_test_config();
16080        let phase_config = PhaseConfig {
16081            generate_master_data: true,
16082            generate_document_flows: true,
16083            generate_journal_entries: false,
16084            inject_anomalies: false,
16085            inject_data_quality: false,
16086            validate_balances: false,
16087            validate_coa_coverage_strict: false,
16088            generate_ocpm_events: false,
16089            show_progress: false,
16090            vendors_per_company: 5,
16091            customers_per_company: 5,
16092            materials_per_company: 10,
16093            assets_per_company: 5,
16094            employees_per_company: 10,
16095            p2p_chains: 5,
16096            o2c_chains: 5,
16097            ..Default::default()
16098        };
16099
16100        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16101        let result = orchestrator.generate().unwrap();
16102
16103        // Should have generated P2P and O2C chains
16104        assert!(!result.document_flows.p2p_chains.is_empty());
16105        assert!(!result.document_flows.o2c_chains.is_empty());
16106
16107        // Flattened documents should be populated
16108        assert!(!result.document_flows.purchase_orders.is_empty());
16109        assert!(!result.document_flows.sales_orders.is_empty());
16110    }
16111
16112    #[test]
16113    fn test_anomaly_injection() {
16114        let config = create_test_config();
16115        let phase_config = PhaseConfig {
16116            generate_master_data: false,
16117            generate_document_flows: false,
16118            generate_journal_entries: true,
16119            inject_anomalies: true,
16120            show_progress: false,
16121            ..Default::default()
16122        };
16123
16124        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16125        let result = orchestrator.generate().unwrap();
16126
16127        // Should have journal entries
16128        assert!(!result.journal_entries.is_empty());
16129
16130        // With ~833 entries and 2% rate, expect some anomalies
16131        // Note: This is probabilistic, so we just verify the structure exists
16132        assert!(result.anomaly_labels.summary.is_some());
16133    }
16134
16135    #[test]
16136    fn test_full_generation_pipeline() {
16137        let config = create_test_config();
16138        let phase_config = PhaseConfig {
16139            generate_master_data: true,
16140            generate_document_flows: true,
16141            generate_journal_entries: true,
16142            inject_anomalies: false,
16143            inject_data_quality: false,
16144            validate_balances: true,
16145            validate_coa_coverage_strict: false,
16146            generate_ocpm_events: false,
16147            show_progress: false,
16148            vendors_per_company: 3,
16149            customers_per_company: 3,
16150            materials_per_company: 5,
16151            assets_per_company: 3,
16152            employees_per_company: 5,
16153            p2p_chains: 3,
16154            o2c_chains: 3,
16155            ..Default::default()
16156        };
16157
16158        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16159        let result = orchestrator.generate().unwrap();
16160
16161        // All phases should have results
16162        assert!(!result.master_data.vendors.is_empty());
16163        assert!(!result.master_data.customers.is_empty());
16164        assert!(!result.document_flows.p2p_chains.is_empty());
16165        assert!(!result.document_flows.o2c_chains.is_empty());
16166        assert!(!result.journal_entries.is_empty());
16167        assert!(result.statistics.accounts_count > 0);
16168
16169        // Subledger linking should have run
16170        assert!(!result.subledger.ap_invoices.is_empty());
16171        assert!(!result.subledger.ar_invoices.is_empty());
16172
16173        // Balance validation should have run
16174        assert!(result.balance_validation.validated);
16175        assert!(result.balance_validation.entries_processed > 0);
16176    }
16177
16178    #[test]
16179    fn test_subledger_linking() {
16180        let config = create_test_config();
16181        let phase_config = PhaseConfig {
16182            generate_master_data: true,
16183            generate_document_flows: true,
16184            generate_journal_entries: false,
16185            inject_anomalies: false,
16186            inject_data_quality: false,
16187            validate_balances: false,
16188            validate_coa_coverage_strict: false,
16189            generate_ocpm_events: false,
16190            show_progress: false,
16191            vendors_per_company: 5,
16192            customers_per_company: 5,
16193            materials_per_company: 10,
16194            assets_per_company: 3,
16195            employees_per_company: 5,
16196            p2p_chains: 5,
16197            o2c_chains: 5,
16198            ..Default::default()
16199        };
16200
16201        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16202        let result = orchestrator.generate().unwrap();
16203
16204        // Should have document flows
16205        assert!(!result.document_flows.vendor_invoices.is_empty());
16206        assert!(!result.document_flows.customer_invoices.is_empty());
16207
16208        // Subledger should be linked from document flows
16209        assert!(!result.subledger.ap_invoices.is_empty());
16210        assert!(!result.subledger.ar_invoices.is_empty());
16211
16212        // AP invoices count should match vendor invoices count
16213        assert_eq!(
16214            result.subledger.ap_invoices.len(),
16215            result.document_flows.vendor_invoices.len()
16216        );
16217
16218        // AR invoices count should match customer invoices count
16219        assert_eq!(
16220            result.subledger.ar_invoices.len(),
16221            result.document_flows.customer_invoices.len()
16222        );
16223
16224        // Statistics should reflect subledger counts
16225        assert_eq!(
16226            result.statistics.ap_invoice_count,
16227            result.subledger.ap_invoices.len()
16228        );
16229        assert_eq!(
16230            result.statistics.ar_invoice_count,
16231            result.subledger.ar_invoices.len()
16232        );
16233    }
16234
16235    #[test]
16236    fn test_balance_validation() {
16237        let config = create_test_config();
16238        let phase_config = PhaseConfig {
16239            generate_master_data: false,
16240            generate_document_flows: false,
16241            generate_journal_entries: true,
16242            inject_anomalies: false,
16243            validate_balances: true,
16244            validate_coa_coverage_strict: false,
16245            show_progress: false,
16246            ..Default::default()
16247        };
16248
16249        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16250        let result = orchestrator.generate().unwrap();
16251
16252        // Balance validation should run
16253        assert!(result.balance_validation.validated);
16254        assert!(result.balance_validation.entries_processed > 0);
16255
16256        // Generated JEs should be balanced (no unbalanced entries)
16257        assert!(!result.balance_validation.has_unbalanced_entries);
16258
16259        // Total debits should equal total credits
16260        assert_eq!(
16261            result.balance_validation.total_debits,
16262            result.balance_validation.total_credits
16263        );
16264    }
16265
16266    #[test]
16267    fn test_statistics_accuracy() {
16268        let config = create_test_config();
16269        let phase_config = PhaseConfig {
16270            generate_master_data: true,
16271            generate_document_flows: false,
16272            generate_journal_entries: true,
16273            inject_anomalies: false,
16274            show_progress: false,
16275            vendors_per_company: 10,
16276            customers_per_company: 20,
16277            materials_per_company: 15,
16278            assets_per_company: 5,
16279            employees_per_company: 8,
16280            ..Default::default()
16281        };
16282
16283        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16284        let result = orchestrator.generate().unwrap();
16285
16286        // Statistics should match actual data
16287        assert_eq!(
16288            result.statistics.vendor_count,
16289            result.master_data.vendors.len()
16290        );
16291        assert_eq!(
16292            result.statistics.customer_count,
16293            result.master_data.customers.len()
16294        );
16295        assert_eq!(
16296            result.statistics.material_count,
16297            result.master_data.materials.len()
16298        );
16299        assert_eq!(
16300            result.statistics.total_entries as usize,
16301            result.journal_entries.len()
16302        );
16303    }
16304
16305    #[test]
16306    fn test_phase_config_defaults() {
16307        let config = PhaseConfig::default();
16308        assert!(config.generate_master_data);
16309        assert!(config.generate_document_flows);
16310        assert!(config.generate_journal_entries);
16311        assert!(!config.inject_anomalies);
16312        assert!(config.validate_balances);
16313        assert!(config.show_progress);
16314        assert!(config.vendors_per_company > 0);
16315        assert!(config.customers_per_company > 0);
16316    }
16317
16318    #[test]
16319    fn test_get_coa_before_generation() {
16320        let config = create_test_config();
16321        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
16322
16323        // Before generation, CoA should be None
16324        assert!(orchestrator.get_coa().is_none());
16325    }
16326
16327    #[test]
16328    fn test_get_coa_after_generation() {
16329        let config = create_test_config();
16330        let phase_config = PhaseConfig {
16331            generate_master_data: false,
16332            generate_document_flows: false,
16333            generate_journal_entries: true,
16334            inject_anomalies: false,
16335            show_progress: false,
16336            ..Default::default()
16337        };
16338
16339        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16340        let _ = orchestrator.generate().unwrap();
16341
16342        // After generation, CoA should be available
16343        assert!(orchestrator.get_coa().is_some());
16344    }
16345
16346    #[test]
16347    fn test_get_master_data() {
16348        let config = create_test_config();
16349        let phase_config = PhaseConfig {
16350            generate_master_data: true,
16351            generate_document_flows: false,
16352            generate_journal_entries: false,
16353            inject_anomalies: false,
16354            show_progress: false,
16355            vendors_per_company: 5,
16356            customers_per_company: 5,
16357            materials_per_company: 5,
16358            assets_per_company: 5,
16359            employees_per_company: 5,
16360            ..Default::default()
16361        };
16362
16363        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16364        let result = orchestrator.generate().unwrap();
16365
16366        // After generate(), master_data is moved into the result
16367        assert!(!result.master_data.vendors.is_empty());
16368    }
16369
16370    #[test]
16371    fn test_with_progress_builder() {
16372        let config = create_test_config();
16373        let orchestrator = EnhancedOrchestrator::with_defaults(config)
16374            .unwrap()
16375            .with_progress(false);
16376
16377        // Should still work without progress
16378        assert!(!orchestrator.phase_config.show_progress);
16379    }
16380
16381    #[test]
16382    fn test_multi_company_generation() {
16383        let mut config = create_test_config();
16384        config.companies.push(CompanyConfig {
16385            code: "2000".to_string(),
16386            name: "Subsidiary".to_string(),
16387            currency: "EUR".to_string(),
16388            functional_currency: None,
16389            country: "DE".to_string(),
16390            annual_transaction_volume: TransactionVolume::TenK,
16391            volume_weight: 0.5,
16392            fiscal_year_variant: "K4".to_string(),
16393        });
16394
16395        let phase_config = PhaseConfig {
16396            generate_master_data: true,
16397            generate_document_flows: false,
16398            generate_journal_entries: true,
16399            inject_anomalies: false,
16400            show_progress: false,
16401            vendors_per_company: 5,
16402            customers_per_company: 5,
16403            materials_per_company: 5,
16404            assets_per_company: 5,
16405            employees_per_company: 5,
16406            ..Default::default()
16407        };
16408
16409        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16410        let result = orchestrator.generate().unwrap();
16411
16412        // Should have master data for both companies
16413        assert!(result.statistics.vendor_count >= 10); // 5 per company
16414        assert!(result.statistics.customer_count >= 10);
16415        assert!(result.statistics.companies_count == 2);
16416    }
16417
16418    #[test]
16419    fn test_empty_master_data_skips_document_flows() {
16420        let config = create_test_config();
16421        let phase_config = PhaseConfig {
16422            generate_master_data: false,   // Skip master data
16423            generate_document_flows: true, // Try to generate flows
16424            generate_journal_entries: false,
16425            inject_anomalies: false,
16426            show_progress: false,
16427            ..Default::default()
16428        };
16429
16430        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16431        let result = orchestrator.generate().unwrap();
16432
16433        // Without master data, document flows should be empty
16434        assert!(result.document_flows.p2p_chains.is_empty());
16435        assert!(result.document_flows.o2c_chains.is_empty());
16436    }
16437
16438    #[test]
16439    fn test_journal_entry_line_item_count() {
16440        let config = create_test_config();
16441        let phase_config = PhaseConfig {
16442            generate_master_data: false,
16443            generate_document_flows: false,
16444            generate_journal_entries: true,
16445            inject_anomalies: false,
16446            show_progress: false,
16447            ..Default::default()
16448        };
16449
16450        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16451        let result = orchestrator.generate().unwrap();
16452
16453        // Total line items should match sum of all entry line counts
16454        let calculated_line_items: u64 = result
16455            .journal_entries
16456            .iter()
16457            .map(|e| e.line_count() as u64)
16458            .sum();
16459        assert_eq!(result.statistics.total_line_items, calculated_line_items);
16460    }
16461
16462    #[test]
16463    fn test_audit_generation() {
16464        let config = create_test_config();
16465        let phase_config = PhaseConfig {
16466            generate_master_data: false,
16467            generate_document_flows: false,
16468            generate_journal_entries: true,
16469            inject_anomalies: false,
16470            show_progress: false,
16471            generate_audit: true,
16472            audit_engagements: 2,
16473            workpapers_per_engagement: 5,
16474            evidence_per_workpaper: 2,
16475            risks_per_engagement: 3,
16476            findings_per_engagement: 2,
16477            judgments_per_engagement: 2,
16478            ..Default::default()
16479        };
16480
16481        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16482        let result = orchestrator.generate().unwrap();
16483
16484        // Should have generated audit data
16485        assert_eq!(result.audit.engagements.len(), 2);
16486        assert!(!result.audit.workpapers.is_empty());
16487        assert!(!result.audit.evidence.is_empty());
16488        assert!(!result.audit.risk_assessments.is_empty());
16489        assert!(!result.audit.findings.is_empty());
16490        assert!(!result.audit.judgments.is_empty());
16491
16492        // New ISA entity collections should also be populated
16493        assert!(
16494            !result.audit.confirmations.is_empty(),
16495            "ISA 505 confirmations should be generated"
16496        );
16497        assert!(
16498            !result.audit.confirmation_responses.is_empty(),
16499            "ISA 505 confirmation responses should be generated"
16500        );
16501        assert!(
16502            !result.audit.procedure_steps.is_empty(),
16503            "ISA 330 procedure steps should be generated"
16504        );
16505        // Samples may or may not be generated depending on workpaper sampling methods
16506        assert!(
16507            !result.audit.analytical_results.is_empty(),
16508            "ISA 520 analytical procedures should be generated"
16509        );
16510        assert!(
16511            !result.audit.ia_functions.is_empty(),
16512            "ISA 610 IA functions should be generated (one per engagement)"
16513        );
16514        assert!(
16515            !result.audit.related_parties.is_empty(),
16516            "ISA 550 related parties should be generated"
16517        );
16518
16519        // Statistics should match
16520        assert_eq!(
16521            result.statistics.audit_engagement_count,
16522            result.audit.engagements.len()
16523        );
16524        assert_eq!(
16525            result.statistics.audit_workpaper_count,
16526            result.audit.workpapers.len()
16527        );
16528        assert_eq!(
16529            result.statistics.audit_evidence_count,
16530            result.audit.evidence.len()
16531        );
16532        assert_eq!(
16533            result.statistics.audit_risk_count,
16534            result.audit.risk_assessments.len()
16535        );
16536        assert_eq!(
16537            result.statistics.audit_finding_count,
16538            result.audit.findings.len()
16539        );
16540        assert_eq!(
16541            result.statistics.audit_judgment_count,
16542            result.audit.judgments.len()
16543        );
16544        assert_eq!(
16545            result.statistics.audit_confirmation_count,
16546            result.audit.confirmations.len()
16547        );
16548        assert_eq!(
16549            result.statistics.audit_confirmation_response_count,
16550            result.audit.confirmation_responses.len()
16551        );
16552        assert_eq!(
16553            result.statistics.audit_procedure_step_count,
16554            result.audit.procedure_steps.len()
16555        );
16556        assert_eq!(
16557            result.statistics.audit_sample_count,
16558            result.audit.samples.len()
16559        );
16560        assert_eq!(
16561            result.statistics.audit_analytical_result_count,
16562            result.audit.analytical_results.len()
16563        );
16564        assert_eq!(
16565            result.statistics.audit_ia_function_count,
16566            result.audit.ia_functions.len()
16567        );
16568        assert_eq!(
16569            result.statistics.audit_ia_report_count,
16570            result.audit.ia_reports.len()
16571        );
16572        assert_eq!(
16573            result.statistics.audit_related_party_count,
16574            result.audit.related_parties.len()
16575        );
16576        assert_eq!(
16577            result.statistics.audit_related_party_transaction_count,
16578            result.audit.related_party_transactions.len()
16579        );
16580    }
16581
16582    #[test]
16583    fn test_new_phases_disabled_by_default() {
16584        let config = create_test_config();
16585        // Verify new config fields default to disabled
16586        assert!(!config.llm.enabled);
16587        assert!(!config.diffusion.enabled);
16588        assert!(!config.causal.enabled);
16589
16590        let phase_config = PhaseConfig {
16591            generate_master_data: false,
16592            generate_document_flows: false,
16593            generate_journal_entries: true,
16594            inject_anomalies: false,
16595            show_progress: false,
16596            ..Default::default()
16597        };
16598
16599        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16600        let result = orchestrator.generate().unwrap();
16601
16602        // All new phase statistics should be zero when disabled
16603        assert_eq!(result.statistics.llm_enrichment_ms, 0);
16604        assert_eq!(result.statistics.llm_vendors_enriched, 0);
16605        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
16606        assert_eq!(result.statistics.diffusion_samples_generated, 0);
16607        assert_eq!(result.statistics.causal_generation_ms, 0);
16608        assert_eq!(result.statistics.causal_samples_generated, 0);
16609        assert!(result.statistics.causal_validation_passed.is_none());
16610        assert_eq!(result.statistics.counterfactual_pair_count, 0);
16611        assert!(result.counterfactual_pairs.is_empty());
16612    }
16613
16614    #[test]
16615    fn test_counterfactual_generation_enabled() {
16616        let config = create_test_config();
16617        let phase_config = PhaseConfig {
16618            generate_master_data: false,
16619            generate_document_flows: false,
16620            generate_journal_entries: true,
16621            inject_anomalies: false,
16622            show_progress: false,
16623            generate_counterfactuals: true,
16624            generate_period_close: false, // Disable so entry count matches counterfactual pairs
16625            ..Default::default()
16626        };
16627
16628        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16629        let result = orchestrator.generate().unwrap();
16630
16631        // With JE generation enabled, counterfactual pairs should be generated
16632        if !result.journal_entries.is_empty() {
16633            assert_eq!(
16634                result.counterfactual_pairs.len(),
16635                result.journal_entries.len()
16636            );
16637            assert_eq!(
16638                result.statistics.counterfactual_pair_count,
16639                result.journal_entries.len()
16640            );
16641            // Each pair should have a distinct pair_id
16642            let ids: std::collections::HashSet<_> = result
16643                .counterfactual_pairs
16644                .iter()
16645                .map(|p| p.pair_id.clone())
16646                .collect();
16647            assert_eq!(ids.len(), result.counterfactual_pairs.len());
16648        }
16649    }
16650
16651    #[test]
16652    fn test_llm_enrichment_enabled() {
16653        let mut config = create_test_config();
16654        config.llm.enabled = true;
16655        config.llm.max_vendor_enrichments = 3;
16656
16657        let phase_config = PhaseConfig {
16658            generate_master_data: true,
16659            generate_document_flows: false,
16660            generate_journal_entries: false,
16661            inject_anomalies: false,
16662            show_progress: false,
16663            vendors_per_company: 5,
16664            customers_per_company: 3,
16665            materials_per_company: 3,
16666            assets_per_company: 3,
16667            employees_per_company: 3,
16668            ..Default::default()
16669        };
16670
16671        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16672        let result = orchestrator.generate().unwrap();
16673
16674        // LLM enrichment should have run
16675        assert!(result.statistics.llm_vendors_enriched > 0);
16676        assert!(result.statistics.llm_vendors_enriched <= 3);
16677    }
16678
16679    #[test]
16680    fn test_diffusion_enhancement_enabled() {
16681        let mut config = create_test_config();
16682        config.diffusion.enabled = true;
16683        config.diffusion.n_steps = 50;
16684        config.diffusion.sample_size = 20;
16685
16686        let phase_config = PhaseConfig {
16687            generate_master_data: false,
16688            generate_document_flows: false,
16689            generate_journal_entries: true,
16690            inject_anomalies: false,
16691            show_progress: false,
16692            ..Default::default()
16693        };
16694
16695        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16696        let result = orchestrator.generate().unwrap();
16697
16698        // Diffusion phase should have generated samples
16699        assert_eq!(result.statistics.diffusion_samples_generated, 20);
16700    }
16701
16702    #[test]
16703    fn test_causal_overlay_enabled() {
16704        let mut config = create_test_config();
16705        config.causal.enabled = true;
16706        config.causal.template = "fraud_detection".to_string();
16707        config.causal.sample_size = 100;
16708        config.causal.validate = true;
16709
16710        let phase_config = PhaseConfig {
16711            generate_master_data: false,
16712            generate_document_flows: false,
16713            generate_journal_entries: true,
16714            inject_anomalies: false,
16715            show_progress: false,
16716            ..Default::default()
16717        };
16718
16719        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16720        let result = orchestrator.generate().unwrap();
16721
16722        // Causal phase should have generated samples
16723        assert_eq!(result.statistics.causal_samples_generated, 100);
16724        // Validation should have run
16725        assert!(result.statistics.causal_validation_passed.is_some());
16726    }
16727
16728    #[test]
16729    fn test_causal_overlay_revenue_cycle_template() {
16730        let mut config = create_test_config();
16731        config.causal.enabled = true;
16732        config.causal.template = "revenue_cycle".to_string();
16733        config.causal.sample_size = 50;
16734        config.causal.validate = false;
16735
16736        let phase_config = PhaseConfig {
16737            generate_master_data: false,
16738            generate_document_flows: false,
16739            generate_journal_entries: true,
16740            inject_anomalies: false,
16741            show_progress: false,
16742            ..Default::default()
16743        };
16744
16745        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16746        let result = orchestrator.generate().unwrap();
16747
16748        // Causal phase should have generated samples
16749        assert_eq!(result.statistics.causal_samples_generated, 50);
16750        // Validation was disabled
16751        assert!(result.statistics.causal_validation_passed.is_none());
16752    }
16753
16754    #[test]
16755    fn test_all_new_phases_enabled_together() {
16756        let mut config = create_test_config();
16757        config.llm.enabled = true;
16758        config.llm.max_vendor_enrichments = 2;
16759        config.diffusion.enabled = true;
16760        config.diffusion.n_steps = 20;
16761        config.diffusion.sample_size = 10;
16762        config.causal.enabled = true;
16763        config.causal.sample_size = 50;
16764        config.causal.validate = true;
16765
16766        let phase_config = PhaseConfig {
16767            generate_master_data: true,
16768            generate_document_flows: false,
16769            generate_journal_entries: true,
16770            inject_anomalies: false,
16771            show_progress: false,
16772            vendors_per_company: 5,
16773            customers_per_company: 3,
16774            materials_per_company: 3,
16775            assets_per_company: 3,
16776            employees_per_company: 3,
16777            ..Default::default()
16778        };
16779
16780        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16781        let result = orchestrator.generate().unwrap();
16782
16783        // All three phases should have run
16784        assert!(result.statistics.llm_vendors_enriched > 0);
16785        assert_eq!(result.statistics.diffusion_samples_generated, 10);
16786        assert_eq!(result.statistics.causal_samples_generated, 50);
16787        assert!(result.statistics.causal_validation_passed.is_some());
16788    }
16789
16790    #[test]
16791    fn test_statistics_serialization_with_new_fields() {
16792        let stats = EnhancedGenerationStatistics {
16793            total_entries: 100,
16794            total_line_items: 500,
16795            llm_enrichment_ms: 42,
16796            llm_vendors_enriched: 10,
16797            diffusion_enhancement_ms: 100,
16798            diffusion_samples_generated: 50,
16799            causal_generation_ms: 200,
16800            causal_samples_generated: 100,
16801            causal_validation_passed: Some(true),
16802            ..Default::default()
16803        };
16804
16805        let json = serde_json::to_string(&stats).unwrap();
16806        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
16807
16808        assert_eq!(deserialized.llm_enrichment_ms, 42);
16809        assert_eq!(deserialized.llm_vendors_enriched, 10);
16810        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
16811        assert_eq!(deserialized.diffusion_samples_generated, 50);
16812        assert_eq!(deserialized.causal_generation_ms, 200);
16813        assert_eq!(deserialized.causal_samples_generated, 100);
16814        assert_eq!(deserialized.causal_validation_passed, Some(true));
16815    }
16816
16817    #[test]
16818    fn test_statistics_backward_compat_deserialization() {
16819        // Old JSON without the new fields should still deserialize
16820        let old_json = r#"{
16821            "total_entries": 100,
16822            "total_line_items": 500,
16823            "accounts_count": 50,
16824            "companies_count": 1,
16825            "period_months": 12,
16826            "vendor_count": 10,
16827            "customer_count": 20,
16828            "material_count": 15,
16829            "asset_count": 5,
16830            "employee_count": 8,
16831            "p2p_chain_count": 5,
16832            "o2c_chain_count": 5,
16833            "ap_invoice_count": 5,
16834            "ar_invoice_count": 5,
16835            "ocpm_event_count": 0,
16836            "ocpm_object_count": 0,
16837            "ocpm_case_count": 0,
16838            "audit_engagement_count": 0,
16839            "audit_workpaper_count": 0,
16840            "audit_evidence_count": 0,
16841            "audit_risk_count": 0,
16842            "audit_finding_count": 0,
16843            "audit_judgment_count": 0,
16844            "anomalies_injected": 0,
16845            "data_quality_issues": 0,
16846            "banking_customer_count": 0,
16847            "banking_account_count": 0,
16848            "banking_transaction_count": 0,
16849            "banking_suspicious_count": 0,
16850            "graph_export_count": 0,
16851            "graph_node_count": 0,
16852            "graph_edge_count": 0
16853        }"#;
16854
16855        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
16856
16857        // New fields should default to 0 / None
16858        assert_eq!(stats.llm_enrichment_ms, 0);
16859        assert_eq!(stats.llm_vendors_enriched, 0);
16860        assert_eq!(stats.diffusion_enhancement_ms, 0);
16861        assert_eq!(stats.diffusion_samples_generated, 0);
16862        assert_eq!(stats.causal_generation_ms, 0);
16863        assert_eq!(stats.causal_samples_generated, 0);
16864        assert!(stats.causal_validation_passed.is_none());
16865    }
16866}