Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{
164    AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
165    TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
166};
167use datasynth_core::models::documents::PaymentMethod;
168use datasynth_core::models::IndustrySector;
169use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
170use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
171use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
172use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
173use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
174use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
175use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
176use datasynth_generators::audit::sample_generator::SampleGenerator;
177use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
178use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
179use datasynth_generators::coa_generator::CoAFramework;
180use rayon::prelude::*;
181use rust_decimal::Decimal;
182
183// ============================================================================
184// Configuration Conversion Functions
185// ============================================================================
186
187/// Convert P2P flow config from schema to generator config.
188/// v4.4.1 — build a `DataQualityStats` with only `total_records`
189/// populated to `n_entries`. Used when the data-quality phase is
190/// skipped (by config or resource pressure) so downstream consumers
191/// can still see the denominator. Before v4.4.1 the writer emitted
192/// `total_records: 0` in those cases, which the SDK team flagged as
193/// indistinguishable from "ran but processed nothing".
194fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
195    #[allow(clippy::field_reassign_with_default)]
196    {
197        let mut s = DataQualityStats::default();
198        s.total_records = n_entries;
199        s.missing_values.total_records = n_entries;
200        s.format_variations.total_processed = n_entries;
201        s.duplicates.total_processed = n_entries;
202        s
203    }
204}
205
206fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
207    let payment_behavior = &schema_config.payment_behavior;
208    let late_dist = &payment_behavior.late_payment_days_distribution;
209
210    P2PGeneratorConfig {
211        three_way_match_rate: schema_config.three_way_match_rate,
212        partial_delivery_rate: schema_config.partial_delivery_rate,
213        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
214        price_variance_rate: schema_config.price_variance_rate,
215        max_price_variance_percent: schema_config.max_price_variance_percent,
216        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
217        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
218        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
219        payment_method_distribution: vec![
220            (PaymentMethod::BankTransfer, 0.60),
221            (PaymentMethod::Check, 0.25),
222            (PaymentMethod::Wire, 0.10),
223            (PaymentMethod::CreditCard, 0.05),
224        ],
225        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
226        payment_behavior: P2PPaymentBehavior {
227            late_payment_rate: payment_behavior.late_payment_rate,
228            late_payment_distribution: LatePaymentDistribution {
229                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
230                late_8_to_14: late_dist.late_8_to_14,
231                very_late_15_to_30: late_dist.very_late_15_to_30,
232                severely_late_31_to_60: late_dist.severely_late_31_to_60,
233                extremely_late_over_60: late_dist.extremely_late_over_60,
234            },
235            partial_payment_rate: payment_behavior.partial_payment_rate,
236            payment_correction_rate: payment_behavior.payment_correction_rate,
237            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
238        },
239    }
240}
241
242/// Convert O2C flow config from schema to generator config.
243fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
244    let payment_behavior = &schema_config.payment_behavior;
245
246    O2CGeneratorConfig {
247        credit_check_failure_rate: schema_config.credit_check_failure_rate,
248        partial_shipment_rate: schema_config.partial_shipment_rate,
249        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
250        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
251        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
252        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
253        bad_debt_rate: schema_config.bad_debt_rate,
254        returns_rate: schema_config.return_rate,
255        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
256        payment_method_distribution: vec![
257            (PaymentMethod::BankTransfer, 0.50),
258            (PaymentMethod::Check, 0.30),
259            (PaymentMethod::Wire, 0.15),
260            (PaymentMethod::CreditCard, 0.05),
261        ],
262        payment_behavior: O2CPaymentBehavior {
263            partial_payment_rate: payment_behavior.partial_payments.rate,
264            short_payment_rate: payment_behavior.short_payments.rate,
265            max_short_percent: payment_behavior.short_payments.max_short_percent,
266            on_account_rate: payment_behavior.on_account_payments.rate,
267            payment_correction_rate: payment_behavior.payment_corrections.rate,
268            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
269        },
270    }
271}
272
/// Configuration for which generation phases to run.
///
/// Besides the on/off switch for each phase, this struct carries the volume
/// settings (entities per company, document chains, audit item counts).
///
/// Two constructors exist in this file: the `Default` impl and
/// [`PhaseConfig::from_config`]. Each field's documentation below states the
/// value produced by `Default` and, where it differs, how `from_config`
/// derives it from the `GeneratorConfig`.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Generate master data (vendors, customers, materials, assets, employees).
    ///
    /// Default: `true`; `from_config` always sets `true`.
    pub generate_master_data: bool,
    /// Generate document flows (P2P, O2C).
    ///
    /// Default: `true`; `from_config` always sets `true`.
    pub generate_document_flows: bool,
    /// Generate OCPM events from document flows.
    ///
    /// Default: `false`; `from_config` reads `ocpm.enabled`.
    pub generate_ocpm_events: bool,
    /// Generate journal entries.
    ///
    /// Default: `true`; `from_config` always sets `true`.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    ///
    /// Default: `false`; `from_config` sets it when either `fraud.enabled`
    /// or `anomaly_injection.enabled` is set.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    ///
    /// Default: `false` (to preserve clean test data); `from_config` reads
    /// `data_quality.enabled`.
    pub inject_data_quality: bool,
    /// Validate balance sheet equation after generation.
    ///
    /// Default: `true`; `from_config` always sets `true`.
    pub validate_balances: bool,
    /// Validate that every `gl_account` referenced in generated JEs exists
    /// in the chart of accounts. Off by default (a soft warning is emitted
    /// instead). Set true to fail the run on any orphan account.
    ///
    /// `from_config` also leaves this `false`.
    pub validate_coa_coverage_strict: bool,
    /// Show progress bars.
    ///
    /// Default: `true`; `from_config` always sets `true`.
    pub show_progress: bool,
    /// Number of vendors to generate per company.
    ///
    /// Default: `50` (same value from `from_config`).
    pub vendors_per_company: usize,
    /// Number of customers to generate per company.
    ///
    /// Default: `100` (same value from `from_config`).
    pub customers_per_company: usize,
    /// Number of materials to generate per company.
    ///
    /// Default: `200` (same value from `from_config`).
    pub materials_per_company: usize,
    /// Number of assets to generate per company.
    ///
    /// Default: `50` (same value from `from_config`).
    pub assets_per_company: usize,
    /// Number of employees to generate per company.
    ///
    /// Default: `100` (same value from `from_config`).
    pub employees_per_company: usize,
    /// Number of P2P chains to generate.
    ///
    /// Default: `100` (same value from `from_config`).
    pub p2p_chains: usize,
    /// Number of O2C chains to generate.
    ///
    /// Default: `100` (same value from `from_config`).
    pub o2c_chains: usize,
    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
    ///
    /// Default: `false`; `from_config` reads `audit.enabled`.
    pub generate_audit: bool,
    /// Number of audit engagements to generate.
    ///
    /// Default: `5` (same value from `from_config`).
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    ///
    /// Default: `20` (same value from `from_config`).
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    ///
    /// Default: `5` (same value from `from_config`).
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    ///
    /// Default: `15` (same value from `from_config`).
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    ///
    /// Default: `8` (same value from `from_config`).
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    ///
    /// Default: `10` (same value from `from_config`).
    pub judgments_per_engagement: usize,
    /// Generate banking KYC/AML data (customers, accounts, transactions, typologies).
    ///
    /// Default: `false`; `from_config` reads `banking.enabled`.
    pub generate_banking: bool,
    /// Generate graph exports (accounting network for ML training).
    ///
    /// Default: `false`; `from_config` reads `graph_export.enabled`.
    pub generate_graph_export: bool,
    /// Generate S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    ///
    /// Default: `false`; `from_config` reads `source_to_pay.enabled`.
    pub generate_sourcing: bool,
    /// Generate bank reconciliations from payments.
    ///
    /// Default: `false`; `from_config` reads `financial_reporting.enabled`
    /// (the same flag that drives `generate_financial_statements`).
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements from trial balances.
    ///
    /// Default: `false`; `from_config` reads `financial_reporting.enabled`.
    pub generate_financial_statements: bool,
    /// Generate accounting standards data (revenue recognition, impairment).
    ///
    /// Default: `false`; `from_config` reads `accounting_standards.enabled`.
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data (production orders, quality inspections, cycle counts).
    ///
    /// Default: `false`; `from_config` reads `manufacturing.enabled`.
    pub generate_manufacturing: bool,
    /// Generate sales quotes, management KPIs, and budgets.
    ///
    /// Default: `false`; `from_config` reads `sales_quotes.enabled`.
    pub generate_sales_kpi_budgets: bool,
    /// Generate tax jurisdictions and tax codes.
    ///
    /// Default: `false`; `from_config` reads `tax.enabled`.
    pub generate_tax: bool,
    /// Generate ESG data (emissions, energy, water, waste, social, governance).
    ///
    /// Default: `false`; `from_config` reads `esg.enabled`.
    pub generate_esg: bool,
    /// Generate intercompany transactions and eliminations.
    ///
    /// Default: `false`; `from_config` reads `intercompany.enabled`.
    pub generate_intercompany: bool,
    /// Generate process evolution and organizational events.
    ///
    /// Default: `true`; `from_config` always sets `true`.
    pub generate_evolution_events: bool,
    /// Generate counterfactual (original, mutated) JE pairs for ML training.
    ///
    /// Default: `false` (opt-in for ML workloads); `from_config` reads
    /// `scenarios.generate_counterfactuals`.
    pub generate_counterfactuals: bool,
    /// Generate compliance regulations data (standards registry, procedures, findings, filings).
    ///
    /// Default: `false`; `from_config` reads `compliance_regulations.enabled`.
    pub generate_compliance_regulations: bool,
    /// Generate period-close journal entries (tax provision, income statement close).
    ///
    /// Default: `true`; `from_config` always sets `true`.
    pub generate_period_close: bool,
    /// Generate HR data (payroll, time entries, expenses, pensions, stock comp).
    ///
    /// Default: `false`; `from_config` reads `hr.enabled`.
    pub generate_hr: bool,
    /// Generate treasury data (cash management, hedging, debt, pooling).
    ///
    /// Default: `false`; `from_config` reads `treasury.enabled`.
    pub generate_treasury: bool,
    /// Generate project accounting data (projects, costs, revenue, EVM, milestones).
    ///
    /// Default: `false`; `from_config` reads `project_accounting.enabled`.
    pub generate_project_accounting: bool,
    /// v3.3.0: generate legal documents per engagement (engagement letters,
    /// management rep letters, legal opinions, regulatory filings,
    /// board resolutions).
    ///
    /// Default: `false`; `from_config` requires both
    /// `compliance_regulations.enabled` and
    /// `compliance_regulations.legal_documents.enabled`.
    pub generate_legal_documents: bool,
    /// v3.3.0: generate IT general controls (access logs, change
    /// management records) per audit engagement.
    ///
    /// Default: `false`; `from_config` requires both `audit.enabled` and
    /// `audit.it_controls.enabled`.
    pub generate_it_controls: bool,
    /// v3.3.0: run the analytics-metadata phase after all JE-adding
    /// phases. Wires PriorYearGenerator / IndustryBenchmarkGenerator /
    /// ManagementReportGenerator / DriftEventGenerator.
    ///
    /// Default: `false`; `from_config` reads the top-level
    /// `analytics_metadata.enabled` config flag.
    pub generate_analytics_metadata: bool,
}
374
375impl Default for PhaseConfig {
376    fn default() -> Self {
377        Self {
378            generate_master_data: true,
379            generate_document_flows: true,
380            generate_ocpm_events: false, // Off by default
381            generate_journal_entries: true,
382            inject_anomalies: false,
383            inject_data_quality: false, // Off by default (to preserve clean test data)
384            validate_balances: true,
385            validate_coa_coverage_strict: false,
386            show_progress: true,
387            vendors_per_company: 50,
388            customers_per_company: 100,
389            materials_per_company: 200,
390            assets_per_company: 50,
391            employees_per_company: 100,
392            p2p_chains: 100,
393            o2c_chains: 100,
394            generate_audit: false, // Off by default
395            audit_engagements: 5,
396            workpapers_per_engagement: 20,
397            evidence_per_workpaper: 5,
398            risks_per_engagement: 15,
399            findings_per_engagement: 8,
400            judgments_per_engagement: 10,
401            generate_banking: false,                // Off by default
402            generate_graph_export: false,           // Off by default
403            generate_sourcing: false,               // Off by default
404            generate_bank_reconciliation: false,    // Off by default
405            generate_financial_statements: false,   // Off by default
406            generate_accounting_standards: false,   // Off by default
407            generate_manufacturing: false,          // Off by default
408            generate_sales_kpi_budgets: false,      // Off by default
409            generate_tax: false,                    // Off by default
410            generate_esg: false,                    // Off by default
411            generate_intercompany: false,           // Off by default
412            generate_evolution_events: true,        // On by default
413            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
414            generate_compliance_regulations: false, // Off by default
415            generate_period_close: true,            // On by default
416            generate_hr: false,                     // Off by default
417            generate_treasury: false,               // Off by default
418            generate_project_accounting: false,     // Off by default
419            generate_legal_documents: false,        // v3.3.0 — off by default
420            generate_it_controls: false,            // v3.3.0 — off by default
421            generate_analytics_metadata: false,     // v3.3.0 — off by default
422        }
423    }
424}
425
426impl PhaseConfig {
427    /// Derive phase flags from [`GeneratorConfig`].
428    ///
429    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
430    /// CLI flags can override individual fields after calling this method.
431    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
432        Self {
433            // Always-on phases
434            generate_master_data: true,
435            generate_document_flows: true,
436            generate_journal_entries: true,
437            validate_balances: true,
438            validate_coa_coverage_strict: false,
439            generate_period_close: true,
440            generate_evolution_events: true,
441            show_progress: true,
442
443            // Feature-gated phases — derived from config sections
444            generate_audit: cfg.audit.enabled,
445            generate_banking: cfg.banking.enabled,
446            generate_graph_export: cfg.graph_export.enabled,
447            generate_sourcing: cfg.source_to_pay.enabled,
448            generate_intercompany: cfg.intercompany.enabled,
449            generate_financial_statements: cfg.financial_reporting.enabled,
450            generate_bank_reconciliation: cfg.financial_reporting.enabled,
451            generate_accounting_standards: cfg.accounting_standards.enabled,
452            generate_manufacturing: cfg.manufacturing.enabled,
453            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
454            generate_tax: cfg.tax.enabled,
455            generate_esg: cfg.esg.enabled,
456            generate_ocpm_events: cfg.ocpm.enabled,
457            generate_compliance_regulations: cfg.compliance_regulations.enabled,
458            generate_hr: cfg.hr.enabled,
459            generate_treasury: cfg.treasury.enabled,
460            generate_project_accounting: cfg.project_accounting.enabled,
461
462            // v3.3.0: L1 generator wiring
463            // Legal documents emitted when compliance_regulations is enabled
464            // and the nested legal_documents.enabled flag is set.
465            generate_legal_documents: cfg.compliance_regulations.enabled
466                && cfg.compliance_regulations.legal_documents.enabled,
467            // IT general controls emitted when audit is enabled and the
468            // nested it_controls.enabled flag is set.
469            generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
470            // Analytics metadata phase (prior-year, industry benchmarks,
471            // management reports, drift events).
472            generate_analytics_metadata: cfg.analytics_metadata.enabled,
473
474            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
475            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
476
477            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
478            inject_data_quality: cfg.data_quality.enabled,
479
480            // Count defaults (CLI can override after calling this method)
481            vendors_per_company: 50,
482            customers_per_company: 100,
483            materials_per_company: 200,
484            assets_per_company: 50,
485            employees_per_company: 100,
486            p2p_chains: 100,
487            o2c_chains: 100,
488            audit_engagements: 5,
489            workpapers_per_engagement: 20,
490            evidence_per_workpaper: 5,
491            risks_per_engagement: 15,
492            findings_per_engagement: 8,
493            judgments_per_engagement: 10,
494        }
495    }
496}
497
/// Master data snapshot containing all generated entities.
///
/// Every field is a `Vec`, so the derived `Default` yields a snapshot in
/// which all collections are empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Generated vendors.
    pub vendors: Vec<Vendor>,
    /// Generated customers.
    pub customers: Vec<Customer>,
    /// Generated materials.
    pub materials: Vec<Material>,
    /// Generated fixed assets.
    pub assets: Vec<FixedAsset>,
    /// Generated employees.
    pub employees: Vec<Employee>,
    /// Generated cost center hierarchy (two-level: departments + sub-departments).
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// v5.1: Generated profit center hierarchy (two-level: top-level
    /// segment / region / product-group nodes + sub-units). Emits to
    /// SAP CEPC alongside `cost_centers` → CSKS.
    pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
    /// v3.3.0+: organizational profiles (one per company) with
    /// industry / geography / structure / complexity metadata. Emitted
    /// alongside master data when `generate_master_data = true`.
    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
}
524
/// Info about a completed hypergraph export.
///
/// Holds the three element counts of the export plus the directory it was
/// written to. Unlike the snapshot structs in this file it does not derive
/// `Default`, so a value has to be built with all four fields supplied.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes exported.
    pub node_count: usize,
    /// Number of pairwise edges exported.
    pub edge_count: usize,
    /// Number of hyperedges exported.
    pub hyperedge_count: usize,
    /// Output directory path.
    pub output_path: PathBuf,
}
537
/// Document flow snapshot containing all generated document chains.
///
/// Alongside the P2P and O2C chains, the snapshot holds one flat list per
/// document type and the cross-document references. Every field is a `Vec`,
/// so the derived `Default` yields an empty snapshot.
// NOTE(review): the flattened lists presumably hold the same documents that
// sit inside the chains — confirm against the code that fills this struct.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// All purchase orders (flattened).
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// All goods receipts (flattened).
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// All vendor invoices (flattened).
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// All sales orders (flattened).
    pub sales_orders: Vec<documents::SalesOrder>,
    /// All deliveries (flattened).
    pub deliveries: Vec<documents::Delivery>,
    /// All customer invoices (flattened).
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// All payments (flattened).
    pub payments: Vec<documents::Payment>,
    /// Cross-document references collected from all document headers
    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
    pub document_references: Vec<documents::DocumentReference>,
}
563
/// Subledger snapshot containing generated subledger records.
///
/// Groups the AP, AR, fixed-asset and inventory subledger outputs together
/// with the reports and runs derived from them. Every field is a `Vec`, so
/// the derived `Default` yields an empty snapshot.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices linked from document flow vendor invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices linked from document flow customer invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// FA subledger records (asset acquisitions from FA generator).
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions from inventory generator.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements from inventory generator.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports, one per company, computed after payment settlement.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports, one per company, computed after payment settlement.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs executed after AR aging (one per company per dunning cycle).
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters generated across all dunning runs.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
590
/// OCPM snapshot containing generated OCPM event log data.
///
/// The derived `Default` yields no event log (`None`) and zero for all
/// three counters.
// NOTE(review): the counters presumably summarize `event_log` — confirm
// whether they can be non-zero while `event_log` is `None`.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// OCPM event log (if generated)
    pub event_log: Option<OcpmEventLog>,
    /// Number of events generated
    pub event_count: usize,
    /// Number of objects generated
    pub object_count: usize,
    /// Number of cases generated
    pub case_count: usize,
}
603
604/// Audit data snapshot containing all generated audit-related entities.
605#[derive(Debug, Clone, Default)]
606pub struct AuditSnapshot {
607    /// Audit engagements per ISA 210/220.
608    pub engagements: Vec<AuditEngagement>,
609    /// Workpapers per ISA 230.
610    pub workpapers: Vec<Workpaper>,
611    /// Audit evidence per ISA 500.
612    pub evidence: Vec<AuditEvidence>,
613    /// Risk assessments per ISA 315/330.
614    pub risk_assessments: Vec<RiskAssessment>,
615    /// Audit findings per ISA 265.
616    pub findings: Vec<AuditFinding>,
617    /// Professional judgments per ISA 200.
618    pub judgments: Vec<ProfessionalJudgment>,
619    /// External confirmations per ISA 505.
620    pub confirmations: Vec<ExternalConfirmation>,
621    /// Confirmation responses per ISA 505.
622    pub confirmation_responses: Vec<ConfirmationResponse>,
623    /// Audit procedure steps per ISA 330/530.
624    pub procedure_steps: Vec<AuditProcedureStep>,
625    /// Audit samples per ISA 530.
626    pub samples: Vec<AuditSample>,
627    /// Analytical procedure results per ISA 520.
628    pub analytical_results: Vec<AnalyticalProcedureResult>,
629    /// Internal audit functions per ISA 610.
630    pub ia_functions: Vec<InternalAuditFunction>,
631    /// Internal audit reports per ISA 610.
632    pub ia_reports: Vec<InternalAuditReport>,
633    /// Related parties per ISA 550.
634    pub related_parties: Vec<RelatedParty>,
635    /// Related party transactions per ISA 550.
636    pub related_party_transactions: Vec<RelatedPartyTransaction>,
637    // ---- ISA 600: Group Audits ----
638    /// Component auditors assigned by jurisdiction (ISA 600).
639    pub component_auditors: Vec<ComponentAuditor>,
640    /// Group audit plan with materiality allocations (ISA 600).
641    pub group_audit_plan: Option<GroupAuditPlan>,
642    /// Component instructions issued to component auditors (ISA 600).
643    pub component_instructions: Vec<ComponentInstruction>,
644    /// Reports received from component auditors (ISA 600).
645    pub component_reports: Vec<ComponentAuditorReport>,
646    // ---- ISA 210: Engagement Letters ----
647    /// Engagement letters per ISA 210.
648    pub engagement_letters: Vec<EngagementLetter>,
649    // ---- ISA 560 / IAS 10: Subsequent Events ----
650    /// Subsequent events per ISA 560 / IAS 10.
651    pub subsequent_events: Vec<SubsequentEvent>,
652    // ---- ISA 402: Service Organization Controls ----
653    /// Service organizations identified per ISA 402.
654    pub service_organizations: Vec<ServiceOrganization>,
655    /// SOC reports obtained per ISA 402.
656    pub soc_reports: Vec<SocReport>,
657    /// User entity controls documented per ISA 402.
658    pub user_entity_controls: Vec<UserEntityControl>,
659    // ---- ISA 570: Going Concern ----
660    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
661    pub going_concern_assessments:
662        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
663    // ---- ISA 540: Accounting Estimates ----
664    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
665    pub accounting_estimates:
666        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
667    // ---- ISA 700/701/705/706: Audit Opinions ----
668    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
669    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
670    /// Key Audit Matters per ISA 701 (flattened across all opinions).
671    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
672    // ---- SOX 302 / 404 ----
673    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
674    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
675    /// SOX Section 404 ICFR assessments (one per entity per year).
676    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
677    // ---- ISA 320: Materiality ----
678    /// Materiality calculations per entity per period (ISA 320).
679    pub materiality_calculations:
680        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
681    // ---- ISA 315: Combined Risk Assessments ----
682    /// Combined Risk Assessments per account area / assertion (ISA 315).
683    pub combined_risk_assessments:
684        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
685    // ---- ISA 530: Sampling Plans ----
686    /// Sampling plans per CRA at Moderate or higher (ISA 530).
687    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
688    /// Individual sampled items (key items + representative items) per ISA 530.
689    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
690    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
691    /// Significant classes of transactions per ISA 315 (one set per entity).
692    pub significant_transaction_classes:
693        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
694    // ---- ISA 520: Unusual Item Markers ----
695    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
696    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
697    // ---- ISA 520: Analytical Relationships ----
698    /// Analytical relationships (ratios, trends, correlations) per entity.
699    pub analytical_relationships:
700        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
701    // ---- PCAOB-ISA Cross-Reference ----
702    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
703    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
704    // ---- ISA Standard Reference ----
705    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
706    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
707    // ---- ISA 220 / ISA 300: Audit Scopes ----
708    /// Audit scope records (one per engagement) describing the audit boundary.
709    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
710    // ---- FSM Event Trail ----
711    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
712    /// Contains the ordered sequence of state-transition and procedure-step events
713    /// generated by the audit FSM engine.
714    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
715    // ---- v3.3.0: L1 generator wiring ----
716    /// Legal documents (engagement letters, management reps, legal
717    /// opinions, regulatory filings, board resolutions) per entity.
718    /// Emitted by `LegalDocumentGenerator` when
719    /// `compliance_regulations.legal_documents.enabled = true`.
720    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
721    /// IT general controls — access logs (login/privileged action
722    /// audit trail). Emitted by `ItControlsGenerator` when
723    /// `audit.it_controls.enabled = true`.
724    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
725    /// IT general controls — change management records (code deploys,
726    /// config changes, patches). Emitted by `ItControlsGenerator`.
727    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
728}
729
730/// Banking KYC/AML data snapshot containing all generated banking entities.
731#[derive(Debug, Clone, Default)]
732pub struct BankingSnapshot {
733    /// Banking customers (retail, business, trust).
734    pub customers: Vec<BankingCustomer>,
735    /// Bank accounts.
736    pub accounts: Vec<BankAccount>,
737    /// Bank transactions with AML labels.
738    pub transactions: Vec<BankTransaction>,
739    /// Transaction-level AML labels with features.
740    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
741    /// Customer-level AML labels.
742    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
743    /// Account-level AML labels.
744    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
745    /// Relationship-level AML labels.
746    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
747    /// Case narratives for AML scenarios.
748    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
749    /// Number of suspicious transactions.
750    pub suspicious_count: usize,
751    /// Number of AML scenarios generated.
752    pub scenario_count: usize,
753}
754
755/// Graph export snapshot containing exported graph metadata.
756#[derive(Debug, Clone, Default, Serialize)]
757pub struct GraphExportSnapshot {
758    /// Whether graph export was performed.
759    pub exported: bool,
760    /// Number of graphs exported.
761    pub graph_count: usize,
762    /// Exported graph metadata (by format name).
763    pub exports: HashMap<String, GraphExportInfo>,
764}
765
766/// Information about an exported graph.
767#[derive(Debug, Clone, Serialize)]
768pub struct GraphExportInfo {
769    /// Graph name.
770    pub name: String,
771    /// Export format (pytorch_geometric, neo4j, dgl).
772    pub format: String,
773    /// Output directory path.
774    pub output_path: PathBuf,
775    /// Number of nodes.
776    pub node_count: usize,
777    /// Number of edges.
778    pub edge_count: usize,
779}
780
781/// S2C sourcing data snapshot.
782#[derive(Debug, Clone, Default)]
783pub struct SourcingSnapshot {
784    /// Spend analyses.
785    pub spend_analyses: Vec<SpendAnalysis>,
786    /// Sourcing projects.
787    pub sourcing_projects: Vec<SourcingProject>,
788    /// Supplier qualifications.
789    pub qualifications: Vec<SupplierQualification>,
790    /// RFx events (RFI, RFP, RFQ).
791    pub rfx_events: Vec<RfxEvent>,
792    /// Supplier bids.
793    pub bids: Vec<SupplierBid>,
794    /// Bid evaluations.
795    pub bid_evaluations: Vec<BidEvaluation>,
796    /// Procurement contracts.
797    pub contracts: Vec<ProcurementContract>,
798    /// Catalog items.
799    pub catalog_items: Vec<CatalogItem>,
800    /// Supplier scorecards.
801    pub scorecards: Vec<SupplierScorecard>,
802}
803
804/// A single period's trial balance with metadata.
805///
806/// Used as the orchestrator's in-memory representation while it
807/// builds per-period FS / CF artefacts.  At write time the runtime
808/// converts each `PeriodTrialBalance` to the canonical
809/// [`datasynth_core::models::balance::TrialBalance`] shape via
810/// [`PeriodTrialBalance::into_canonical`] so the on-disk
811/// `period_close/trial_balances.json` matches what the group
812/// aggregate phase loads — see
813/// [`crate::output_writer::write_outputs`].
814#[derive(Debug, Clone, Serialize, Deserialize)]
815pub struct PeriodTrialBalance {
816    /// Fiscal year.
817    pub fiscal_year: u16,
818    /// Fiscal period (1-12).
819    pub fiscal_period: u8,
820    /// Period start date.
821    pub period_start: NaiveDate,
822    /// Period end date.
823    pub period_end: NaiveDate,
824    /// Trial balance entries for this period.
825    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
826}
827
828impl PeriodTrialBalance {
829    /// Convert this in-memory period TB into the canonical
830    /// [`datasynth_core::models::balance::TrialBalance`] shape used
831    /// for the on-disk artefact.
832    ///
833    /// v5.1: the on-disk shape is now canonical end-to-end.  Group
834    /// aggregate's `tb_loader` consumes the canonical type directly,
835    /// dropping the v5.0 dual-shape detection that converted from
836    /// `PeriodTrialBalance` JSON on the fly.
837    pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
838        let mut total_debits = Decimal::ZERO;
839        let mut total_credits = Decimal::ZERO;
840        let lines: Vec<TrialBalanceLine> = self
841            .entries
842            .into_iter()
843            .map(|e| {
844                total_debits += e.debit_balance;
845                total_credits += e.credit_balance;
846                let category = AccountCategory::from_account_code(&e.account_code);
847                TrialBalanceLine {
848                    account_code: e.account_code,
849                    account_description: e.account_name,
850                    category,
851                    account_type: AccountType::Asset,
852                    opening_balance: Decimal::ZERO,
853                    period_debits: e.debit_balance,
854                    period_credits: e.credit_balance,
855                    closing_balance: e.debit_balance - e.credit_balance,
856                    debit_balance: e.debit_balance,
857                    credit_balance: e.credit_balance,
858                    cost_center: None,
859                    profit_center: None,
860                }
861            })
862            .collect();
863        let imbalance = total_debits - total_credits;
864        let is_balanced = imbalance.abs() < Decimal::new(1, 2);
865        TrialBalance {
866            trial_balance_id: format!(
867                "{company_code}-{:04}{:02}",
868                self.fiscal_year, self.fiscal_period
869            ),
870            company_code: company_code.to_string(),
871            company_name: None,
872            as_of_date: self.period_end,
873            fiscal_year: self.fiscal_year as i32,
874            fiscal_period: self.fiscal_period as u32,
875            currency: currency.to_string(),
876            balance_type: TrialBalanceType::Adjusted,
877            lines,
878            total_debits,
879            total_credits,
880            is_balanced,
881            out_of_balance: imbalance,
882            is_equation_valid: is_balanced,
883            equation_difference: imbalance,
884            category_summary: std::collections::HashMap::new(),
885            created_at: self
886                .period_start
887                .and_hms_opt(0, 0, 0)
888                .expect("midnight is a valid time"),
889            created_by: "ORCHESTRATOR".to_string(),
890            approved_by: None,
891            approved_at: None,
892            status: TrialBalanceStatus::Final,
893        }
894    }
895}
896
897/// Financial reporting snapshot (financial statements + bank reconciliations).
898#[derive(Debug, Clone, Default)]
899pub struct FinancialReportingSnapshot {
900    /// Financial statements (balance sheet, income statement, cash flow).
901    /// For multi-entity configs this includes all standalone statements.
902    pub financial_statements: Vec<FinancialStatement>,
903    /// Standalone financial statements keyed by entity code.
904    /// Each entity has its own slice of statements.
905    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
906    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
907    pub consolidated_statements: Vec<FinancialStatement>,
908    /// Consolidation schedules (one per period) showing pre/post elimination detail.
909    pub consolidation_schedules: Vec<ConsolidationSchedule>,
910    /// Bank reconciliations.
911    pub bank_reconciliations: Vec<BankReconciliation>,
912    /// Period-close trial balances (one per period).
913    pub trial_balances: Vec<PeriodTrialBalance>,
914    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
915    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
916    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
917    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
918    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
919    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
920}
921
922/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
923#[derive(Debug, Clone, Default)]
924pub struct HrSnapshot {
925    /// Payroll runs (actual data).
926    pub payroll_runs: Vec<PayrollRun>,
927    /// Payroll line items (actual data).
928    pub payroll_line_items: Vec<PayrollLineItem>,
929    /// Time entries (actual data).
930    pub time_entries: Vec<TimeEntry>,
931    /// Expense reports (actual data).
932    pub expense_reports: Vec<ExpenseReport>,
933    /// Benefit enrollments (actual data).
934    pub benefit_enrollments: Vec<BenefitEnrollment>,
935    /// Defined benefit pension plans (IAS 19 / ASC 715).
936    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
937    /// Pension obligation (DBO) roll-forwards.
938    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
939    /// Plan asset roll-forwards.
940    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
941    /// Pension disclosures.
942    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
943    /// Journal entries generated from pension expense and OCI remeasurements.
944    pub pension_journal_entries: Vec<JournalEntry>,
945    /// Stock grants (ASC 718 / IFRS 2).
946    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
947    /// Stock-based compensation period expense records.
948    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
949    /// Journal entries generated from stock-based compensation expense.
950    pub stock_comp_journal_entries: Vec<JournalEntry>,
951    /// Payroll runs.
952    pub payroll_run_count: usize,
953    /// Payroll line item count.
954    pub payroll_line_item_count: usize,
955    /// Time entry count.
956    pub time_entry_count: usize,
957    /// Expense report count.
958    pub expense_report_count: usize,
959    /// Benefit enrollment count.
960    pub benefit_enrollment_count: usize,
961    /// Pension plan count.
962    pub pension_plan_count: usize,
963    /// Stock grant count.
964    pub stock_grant_count: usize,
965}
966
967/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
968#[derive(Debug, Clone, Default)]
969pub struct AccountingStandardsSnapshot {
970    /// Revenue recognition contracts (actual data).
971    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
972    /// Impairment tests (actual data).
973    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
974    /// Business combinations (IFRS 3 / ASC 805).
975    pub business_combinations:
976        Vec<datasynth_core::models::business_combination::BusinessCombination>,
977    /// Journal entries generated from business combinations (Day 1 + amortization).
978    pub business_combination_journal_entries: Vec<JournalEntry>,
979    /// ECL models (IFRS 9 / ASC 326).
980    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
981    /// ECL provision movements.
982    pub ecl_provision_movements:
983        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
984    /// Journal entries from ECL provision.
985    pub ecl_journal_entries: Vec<JournalEntry>,
986    /// Provisions (IAS 37 / ASC 450).
987    pub provisions: Vec<datasynth_core::models::provision::Provision>,
988    /// Provision movement roll-forwards (IAS 37 / ASC 450).
989    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
990    /// Contingent liabilities (IAS 37 / ASC 450).
991    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
992    /// Journal entries from provisions.
993    pub provision_journal_entries: Vec<JournalEntry>,
994    /// IAS 21 functional currency translation results (one per entity per period).
995    pub currency_translation_results:
996        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
997    /// Revenue recognition contract count.
998    pub revenue_contract_count: usize,
999    /// Impairment test count.
1000    pub impairment_test_count: usize,
1001    /// Business combination count.
1002    pub business_combination_count: usize,
1003    /// ECL model count.
1004    pub ecl_model_count: usize,
1005    /// Provision count.
1006    pub provision_count: usize,
1007    /// Currency translation result count (IAS 21).
1008    pub currency_translation_count: usize,
1009    // ---- v3.3.1: Lease / FairValue / FrameworkReconciliation ----
1010    /// Lease contracts (IFRS 16 / ASC 842). Each entry carries its own
1011    /// ROU asset + lease liability details.
1012    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
1013    /// Fair value measurements (IFRS 13 / ASC 820) across Level 1/2/3.
1014    pub fair_value_measurements:
1015        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
1016    /// Framework difference records (dual-reporting only).
1017    pub framework_differences:
1018        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
1019    /// Per-entity framework reconciliation (dual-reporting only).
1020    pub framework_reconciliations:
1021        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
1022    /// Counts for stats logging.
1023    pub lease_count: usize,
1024    pub fair_value_measurement_count: usize,
1025    pub framework_difference_count: usize,
1026}
1027
1028/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
1029#[derive(Debug, Clone, Default)]
1030pub struct ComplianceRegulationsSnapshot {
1031    /// Flattened standard records for output.
1032    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
1033    /// Cross-reference records.
1034    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
1035    /// Jurisdiction profile records.
1036    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
1037    /// Generated audit procedures.
1038    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
1039    /// Generated compliance findings.
1040    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
1041    /// Generated regulatory filings.
1042    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
1043    /// Compliance graph (if graph integration enabled).
1044    pub compliance_graph: Option<datasynth_graph::Graph>,
1045}
1046
1047/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
1048#[derive(Debug, Clone, Default)]
1049pub struct ManufacturingSnapshot {
1050    /// Production orders (actual data).
1051    pub production_orders: Vec<ProductionOrder>,
1052    /// Quality inspections (actual data).
1053    pub quality_inspections: Vec<QualityInspection>,
1054    /// Cycle counts (actual data).
1055    pub cycle_counts: Vec<CycleCount>,
1056    /// BOM components (actual data).
1057    pub bom_components: Vec<BomComponent>,
1058    /// Inventory movements (actual data).
1059    pub inventory_movements: Vec<InventoryMovement>,
1060    /// Production order count.
1061    pub production_order_count: usize,
1062    /// Quality inspection count.
1063    pub quality_inspection_count: usize,
1064    /// Cycle count count.
1065    pub cycle_count_count: usize,
1066    /// BOM component count.
1067    pub bom_component_count: usize,
1068    /// Inventory movement count.
1069    pub inventory_movement_count: usize,
1070}
1071
1072/// Sales, KPI, and budget data snapshot.
1073#[derive(Debug, Clone, Default)]
1074pub struct SalesKpiBudgetsSnapshot {
1075    /// Sales quotes (actual data).
1076    pub sales_quotes: Vec<SalesQuote>,
1077    /// Management KPIs (actual data).
1078    pub kpis: Vec<ManagementKpi>,
1079    /// Budgets (actual data).
1080    pub budgets: Vec<Budget>,
1081    /// Sales quote count.
1082    pub sales_quote_count: usize,
1083    /// Management KPI count.
1084    pub kpi_count: usize,
1085    /// Budget line count.
1086    pub budget_line_count: usize,
1087}
1088
1089/// Anomaly labels generated during injection.
1090#[derive(Debug, Clone, Default)]
1091pub struct AnomalyLabels {
1092    /// All anomaly labels.
1093    pub labels: Vec<LabeledAnomaly>,
1094    /// Summary statistics.
1095    pub summary: Option<AnomalySummary>,
1096    /// Count by anomaly type.
1097    pub by_type: HashMap<String, usize>,
1098}
1099
1100/// Balance validation results from running balance tracker.
1101#[derive(Debug, Clone, Default)]
1102pub struct BalanceValidationResult {
1103    /// Whether validation was performed.
1104    pub validated: bool,
1105    /// Whether balance sheet equation is satisfied.
1106    pub is_balanced: bool,
1107    /// Number of entries processed.
1108    pub entries_processed: u64,
1109    /// Total debits across all entries.
1110    pub total_debits: rust_decimal::Decimal,
1111    /// Total credits across all entries.
1112    pub total_credits: rust_decimal::Decimal,
1113    /// Number of accounts tracked.
1114    pub accounts_tracked: usize,
1115    /// Number of companies tracked.
1116    pub companies_tracked: usize,
1117    /// Validation errors encountered.
1118    pub validation_errors: Vec<ValidationError>,
1119    /// Whether any unbalanced entries were found.
1120    pub has_unbalanced_entries: bool,
1121}
1122
1123/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
1124#[derive(Debug, Clone, Default)]
1125pub struct TaxSnapshot {
1126    /// Tax jurisdictions.
1127    pub jurisdictions: Vec<TaxJurisdiction>,
1128    /// Tax codes.
1129    pub codes: Vec<TaxCode>,
1130    /// Tax lines computed on documents.
1131    pub tax_lines: Vec<TaxLine>,
1132    /// Tax returns filed per period.
1133    pub tax_returns: Vec<TaxReturn>,
1134    /// Tax provisions.
1135    pub tax_provisions: Vec<TaxProvision>,
1136    /// Withholding tax records.
1137    pub withholding_records: Vec<WithholdingTaxRecord>,
1138    /// Tax anomaly labels.
1139    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1140    /// Jurisdiction count.
1141    pub jurisdiction_count: usize,
1142    /// Code count.
1143    pub code_count: usize,
1144    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
1145    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1146    /// Journal entries posting tax payable/receivable from computed tax lines.
1147    pub tax_posting_journal_entries: Vec<JournalEntry>,
1148}
1149
1150/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1151#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1152pub struct IntercompanySnapshot {
1153    /// Group ownership structure (parent/subsidiary/associate relationships).
1154    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1155    /// IC matched pairs (transaction pairs between related entities).
1156    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1157    /// IC journal entries generated from matched pairs (seller side).
1158    pub seller_journal_entries: Vec<JournalEntry>,
1159    /// IC journal entries generated from matched pairs (buyer side).
1160    pub buyer_journal_entries: Vec<JournalEntry>,
1161    /// Elimination entries for consolidation.
1162    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1163    /// NCI measurements derived from group structure ownership percentages.
1164    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1165    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
1166    #[serde(skip)]
1167    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1168    /// IC matched pair count.
1169    pub matched_pair_count: usize,
1170    /// IC elimination entry count.
1171    pub elimination_entry_count: usize,
1172    /// IC matching rate (0.0 to 1.0).
1173    pub match_rate: f64,
1174}
1175
1176/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
1177#[derive(Debug, Clone, Default)]
1178pub struct EsgSnapshot {
1179    /// Emission records (scope 1, 2, 3).
1180    pub emissions: Vec<EmissionRecord>,
1181    /// Energy consumption records.
1182    pub energy: Vec<EnergyConsumption>,
1183    /// Water usage records.
1184    pub water: Vec<WaterUsage>,
1185    /// Waste records.
1186    pub waste: Vec<WasteRecord>,
1187    /// Workforce diversity metrics.
1188    pub diversity: Vec<WorkforceDiversityMetric>,
1189    /// Pay equity metrics.
1190    pub pay_equity: Vec<PayEquityMetric>,
1191    /// Safety incidents.
1192    pub safety_incidents: Vec<SafetyIncident>,
1193    /// Safety metrics.
1194    pub safety_metrics: Vec<SafetyMetric>,
1195    /// Governance metrics.
1196    pub governance: Vec<GovernanceMetric>,
1197    /// Supplier ESG assessments.
1198    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1199    /// Materiality assessments.
1200    pub materiality: Vec<MaterialityAssessment>,
1201    /// ESG disclosures.
1202    pub disclosures: Vec<EsgDisclosure>,
1203    /// Climate scenarios.
1204    pub climate_scenarios: Vec<ClimateScenario>,
1205    /// ESG anomaly labels.
1206    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1207    /// Total emission record count.
1208    pub emission_count: usize,
1209    /// Total disclosure count.
1210    pub disclosure_count: usize,
1211}
1212
1213/// Treasury data snapshot (cash management, hedging, debt, pooling).
1214#[derive(Debug, Clone, Default)]
1215pub struct TreasurySnapshot {
1216    /// Cash positions (daily balances per account).
1217    pub cash_positions: Vec<CashPosition>,
1218    /// Cash forecasts.
1219    pub cash_forecasts: Vec<CashForecast>,
1220    /// Cash pools.
1221    pub cash_pools: Vec<CashPool>,
1222    /// Cash pool sweep transactions.
1223    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1224    /// Hedging instruments.
1225    pub hedging_instruments: Vec<HedgingInstrument>,
1226    /// Hedge relationships (ASC 815/IFRS 9 designations).
1227    pub hedge_relationships: Vec<HedgeRelationship>,
1228    /// Debt instruments.
1229    pub debt_instruments: Vec<DebtInstrument>,
1230    /// Bank guarantees and letters of credit.
1231    pub bank_guarantees: Vec<BankGuarantee>,
1232    /// Intercompany netting runs.
1233    pub netting_runs: Vec<NettingRun>,
1234    /// Treasury anomaly labels.
1235    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1236    /// Journal entries generated from treasury instruments (debt interest accruals,
1237    /// hedge MTM, cash pool sweeps).
1238    pub journal_entries: Vec<JournalEntry>,
1239}
1240
1241/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
1242#[derive(Debug, Clone, Default)]
1243pub struct ProjectAccountingSnapshot {
1244    /// Projects with WBS hierarchies.
1245    pub projects: Vec<Project>,
1246    /// Project cost lines (linked from source documents).
1247    pub cost_lines: Vec<ProjectCostLine>,
1248    /// Revenue recognition records.
1249    pub revenue_records: Vec<ProjectRevenue>,
1250    /// Earned value metrics.
1251    pub earned_value_metrics: Vec<EarnedValueMetric>,
1252    /// Change orders.
1253    pub change_orders: Vec<ChangeOrder>,
1254    /// Project milestones.
1255    pub milestones: Vec<ProjectMilestone>,
1256}
1257
1258/// Complete result of enhanced generation run.
1259#[derive(Debug, Default)]
1260pub struct EnhancedGenerationResult {
1261    /// Generated chart of accounts.
1262    pub chart_of_accounts: ChartOfAccounts,
1263    /// Master data snapshot.
1264    pub master_data: MasterDataSnapshot,
1265    /// Document flow snapshot.
1266    pub document_flows: DocumentFlowSnapshot,
1267    /// Subledger snapshot (linked from document flows).
1268    pub subledger: SubledgerSnapshot,
1269    /// OCPM event log snapshot (if OCPM generation enabled).
1270    pub ocpm: OcpmSnapshot,
1271    /// Audit data snapshot (if audit generation enabled).
1272    pub audit: AuditSnapshot,
1273    /// Banking KYC/AML data snapshot (if banking generation enabled).
1274    pub banking: BankingSnapshot,
1275    /// Graph export snapshot (if graph export enabled).
1276    pub graph_export: GraphExportSnapshot,
1277    /// S2C sourcing data snapshot (if sourcing generation enabled).
1278    pub sourcing: SourcingSnapshot,
1279    /// Financial reporting snapshot (financial statements + bank reconciliations).
1280    pub financial_reporting: FinancialReportingSnapshot,
1281    /// HR data snapshot (payroll, time entries, expenses).
1282    pub hr: HrSnapshot,
1283    /// Accounting standards snapshot (revenue recognition, impairment).
1284    pub accounting_standards: AccountingStandardsSnapshot,
1285    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1286    pub manufacturing: ManufacturingSnapshot,
1287    /// Sales, KPI, and budget snapshot.
1288    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1289    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1290    pub tax: TaxSnapshot,
1291    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1292    pub esg: EsgSnapshot,
1293    /// Treasury data snapshot (cash management, hedging, debt).
1294    pub treasury: TreasurySnapshot,
1295    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1296    pub project_accounting: ProjectAccountingSnapshot,
1297    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1298    pub process_evolution: Vec<ProcessEvolutionEvent>,
1299    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1300    pub organizational_events: Vec<OrganizationalEvent>,
1301    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1302    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1303    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1304    pub intercompany: IntercompanySnapshot,
1305    /// Generated journal entries.
1306    pub journal_entries: Vec<JournalEntry>,
1307    /// Anomaly labels (if injection enabled).
1308    pub anomaly_labels: AnomalyLabels,
1309    /// Balance validation results (if validation enabled).
1310    pub balance_validation: BalanceValidationResult,
1311    /// Data quality statistics (if injection enabled).
1312    pub data_quality_stats: DataQualityStats,
1313    /// Data quality issue records (if injection enabled).
1314    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1315    /// Generation statistics.
1316    pub statistics: EnhancedGenerationStatistics,
1317    /// Data lineage graph (if tracking enabled).
1318    pub lineage: Option<super::lineage::LineageGraph>,
1319    /// Quality gate evaluation result.
1320    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1321    /// Internal controls (if controls generation enabled).
1322    pub internal_controls: Vec<InternalControl>,
1323    /// SoD (Segregation of Duties) violations identified during control application.
1324    ///
1325    /// Each record corresponds to a journal entry where `sod_violation == true`.
1326    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1327    /// Opening balances (if opening balance generation enabled).
1328    pub opening_balances: Vec<GeneratedOpeningBalance>,
1329    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1330    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1331    /// Counterfactual (original, mutated) JE pairs for ML training.
1332    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1333    /// Fraud red-flag indicators on P2P/O2C documents.
1334    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1335    /// Collusion rings (coordinated fraud networks).
1336    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1337    /// Bi-temporal version chains for vendor entities.
1338    pub temporal_vendor_chains:
1339        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1340    /// Entity relationship graph (nodes + edges with strength scores).
1341    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1342    /// Cross-process links (P2P ↔ O2C via inventory movements).
1343    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1344    /// Industry-specific GL accounts and metadata.
1345    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1346    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1347    pub compliance_regulations: ComplianceRegulationsSnapshot,
1348    /// v3.3.0: analytics-metadata snapshot (prior-year comparatives,
1349    /// industry benchmarks, management reports, drift events). Empty
1350    /// when `analytics_metadata.enabled = false`.
1351    pub analytics_metadata: AnalyticsMetadataSnapshot,
1352    /// v3.5.1+: statistical validation report (Benford, chi-squared,
1353    /// KS) over the generated amount distribution.  `None` when
1354    /// `distributions.validation.enabled = false`.
1355    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1356    /// v4.1.3+: interconnectivity snapshot — vendor tier assignments,
1357    /// customer value-segment labels, and industry-specific metadata
1358    /// populated from the previously-inert `vendor_network`,
1359    /// `customer_segmentation`, and `industry_specific` schema
1360    /// sections. Empty when those sections are disabled.
1361    pub interconnectivity: InterconnectivitySnapshot,
1362}
1363
/// Interconnectivity labels attached to generated entities (v4.1.3+).
///
/// Filled in when any of `vendor_network.enabled`,
/// `customer_segmentation.enabled` or `industry_specific.enabled` is set.
/// It carries the tier / segment / industry labels so that downstream
/// tooling (graph export, risk models) can read them directly rather
/// than re-deriving them from scratch.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// Vendor tier assignments as `(vendor_id, tier)`.
    ///
    /// Tier 1 is strategic / primary, tier 2 is a sub-tier supplier to
    /// tier 1, and tier 3 is a sub-sub-tier supplier.
    pub vendor_tiers: Vec<(String, u8)>,
    /// Vendor cluster assignments as `(vendor_id, cluster_label)`.
    ///
    /// `cluster_label` is one of `"reliable_strategic"`,
    /// `"standard_operational"`, `"transactional"` or `"problematic"`.
    pub vendor_clusters: Vec<(String, String)>,
    /// Customer value segments as `(customer_id, value_segment)`.
    ///
    /// `value_segment` is one of `"enterprise"`, `"mid_market"`, `"smb"`
    /// or `"consumer"`.
    pub customer_value_segments: Vec<(String, String)>,
    /// Customer lifecycle stages as `(customer_id, lifecycle_stage)`.
    ///
    /// `lifecycle_stage` is one of `"prospect"`, `"new"`, `"growth"`,
    /// `"mature"`, `"at_risk"`, `"churned"` or `"won_back"`.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Summary of the industry-specific knobs that were applied, if any
    /// (for example `"manufacturing.bom_depth=3"`).
    pub industry_metadata: Vec<String>,
}
1389
1390/// v3.3.0: snapshot for the analytics-metadata phase.
1391#[derive(Debug, Clone, Default)]
1392pub struct AnalyticsMetadataSnapshot {
1393    /// Prior-year comparative balances per account, per entity.
1394    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
1395    /// Industry benchmarks for the configured industry.
1396    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
1397    /// Management-report artefacts (dashboards, MDA sections).
1398    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
1399    /// Drift-event labels emitted from the post-generation sweep.
1400    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
1401}
1402
1403/// Enhanced statistics about a generation run.
1404#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1405pub struct EnhancedGenerationStatistics {
1406    /// Total journal entries generated.
1407    pub total_entries: u64,
1408    /// Total line items generated.
1409    pub total_line_items: u64,
1410    /// Number of accounts in CoA.
1411    pub accounts_count: usize,
1412    /// Number of companies.
1413    pub companies_count: usize,
1414    /// Period in months.
1415    pub period_months: u32,
1416    /// Master data counts.
1417    pub vendor_count: usize,
1418    pub customer_count: usize,
1419    pub material_count: usize,
1420    pub asset_count: usize,
1421    pub employee_count: usize,
1422    /// Document flow counts.
1423    pub p2p_chain_count: usize,
1424    pub o2c_chain_count: usize,
1425    /// Subledger counts.
1426    pub ap_invoice_count: usize,
1427    pub ar_invoice_count: usize,
1428    /// OCPM counts.
1429    pub ocpm_event_count: usize,
1430    pub ocpm_object_count: usize,
1431    pub ocpm_case_count: usize,
1432    /// Audit counts.
1433    pub audit_engagement_count: usize,
1434    pub audit_workpaper_count: usize,
1435    pub audit_evidence_count: usize,
1436    pub audit_risk_count: usize,
1437    pub audit_finding_count: usize,
1438    pub audit_judgment_count: usize,
1439    /// ISA 505 confirmation counts.
1440    #[serde(default)]
1441    pub audit_confirmation_count: usize,
1442    #[serde(default)]
1443    pub audit_confirmation_response_count: usize,
1444    /// ISA 330/530 procedure step and sample counts.
1445    #[serde(default)]
1446    pub audit_procedure_step_count: usize,
1447    #[serde(default)]
1448    pub audit_sample_count: usize,
1449    /// ISA 520 analytical procedure counts.
1450    #[serde(default)]
1451    pub audit_analytical_result_count: usize,
1452    /// ISA 610 internal audit counts.
1453    #[serde(default)]
1454    pub audit_ia_function_count: usize,
1455    #[serde(default)]
1456    pub audit_ia_report_count: usize,
1457    /// ISA 550 related party counts.
1458    #[serde(default)]
1459    pub audit_related_party_count: usize,
1460    #[serde(default)]
1461    pub audit_related_party_transaction_count: usize,
1462    /// Anomaly counts.
1463    pub anomalies_injected: usize,
1464    /// Data quality issue counts.
1465    pub data_quality_issues: usize,
1466    /// Banking counts.
1467    pub banking_customer_count: usize,
1468    pub banking_account_count: usize,
1469    pub banking_transaction_count: usize,
1470    pub banking_suspicious_count: usize,
1471    /// Graph export counts.
1472    pub graph_export_count: usize,
1473    pub graph_node_count: usize,
1474    pub graph_edge_count: usize,
1475    /// LLM enrichment timing (milliseconds).
1476    #[serde(default)]
1477    pub llm_enrichment_ms: u64,
1478    /// Number of vendor names enriched by LLM.
1479    #[serde(default)]
1480    pub llm_vendors_enriched: usize,
1481    /// v4.1.1+: number of customer names enriched by LLM.
1482    #[serde(default)]
1483    pub llm_customers_enriched: usize,
1484    /// v4.1.1+: number of material descriptions enriched by LLM.
1485    #[serde(default)]
1486    pub llm_materials_enriched: usize,
1487    /// v4.1.1+: number of audit finding titles enriched by LLM.
1488    #[serde(default)]
1489    pub llm_findings_enriched: usize,
1490    /// Diffusion enhancement timing (milliseconds).
1491    #[serde(default)]
1492    pub diffusion_enhancement_ms: u64,
1493    /// Number of diffusion samples generated.
1494    #[serde(default)]
1495    pub diffusion_samples_generated: usize,
1496    /// Hybrid-diffusion blend weight actually applied (after clamp to [0,1]).
1497    /// `None` when the neural/hybrid backend is not active.
1498    #[serde(default, skip_serializing_if = "Option::is_none")]
1499    pub neural_hybrid_weight: Option<f64>,
1500    /// Hybrid-diffusion strategy applied (weighted_average / column_select / threshold).
1501    #[serde(default, skip_serializing_if = "Option::is_none")]
1502    pub neural_hybrid_strategy: Option<String>,
1503    /// How many columns were routed through the neural backend.
1504    #[serde(default, skip_serializing_if = "Option::is_none")]
1505    pub neural_routed_column_count: Option<usize>,
1506    /// Causal generation timing (milliseconds).
1507    #[serde(default)]
1508    pub causal_generation_ms: u64,
1509    /// Number of causal samples generated.
1510    #[serde(default)]
1511    pub causal_samples_generated: usize,
1512    /// Whether causal validation passed.
1513    #[serde(default)]
1514    pub causal_validation_passed: Option<bool>,
1515    /// S2C sourcing counts.
1516    #[serde(default)]
1517    pub sourcing_project_count: usize,
1518    #[serde(default)]
1519    pub rfx_event_count: usize,
1520    #[serde(default)]
1521    pub bid_count: usize,
1522    #[serde(default)]
1523    pub contract_count: usize,
1524    #[serde(default)]
1525    pub catalog_item_count: usize,
1526    #[serde(default)]
1527    pub scorecard_count: usize,
1528    /// Financial reporting counts.
1529    #[serde(default)]
1530    pub financial_statement_count: usize,
1531    #[serde(default)]
1532    pub bank_reconciliation_count: usize,
1533    /// HR counts.
1534    #[serde(default)]
1535    pub payroll_run_count: usize,
1536    #[serde(default)]
1537    pub time_entry_count: usize,
1538    #[serde(default)]
1539    pub expense_report_count: usize,
1540    #[serde(default)]
1541    pub benefit_enrollment_count: usize,
1542    #[serde(default)]
1543    pub pension_plan_count: usize,
1544    #[serde(default)]
1545    pub stock_grant_count: usize,
1546    /// Accounting standards counts.
1547    #[serde(default)]
1548    pub revenue_contract_count: usize,
1549    #[serde(default)]
1550    pub impairment_test_count: usize,
1551    #[serde(default)]
1552    pub business_combination_count: usize,
1553    #[serde(default)]
1554    pub ecl_model_count: usize,
1555    #[serde(default)]
1556    pub provision_count: usize,
1557    /// Manufacturing counts.
1558    #[serde(default)]
1559    pub production_order_count: usize,
1560    #[serde(default)]
1561    pub quality_inspection_count: usize,
1562    #[serde(default)]
1563    pub cycle_count_count: usize,
1564    #[serde(default)]
1565    pub bom_component_count: usize,
1566    #[serde(default)]
1567    pub inventory_movement_count: usize,
1568    /// Sales & reporting counts.
1569    #[serde(default)]
1570    pub sales_quote_count: usize,
1571    #[serde(default)]
1572    pub kpi_count: usize,
1573    #[serde(default)]
1574    pub budget_line_count: usize,
1575    /// Tax counts.
1576    #[serde(default)]
1577    pub tax_jurisdiction_count: usize,
1578    #[serde(default)]
1579    pub tax_code_count: usize,
1580    /// ESG counts.
1581    #[serde(default)]
1582    pub esg_emission_count: usize,
1583    #[serde(default)]
1584    pub esg_disclosure_count: usize,
1585    /// Intercompany counts.
1586    #[serde(default)]
1587    pub ic_matched_pair_count: usize,
1588    #[serde(default)]
1589    pub ic_elimination_count: usize,
1590    /// Number of intercompany journal entries (seller + buyer side).
1591    #[serde(default)]
1592    pub ic_transaction_count: usize,
1593    /// Number of fixed asset subledger records.
1594    #[serde(default)]
1595    pub fa_subledger_count: usize,
1596    /// Number of inventory subledger records.
1597    #[serde(default)]
1598    pub inventory_subledger_count: usize,
1599    /// Treasury debt instrument count.
1600    #[serde(default)]
1601    pub treasury_debt_instrument_count: usize,
1602    /// Treasury hedging instrument count.
1603    #[serde(default)]
1604    pub treasury_hedging_instrument_count: usize,
1605    /// Project accounting project count.
1606    #[serde(default)]
1607    pub project_count: usize,
1608    /// Project accounting change order count.
1609    #[serde(default)]
1610    pub project_change_order_count: usize,
1611    /// Tax provision count.
1612    #[serde(default)]
1613    pub tax_provision_count: usize,
1614    /// Opening balance count.
1615    #[serde(default)]
1616    pub opening_balance_count: usize,
1617    /// Subledger reconciliation count.
1618    #[serde(default)]
1619    pub subledger_reconciliation_count: usize,
1620    /// Tax line count.
1621    #[serde(default)]
1622    pub tax_line_count: usize,
1623    /// Project cost line count.
1624    #[serde(default)]
1625    pub project_cost_line_count: usize,
1626    /// Cash position count.
1627    #[serde(default)]
1628    pub cash_position_count: usize,
1629    /// Cash forecast count.
1630    #[serde(default)]
1631    pub cash_forecast_count: usize,
1632    /// Cash pool count.
1633    #[serde(default)]
1634    pub cash_pool_count: usize,
1635    /// Process evolution event count.
1636    #[serde(default)]
1637    pub process_evolution_event_count: usize,
1638    /// Organizational event count.
1639    #[serde(default)]
1640    pub organizational_event_count: usize,
1641    /// Counterfactual pair count.
1642    #[serde(default)]
1643    pub counterfactual_pair_count: usize,
1644    /// Number of fraud red-flag indicators generated.
1645    #[serde(default)]
1646    pub red_flag_count: usize,
1647    /// Number of collusion rings generated.
1648    #[serde(default)]
1649    pub collusion_ring_count: usize,
1650    /// Number of bi-temporal vendor version chains generated.
1651    #[serde(default)]
1652    pub temporal_version_chain_count: usize,
1653    /// Number of nodes in the entity relationship graph.
1654    #[serde(default)]
1655    pub entity_relationship_node_count: usize,
1656    /// Number of edges in the entity relationship graph.
1657    #[serde(default)]
1658    pub entity_relationship_edge_count: usize,
1659    /// Number of cross-process links generated.
1660    #[serde(default)]
1661    pub cross_process_link_count: usize,
1662    /// Number of disruption events generated.
1663    #[serde(default)]
1664    pub disruption_event_count: usize,
1665    /// Number of industry-specific GL accounts generated.
1666    #[serde(default)]
1667    pub industry_gl_account_count: usize,
1668    /// Number of period-close journal entries generated (tax provision + closing entries).
1669    #[serde(default)]
1670    pub period_close_je_count: usize,
1671}
1672
1673/// Enhanced orchestrator with full feature integration.
1674pub struct EnhancedOrchestrator {
1675    config: GeneratorConfig,
1676    phase_config: PhaseConfig,
1677    coa: Option<Arc<ChartOfAccounts>>,
1678    master_data: MasterDataSnapshot,
1679    seed: u64,
1680    multi_progress: Option<MultiProgress>,
1681    /// Resource guard for memory, disk, and CPU monitoring
1682    resource_guard: ResourceGuard,
1683    /// Output path for disk space monitoring
1684    output_path: Option<PathBuf>,
1685    /// Copula generators for preserving correlations (from fingerprint)
1686    copula_generators: Vec<CopulaGeneratorSpec>,
1687    /// Country pack registry for localized data generation
1688    country_pack_registry: datasynth_core::CountryPackRegistry,
1689    /// Optional streaming sink for phase-by-phase output
1690    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1691    /// Shared template provider for user-supplied template packs.
1692    ///
1693    /// Constructed from `config.templates.path` at orchestrator creation
1694    /// time. When the path is `None`, this is still populated with an
1695    /// embedded-only provider so generators can always call trait methods
1696    /// without an `Option<…>` guard. v3.2.0+.
1697    template_provider: datasynth_core::templates::SharedTemplateProvider,
1698    /// v3.4.1+ temporal context for business-day / holiday awareness.
1699    ///
1700    /// Populated only when `temporal_patterns.business_days.enabled`. When
1701    /// `None`, document-flow / HR / treasury / period-close generators keep
1702    /// their legacy raw-RNG date-offset behaviour (byte-identical to v3.4.0
1703    /// for the same seed).
1704    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
1705    /// Optional shard-mode context (set by group-engine shard runners).
1706    /// `None` preserves byte-for-byte pre-v5.0 single-entity behavior.
1707    shard_context: Option<crate::shard_context::ShardContext>,
1708}
1709
1710impl EnhancedOrchestrator {
1711    /// Create a new enhanced orchestrator.
1712    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1713        datasynth_config::validate_config(&config)?;
1714
1715        let seed = config.global.seed.unwrap_or_else(rand::random);
1716
1717        // Build resource guard from config
1718        let resource_guard = Self::build_resource_guard(&config, None);
1719
1720        // Build country pack registry from config
1721        let country_pack_registry = match &config.country_packs {
1722            Some(cp) => {
1723                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1724                    .map_err(|e| SynthError::config(e.to_string()))?
1725            }
1726            None => datasynth_core::CountryPackRegistry::builtin_only()
1727                .map_err(|e| SynthError::config(e.to_string()))?,
1728        };
1729
1730        // Build the shared template provider from config.templates.path.
1731        // `None` → embedded-only provider (byte-identical pre-v3.2.0 output).
1732        // `Some(path)` → load file/dir and honour `merge_strategy`.
1733        let template_provider = Self::build_template_provider(&config)?;
1734
1735        // v3.4.1: build a shared temporal context when
1736        // `temporal_patterns.business_days.enabled`. `None` preserves the
1737        // raw-RNG date-offset behaviour per-generator.
1738        let temporal_context = Self::build_temporal_context(&config)?;
1739
1740        Ok(Self {
1741            config,
1742            phase_config,
1743            coa: None,
1744            master_data: MasterDataSnapshot::default(),
1745            seed,
1746            multi_progress: None,
1747            resource_guard,
1748            output_path: None,
1749            copula_generators: Vec::new(),
1750            country_pack_registry,
1751            phase_sink: None,
1752            template_provider,
1753            temporal_context,
1754            shard_context: None,
1755        })
1756    }
1757
1758    /// Install shard-mode context.  Called by the group shard runner
1759    /// before [`EnhancedOrchestrator::generate`] (or the equivalent
1760    /// entry point).  Has no effect on single-entity runs.
1761    ///
1762    /// See [`crate::shard_context::ShardContext`] for rationale.
1763    pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1764        self.shard_context = Some(ctx);
1765    }
1766
1767    /// Build the shared [`TemporalContext`] from `config.temporal_patterns`.
1768    ///
1769    /// Returns `Ok(None)` when temporal-pattern features are disabled — the
1770    /// caller keeps its legacy raw-RNG path. Returns `Ok(Some(arc))` when
1771    /// enabled. Returns `Err` only for unrecoverable config errors.
1772    fn build_temporal_context(
1773        config: &GeneratorConfig,
1774    ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1775        use datasynth_core::distributions::{parse_region_code, TemporalContext};
1776
1777        let tp = &config.temporal_patterns;
1778        if !tp.enabled || !tp.business_days.enabled {
1779            return Ok(None);
1780        }
1781
1782        let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1783            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1784        let end_date = start_date + chrono::Months::new(config.global.period_months);
1785
1786        let region_code = tp
1787            .calendars
1788            .regions
1789            .first()
1790            .cloned()
1791            .unwrap_or_else(|| "US".to_string());
1792        let region = parse_region_code(&region_code);
1793
1794        Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1795    }
1796
1797    /// Build the shared template provider from `config.templates`.
1798    ///
1799    /// Always returns a provider — falls back to embedded-only when
1800    /// `config.templates.path` is `None`. The merge-strategy from config
1801    /// maps onto the loader's [`MergeStrategy`] enum. Load failures at
1802    /// orchestrator-construction time are fatal (preferable to silently
1803    /// using embedded pools when the user supplied a bad path).
1804    fn build_template_provider(
1805        config: &GeneratorConfig,
1806    ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1807        use datasynth_core::templates::{
1808            loader::{MergeStrategy, TemplateLoader},
1809            DefaultTemplateProvider,
1810        };
1811        use std::sync::Arc;
1812
1813        let provider = match &config.templates.path {
1814            None => DefaultTemplateProvider::new(),
1815            Some(path) => {
1816                let data = if path.is_dir() {
1817                    TemplateLoader::load_from_directory(path)
1818                } else {
1819                    TemplateLoader::load_from_file(path)
1820                }
1821                .map_err(|e| {
1822                    SynthError::config(format!(
1823                        "Failed to load templates from {}: {e}",
1824                        path.display()
1825                    ))
1826                })?;
1827                let strategy = match config.templates.merge_strategy {
1828                    datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1829                    datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1830                    datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1831                        MergeStrategy::MergePreferFile
1832                    }
1833                };
1834                DefaultTemplateProvider::with_templates(data, strategy)
1835            }
1836        };
1837        Ok(Arc::new(provider))
1838    }
1839
1840    /// Create with default phase config.
1841    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1842        Self::new(config, PhaseConfig::default())
1843    }
1844
1845    /// Set a streaming phase sink for real-time output (builder pattern).
1846    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1847        self.phase_sink = Some(sink);
1848        self
1849    }
1850
1851    /// Set a streaming phase sink on an existing orchestrator.
1852    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1853        self.phase_sink = Some(sink);
1854    }
1855
1856    /// Emit a batch of items to the phase sink (if configured).
1857    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1858        if let Some(ref sink) = self.phase_sink {
1859            for item in items {
1860                if let Ok(value) = serde_json::to_value(item) {
1861                    if let Err(e) = sink.emit(phase, type_name, &value) {
1862                        warn!(
1863                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1864                        );
1865                    }
1866                }
1867            }
1868            if let Err(e) = sink.phase_complete(phase) {
1869                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1870            }
1871        }
1872    }
1873
1874    /// Enable/disable progress bars.
1875    pub fn with_progress(mut self, show: bool) -> Self {
1876        self.phase_config.show_progress = show;
1877        if show {
1878            self.multi_progress = Some(MultiProgress::new());
1879        }
1880        self
1881    }
1882
1883    /// Set the output path for disk space monitoring.
1884    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1885        let path = path.into();
1886        self.output_path = Some(path.clone());
1887        // Rebuild resource guard with the output path
1888        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1889        self
1890    }
1891
1892    /// Access the country pack registry.
1893    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1894        &self.country_pack_registry
1895    }
1896
1897    /// Look up a country pack by country code string.
1898    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1899        self.country_pack_registry.get_by_str(country)
1900    }
1901
1902    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1903    /// company, defaulting to `"US"` if no companies are configured.
1904    fn primary_country_code(&self) -> &str {
1905        self.config
1906            .companies
1907            .first()
1908            .map(|c| c.country.as_str())
1909            .unwrap_or("US")
1910    }
1911
1912    /// Resolve the country pack for the primary (first) company.
1913    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1914        self.country_pack_for(self.primary_country_code())
1915    }
1916
1917    /// Resolve the CoA framework from config/country-pack.
1918    fn resolve_coa_framework(&self) -> CoAFramework {
1919        if self.config.accounting_standards.enabled {
1920            match self.config.accounting_standards.framework {
1921                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1922                    return CoAFramework::FrenchPcg;
1923                }
1924                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1925                    return CoAFramework::GermanSkr04;
1926                }
1927                _ => {}
1928            }
1929        }
1930        // Fallback: derive from country pack
1931        let pack = self.primary_pack();
1932        match pack.accounting.framework.as_str() {
1933            "french_gaap" => CoAFramework::FrenchPcg,
1934            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1935            _ => CoAFramework::UsGaap,
1936        }
1937    }
1938
1939    /// Check if copula generators are available.
1940    ///
1941    /// Returns true if the orchestrator has copula generators for preserving
1942    /// correlations (typically from fingerprint-based generation).
1943    pub fn has_copulas(&self) -> bool {
1944        !self.copula_generators.is_empty()
1945    }
1946
1947    /// Get the copula generators.
1948    ///
1949    /// Returns a reference to the copula generators for use during generation.
1950    /// These can be used to generate correlated samples that preserve the
1951    /// statistical relationships from the source data.
1952    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1953        &self.copula_generators
1954    }
1955
1956    /// Get a mutable reference to the copula generators.
1957    ///
1958    /// Allows generators to sample from copulas during data generation.
1959    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1960        &mut self.copula_generators
1961    }
1962
1963    /// Sample correlated values from a named copula.
1964    ///
1965    /// Returns None if the copula doesn't exist.
1966    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1967        self.copula_generators
1968            .iter_mut()
1969            .find(|c| c.name == copula_name)
1970            .map(|c| c.generator.sample())
1971    }
1972
1973    /// Create an orchestrator from a fingerprint file.
1974    ///
1975    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1976    /// and creates an orchestrator configured to generate data matching
1977    /// the statistical properties of the original data.
1978    ///
1979    /// # Arguments
1980    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1981    /// * `phase_config` - Phase configuration for generation
1982    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1983    ///
1984    /// # Example
1985    /// ```no_run
1986    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1987    /// use std::path::Path;
1988    ///
1989    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1990    ///     Path::new("fingerprint.dsf"),
1991    ///     PhaseConfig::default(),
1992    ///     1.0,
1993    /// ).unwrap();
1994    /// ```
1995    pub fn from_fingerprint(
1996        fingerprint_path: &std::path::Path,
1997        phase_config: PhaseConfig,
1998        scale: f64,
1999    ) -> SynthResult<Self> {
2000        info!("Loading fingerprint from: {}", fingerprint_path.display());
2001
2002        // Read the fingerprint
2003        let reader = FingerprintReader::new();
2004        let fingerprint = reader
2005            .read_from_file(fingerprint_path)
2006            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2007
2008        Self::from_fingerprint_data(fingerprint, phase_config, scale)
2009    }
2010
2011    /// Create an orchestrator from a loaded fingerprint.
2012    ///
2013    /// # Arguments
2014    /// * `fingerprint` - The loaded fingerprint
2015    /// * `phase_config` - Phase configuration for generation
2016    /// * `scale` - Scale factor for row counts (1.0 = same as original)
2017    pub fn from_fingerprint_data(
2018        fingerprint: Fingerprint,
2019        phase_config: PhaseConfig,
2020        scale: f64,
2021    ) -> SynthResult<Self> {
2022        info!(
2023            "Synthesizing config from fingerprint (version: {}, tables: {})",
2024            fingerprint.manifest.version,
2025            fingerprint.schema.tables.len()
2026        );
2027
2028        // Generate a seed for the synthesis
2029        let seed: u64 = rand::random();
2030        info!("Fingerprint synthesis seed: {}", seed);
2031
2032        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
2033        let options = SynthesisOptions {
2034            scale,
2035            seed: Some(seed),
2036            preserve_correlations: true,
2037            inject_anomalies: true,
2038        };
2039        let synthesizer = ConfigSynthesizer::with_options(options);
2040
2041        // Synthesize full result including copula generators
2042        let synthesis_result = synthesizer
2043            .synthesize_full(&fingerprint, seed)
2044            .map_err(|e| {
2045                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2046            })?;
2047
2048        // Start with a base config from the fingerprint's industry if available
2049        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2050            Self::base_config_for_industry(industry)
2051        } else {
2052            Self::base_config_for_industry("manufacturing")
2053        };
2054
2055        // Apply the synthesized patches
2056        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2057
2058        // Log synthesis results
2059        info!(
2060            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2061            fingerprint.schema.tables.len(),
2062            scale,
2063            synthesis_result.copula_generators.len()
2064        );
2065
2066        if !synthesis_result.copula_generators.is_empty() {
2067            for spec in &synthesis_result.copula_generators {
2068                info!(
2069                    "  Copula '{}' for table '{}': {} columns",
2070                    spec.name,
2071                    spec.table,
2072                    spec.columns.len()
2073                );
2074            }
2075        }
2076
2077        // Create the orchestrator with the synthesized config
2078        let mut orchestrator = Self::new(config, phase_config)?;
2079
2080        // Store copula generators for use during generation
2081        orchestrator.copula_generators = synthesis_result.copula_generators;
2082
2083        Ok(orchestrator)
2084    }
2085
2086    /// Create a base config for a given industry.
2087    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2088        use datasynth_config::presets::create_preset;
2089        use datasynth_config::TransactionVolume;
2090        use datasynth_core::models::{CoAComplexity, IndustrySector};
2091
2092        let sector = match industry.to_lowercase().as_str() {
2093            "manufacturing" => IndustrySector::Manufacturing,
2094            "retail" => IndustrySector::Retail,
2095            "financial" | "financial_services" => IndustrySector::FinancialServices,
2096            "healthcare" => IndustrySector::Healthcare,
2097            "technology" | "tech" => IndustrySector::Technology,
2098            _ => IndustrySector::Manufacturing,
2099        };
2100
2101        // Create a preset with reasonable defaults
2102        create_preset(
2103            sector,
2104            1,  // company count
2105            12, // period months
2106            CoAComplexity::Medium,
2107            TransactionVolume::TenK,
2108        )
2109    }
2110
2111    /// Apply a config patch to a GeneratorConfig.
2112    fn apply_config_patch(
2113        mut config: GeneratorConfig,
2114        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2115    ) -> GeneratorConfig {
2116        use datasynth_fingerprint::synthesis::ConfigValue;
2117
2118        for (key, value) in patch.values() {
2119            match (key.as_str(), value) {
2120                // Transaction count is handled via TransactionVolume enum on companies
2121                // Log it but cannot directly set it (would need to modify company volumes)
2122                ("transactions.count", ConfigValue::Integer(n)) => {
2123                    info!(
2124                        "Fingerprint suggests {} transactions (apply via company volumes)",
2125                        n
2126                    );
2127                }
2128                ("global.period_months", ConfigValue::Integer(n)) => {
2129                    config.global.period_months = (*n).clamp(1, 120) as u32;
2130                }
2131                ("global.start_date", ConfigValue::String(s)) => {
2132                    config.global.start_date = s.clone();
2133                }
2134                ("global.seed", ConfigValue::Integer(n)) => {
2135                    config.global.seed = Some(*n as u64);
2136                }
2137                ("fraud.enabled", ConfigValue::Bool(b)) => {
2138                    config.fraud.enabled = *b;
2139                }
2140                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2141                    config.fraud.fraud_rate = *f;
2142                }
2143                ("data_quality.enabled", ConfigValue::Bool(b)) => {
2144                    config.data_quality.enabled = *b;
2145                }
2146                // Handle anomaly injection paths (mapped to fraud config)
2147                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2148                    config.fraud.enabled = *b;
2149                }
2150                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2151                    config.fraud.fraud_rate = *f;
2152                }
2153                _ => {
2154                    debug!("Ignoring unknown config patch key: {}", key);
2155                }
2156            }
2157        }
2158
2159        config
2160    }
2161
2162    /// Build a resource guard from the configuration.
2163    fn build_resource_guard(
2164        config: &GeneratorConfig,
2165        output_path: Option<PathBuf>,
2166    ) -> ResourceGuard {
2167        let mut builder = ResourceGuardBuilder::new();
2168
2169        // Configure memory limit if set
2170        if config.global.memory_limit_mb > 0 {
2171            builder = builder.memory_limit(config.global.memory_limit_mb);
2172        }
2173
2174        // Configure disk monitoring for output path
2175        if let Some(path) = output_path {
2176            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
2177        }
2178
2179        // Use conservative degradation settings for production safety
2180        builder = builder.conservative();
2181
2182        builder.build()
2183    }
2184
2185    /// Check resources (memory, disk, CPU) and return degradation level.
2186    ///
2187    /// Returns an error if hard limits are exceeded.
2188    /// Returns Ok(DegradationLevel) indicating current resource state.
2189    fn check_resources(&self) -> SynthResult<DegradationLevel> {
2190        self.resource_guard.check()
2191    }
2192
2193    /// Check resources with logging.
2194    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2195        let level = self.resource_guard.check()?;
2196
2197        if level != DegradationLevel::Normal {
2198            warn!(
2199                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2200                phase,
2201                level,
2202                self.resource_guard.current_memory_mb(),
2203                self.resource_guard.available_disk_mb()
2204            );
2205        }
2206
2207        Ok(level)
2208    }
2209
2210    /// Get current degradation actions based on resource state.
2211    fn get_degradation_actions(&self) -> DegradationActions {
2212        self.resource_guard.get_actions()
2213    }
2214
2215    /// Legacy method for backwards compatibility - now uses ResourceGuard.
2216    fn check_memory_limit(&self) -> SynthResult<()> {
2217        self.check_resources()?;
2218        Ok(())
2219    }
2220
2221    /// Run the complete generation workflow.
2222    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2223        info!("Starting enhanced generation workflow");
2224        info!(
2225            "Config: industry={:?}, period_months={}, companies={}",
2226            self.config.global.industry,
2227            self.config.global.period_months,
2228            self.config.companies.len()
2229        );
2230
2231        // Set decimal serialization mode (thread-local, affects JSON output).
2232        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
2233        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2234        datasynth_core::serde_decimal::set_numeric_native(is_native);
2235        struct NumericModeGuard;
2236        impl Drop for NumericModeGuard {
2237            fn drop(&mut self) {
2238                datasynth_core::serde_decimal::set_numeric_native(false);
2239            }
2240        }
2241        let _numeric_guard = if is_native {
2242            Some(NumericModeGuard)
2243        } else {
2244            None
2245        };
2246
2247        // Initial resource check before starting
2248        let initial_level = self.check_resources_with_log("initial")?;
2249        if initial_level == DegradationLevel::Emergency {
2250            return Err(SynthError::resource(
2251                "Insufficient resources to start generation",
2252            ));
2253        }
2254
2255        let mut stats = EnhancedGenerationStatistics {
2256            companies_count: self.config.companies.len(),
2257            period_months: self.config.global.period_months,
2258            ..Default::default()
2259        };
2260
2261        // Phase 1: Chart of Accounts
2262        let coa = self.phase_chart_of_accounts(&mut stats)?;
2263
2264        // Phase 2: Master Data
2265        self.phase_master_data(&mut stats)?;
2266
2267        // Emit master data to stream sink
2268        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2269        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2270        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2271
2272        // Phase 3: Document Flows + Subledger Linking
2273        let (mut document_flows, mut subledger, fa_journal_entries) =
2274            self.phase_document_flows(&mut stats)?;
2275
2276        // Emit document flows to stream sink
2277        self.emit_phase_items(
2278            "document_flows",
2279            "PurchaseOrder",
2280            &document_flows.purchase_orders,
2281        );
2282        self.emit_phase_items(
2283            "document_flows",
2284            "GoodsReceipt",
2285            &document_flows.goods_receipts,
2286        );
2287        self.emit_phase_items(
2288            "document_flows",
2289            "VendorInvoice",
2290            &document_flows.vendor_invoices,
2291        );
2292        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2293        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2294
2295        // Phase 3b: Opening Balances (before JE generation)
2296        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2297
2298        // Phase 3c: Convert opening balances to journal entries and prepend them.
2299        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
2300        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
2301        // balance map type.
2302        let opening_balance_jes: Vec<JournalEntry> = opening_balances
2303            .iter()
2304            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2305            .collect();
2306        if !opening_balance_jes.is_empty() {
2307            debug!(
2308                "Prepending {} opening balance JEs to entries",
2309                opening_balance_jes.len()
2310            );
2311        }
2312
2313        // Phase 4: Journal Entries
2314        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2315
2316        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
2317        // starts from the correct initial state.
2318        if !opening_balance_jes.is_empty() {
2319            let mut combined = opening_balance_jes;
2320            combined.extend(entries);
2321            entries = combined;
2322        }
2323
2324        // Phase 4c: Append FA acquisition journal entries to main entries
2325        if !fa_journal_entries.is_empty() {
2326            debug!(
2327                "Appending {} FA acquisition JEs to main entries",
2328                fa_journal_entries.len()
2329            );
2330            entries.extend(fa_journal_entries);
2331        }
2332
2333        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
2334        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2335
2336        // Get current degradation actions for optional phases
2337        let actions = self.get_degradation_actions();
2338
2339        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
2340        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2341
2342        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
2343        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
2344        if !sourcing.contracts.is_empty() {
2345            let mut linked_count = 0usize;
2346            // Collect (vendor_id, po_id) pairs from P2P chains
2347            let po_vendor_pairs: Vec<(String, String)> = document_flows
2348                .p2p_chains
2349                .iter()
2350                .map(|chain| {
2351                    (
2352                        chain.purchase_order.vendor_id.clone(),
2353                        chain.purchase_order.header.document_id.clone(),
2354                    )
2355                })
2356                .collect();
2357
2358            for chain in &mut document_flows.p2p_chains {
2359                if chain.purchase_order.contract_id.is_none() {
2360                    if let Some(contract) = sourcing
2361                        .contracts
2362                        .iter()
2363                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2364                    {
2365                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2366                        linked_count += 1;
2367                    }
2368                }
2369            }
2370
2371            // Populate reverse FK: purchase_order_ids on each contract
2372            for contract in &mut sourcing.contracts {
2373                let po_ids: Vec<String> = po_vendor_pairs
2374                    .iter()
2375                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2376                    .map(|(_, po_id)| po_id.clone())
2377                    .collect();
2378                if !po_ids.is_empty() {
2379                    contract.purchase_order_ids = po_ids;
2380                }
2381            }
2382
2383            if linked_count > 0 {
2384                debug!(
2385                    "Linked {} purchase orders to S2C contracts by vendor match",
2386                    linked_count
2387                );
2388            }
2389        }
2390
2391        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2392        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2393
2394        // Phase 5c: Append IC journal entries to main entries
2395        if !intercompany.seller_journal_entries.is_empty()
2396            || !intercompany.buyer_journal_entries.is_empty()
2397        {
2398            let ic_je_count = intercompany.seller_journal_entries.len()
2399                + intercompany.buyer_journal_entries.len();
2400            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2401            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2402            debug!(
2403                "Appended {} IC journal entries to main entries",
2404                ic_je_count
2405            );
2406        }
2407
2408        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2409        if !intercompany.elimination_entries.is_empty() {
2410            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2411                &intercompany.elimination_entries,
2412            );
2413            if !elim_jes.is_empty() {
2414                debug!(
2415                    "Appended {} elimination journal entries to main entries",
2416                    elim_jes.len()
2417                );
2418                // IC elimination net-zero assertion (v2.5 hardening)
2419                let elim_debit: rust_decimal::Decimal =
2420                    elim_jes.iter().map(|je| je.total_debit()).sum();
2421                let elim_credit: rust_decimal::Decimal =
2422                    elim_jes.iter().map(|je| je.total_credit()).sum();
2423                let elim_diff = (elim_debit - elim_credit).abs();
2424                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2425                if elim_diff > tolerance {
2426                    return Err(datasynth_core::error::SynthError::generation(format!(
2427                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2428                        elim_debit, elim_credit, elim_diff, tolerance
2429                    )));
2430                }
2431                debug!(
2432                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2433                    elim_debit, elim_credit, elim_diff
2434                );
2435                entries.extend(elim_jes);
2436            }
2437        }
2438
2439        // Phase 5e: Wire IC source documents into document flow snapshot
2440        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2441            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2442                document_flows
2443                    .customer_invoices
2444                    .extend(ic_docs.seller_invoices.iter().cloned());
2445                document_flows
2446                    .purchase_orders
2447                    .extend(ic_docs.buyer_orders.iter().cloned());
2448                document_flows
2449                    .goods_receipts
2450                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2451                document_flows
2452                    .vendor_invoices
2453                    .extend(ic_docs.buyer_invoices.iter().cloned());
2454                debug!(
2455                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2456                    ic_docs.seller_invoices.len(),
2457                    ic_docs.buyer_orders.len(),
2458                    ic_docs.buyer_goods_receipts.len(),
2459                    ic_docs.buyer_invoices.len(),
2460                );
2461            }
2462        }
2463
2464        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2465        let hr = self.phase_hr_data(&mut stats)?;
2466
2467        // Phase 6b: Generate JEs from payroll runs
2468        if !hr.payroll_runs.is_empty() {
2469            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2470            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2471            entries.extend(payroll_jes);
2472        }
2473
2474        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2475        if !hr.pension_journal_entries.is_empty() {
2476            debug!(
2477                "Generated {} JEs from pension plans",
2478                hr.pension_journal_entries.len()
2479            );
2480            entries.extend(hr.pension_journal_entries.iter().cloned());
2481        }
2482
2483        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2484        if !hr.stock_comp_journal_entries.is_empty() {
2485            debug!(
2486                "Generated {} JEs from stock-based compensation",
2487                hr.stock_comp_journal_entries.len()
2488            );
2489            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2490        }
2491
2492        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2493        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2494
2495        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2496        if !manufacturing_snap.production_orders.is_empty() {
2497            let currency = self
2498                .config
2499                .companies
2500                .first()
2501                .map(|c| c.currency.as_str())
2502                .unwrap_or("USD");
2503            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2504                &manufacturing_snap.production_orders,
2505                &manufacturing_snap.quality_inspections,
2506                currency,
2507            );
2508            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2509            entries.extend(mfg_jes);
2510        }
2511
2512        // Phase 7a-warranty: Generate warranty provisions per company
2513        if !manufacturing_snap.quality_inspections.is_empty() {
2514            let framework = match self.config.accounting_standards.framework {
2515                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2516                _ => "US_GAAP",
2517            };
2518            for company in &self.config.companies {
2519                let company_orders: Vec<_> = manufacturing_snap
2520                    .production_orders
2521                    .iter()
2522                    .filter(|o| o.company_code == company.code)
2523                    .cloned()
2524                    .collect();
2525                let company_inspections: Vec<_> = manufacturing_snap
2526                    .quality_inspections
2527                    .iter()
2528                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2529                    .cloned()
2530                    .collect();
2531                if company_inspections.is_empty() {
2532                    continue;
2533                }
2534                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2535                let warranty_result = warranty_gen.generate(
2536                    &company.code,
2537                    &company_orders,
2538                    &company_inspections,
2539                    &company.currency,
2540                    framework,
2541                );
2542                if !warranty_result.journal_entries.is_empty() {
2543                    debug!(
2544                        "Generated {} warranty provision JEs for {}",
2545                        warranty_result.journal_entries.len(),
2546                        company.code
2547                    );
2548                    entries.extend(warranty_result.journal_entries);
2549                }
2550            }
2551        }
2552
2553        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2554        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2555        {
2556            let cogs_currency = self
2557                .config
2558                .companies
2559                .first()
2560                .map(|c| c.currency.as_str())
2561                .unwrap_or("USD");
2562            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2563                &document_flows.deliveries,
2564                &manufacturing_snap.production_orders,
2565                cogs_currency,
2566            );
2567            if !cogs_jes.is_empty() {
2568                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2569                entries.extend(cogs_jes);
2570            }
2571        }
2572
2573        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2574        //
2575        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2576        // subledger inventory positions.  Here we reconcile them so that position balances
2577        // reflect the actual stock movements within the generation period.
2578        if !manufacturing_snap.inventory_movements.is_empty()
2579            && !subledger.inventory_positions.is_empty()
2580        {
2581            use datasynth_core::models::MovementType as MfgMovementType;
2582            let mut receipt_count = 0usize;
2583            let mut issue_count = 0usize;
2584            for movement in &manufacturing_snap.inventory_movements {
2585                // Find a matching position by material code and company
2586                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2587                    p.material_id == movement.material_code
2588                        && p.company_code == movement.entity_code
2589                }) {
2590                    match movement.movement_type {
2591                        MfgMovementType::GoodsReceipt => {
2592                            // Increase stock and update weighted-average cost
2593                            pos.add_quantity(
2594                                movement.quantity,
2595                                movement.value,
2596                                movement.movement_date,
2597                            );
2598                            receipt_count += 1;
2599                        }
2600                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2601                            // Decrease stock (best-effort; silently skip if insufficient)
2602                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2603                            issue_count += 1;
2604                        }
2605                        _ => {}
2606                    }
2607                }
2608            }
2609            debug!(
2610                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2611                manufacturing_snap.inventory_movements.len(),
2612                receipt_count,
2613                issue_count,
2614            );
2615        }
2616
2617        // Update final entry/line-item stats after all JE-generating phases
2618        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2619        if !entries.is_empty() {
2620            stats.total_entries = entries.len() as u64;
2621            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2622            debug!(
2623                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2624                stats.total_entries, stats.total_line_items
2625            );
2626        }
2627
2628        // Phase 7b: Apply internal controls to journal entries
2629        if self.config.internal_controls.enabled && !entries.is_empty() {
2630            info!("Phase 7b: Applying internal controls to journal entries");
2631            let control_config = ControlGeneratorConfig {
2632                exception_rate: self.config.internal_controls.exception_rate,
2633                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2634                enable_sox_marking: true,
2635                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2636                    self.config.internal_controls.sox_materiality_threshold,
2637                )
2638                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2639                ..Default::default()
2640            };
2641            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2642            for entry in &mut entries {
2643                control_gen.apply_controls(entry, &coa);
2644            }
2645            let with_controls = entries
2646                .iter()
2647                .filter(|e| !e.header.control_ids.is_empty())
2648                .count();
2649            info!(
2650                "Applied controls to {} entries ({} with control IDs assigned)",
2651                entries.len(),
2652                with_controls
2653            );
2654        }
2655
2656        // Phase 7c: Extract SoD violations from annotated journal entries.
2657        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2658        // Here we materialise those flags into standalone SodViolation records.
2659        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2660            .iter()
2661            .filter(|e| e.header.sod_violation)
2662            .filter_map(|e| {
2663                e.header.sod_conflict_type.map(|ct| {
2664                    use datasynth_core::models::{RiskLevel, SodViolation};
2665                    let severity = match ct {
2666                        datasynth_core::models::SodConflictType::PaymentReleaser
2667                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2668                            RiskLevel::Critical
2669                        }
2670                        datasynth_core::models::SodConflictType::PreparerApprover
2671                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2672                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2673                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2674                            RiskLevel::High
2675                        }
2676                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2677                            RiskLevel::Medium
2678                        }
2679                    };
2680                    let action = format!(
2681                        "SoD conflict {:?} on entry {} ({})",
2682                        ct, e.header.document_id, e.header.company_code
2683                    );
2684                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2685                })
2686            })
2687            .collect();
2688        if !sod_violations.is_empty() {
2689            info!(
2690                "Phase 7c: Extracted {} SoD violations from {} entries",
2691                sod_violations.len(),
2692                entries.len()
2693            );
2694        }
2695
2696        // Emit journal entries to stream sink (after all JE-generating phases)
2697        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2698
2699        // Phase 7d: Document-level fraud injection + propagation to derived JEs.
2700        //
2701        // This runs BEFORE line-level anomaly injection so that JEs tagged by
2702        // document-level fraud are exempt from subsequent line-level flag
2703        // overwrites, and so downstream consumers see a coherent picture.
2704        //
2705        // Gated by `fraud.document_fraud_rate` — `None` or `0.0` is a no-op.
2706        {
2707            let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2708            if self.config.fraud.enabled && doc_rate > 0.0 {
2709                use datasynth_core::fraud_propagation::{
2710                    inject_document_fraud, propagate_documents_to_entries,
2711                };
2712                use datasynth_core::utils::weighted_select;
2713                use datasynth_core::FraudType;
2714                use rand_chacha::rand_core::SeedableRng;
2715
2716                let dist = &self.config.fraud.fraud_type_distribution;
2717                let fraud_type_weights: [(FraudType, f64); 8] = [
2718                    (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2719                    (FraudType::FictitiousEntry, dist.fictitious_transaction),
2720                    (FraudType::RevenueManipulation, dist.revenue_manipulation),
2721                    (
2722                        FraudType::ImproperCapitalization,
2723                        dist.expense_capitalization,
2724                    ),
2725                    (FraudType::SplitTransaction, dist.split_transaction),
2726                    (FraudType::TimingAnomaly, dist.timing_anomaly),
2727                    (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2728                    (FraudType::DuplicatePayment, dist.duplicate_payment),
2729                ];
2730                let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2731                let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2732                    if weights_sum <= 0.0 {
2733                        FraudType::FictitiousEntry
2734                    } else {
2735                        *weighted_select(rng, &fraud_type_weights)
2736                    }
2737                };
2738
2739                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2740                let mut doc_tagged = 0usize;
2741                macro_rules! inject_into {
2742                    ($collection:expr) => {{
2743                        let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2744                            $collection.iter_mut().map(|d| &mut d.header).collect();
2745                        doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2746                    }};
2747                }
2748                inject_into!(document_flows.purchase_orders);
2749                inject_into!(document_flows.goods_receipts);
2750                inject_into!(document_flows.vendor_invoices);
2751                inject_into!(document_flows.payments);
2752                inject_into!(document_flows.sales_orders);
2753                inject_into!(document_flows.deliveries);
2754                inject_into!(document_flows.customer_invoices);
2755                if doc_tagged > 0 {
2756                    info!(
2757                        "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2758                    );
2759                }
2760
2761                if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2762                    let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2763                        Vec::new();
2764                    headers.extend(
2765                        document_flows
2766                            .purchase_orders
2767                            .iter()
2768                            .map(|d| d.header.clone()),
2769                    );
2770                    headers.extend(
2771                        document_flows
2772                            .goods_receipts
2773                            .iter()
2774                            .map(|d| d.header.clone()),
2775                    );
2776                    headers.extend(
2777                        document_flows
2778                            .vendor_invoices
2779                            .iter()
2780                            .map(|d| d.header.clone()),
2781                    );
2782                    headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2783                    headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2784                    headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2785                    headers.extend(
2786                        document_flows
2787                            .customer_invoices
2788                            .iter()
2789                            .map(|d| d.header.clone()),
2790                    );
2791                    let propagated = propagate_documents_to_entries(&headers, &mut entries);
2792                    if propagated > 0 {
2793                        info!(
2794                            "Propagated document-level fraud to {propagated} derived journal entries"
2795                        );
2796                    }
2797                }
2798            }
2799        }
2800
2801        // Phase 8: Anomaly Injection (after all JE-generating phases)
2802        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2803
2804        // Phase 8b: Apply behavioral biases to fraud entries that did NOT go
2805        // through the anomaly injector.
2806        //
2807        // Three paths set `is_fraud = true` without touching `is_anomaly`:
2808        //   - je_generator::determine_fraud (intrinsic fraud during JE generation)
2809        //   - fraud_propagation::propagate_documents_to_entries (doc-level cascade)
2810        //   - Any external mutation that sets is_fraud after the fact
2811        //
2812        // The anomaly injector already applies the same bias inline when it
2813        // tags an entry as fraud (and sets is_anomaly=true in the same step),
2814        // so gating this sweep on `!is_anomaly` avoids double-application.
2815        //
2816        // Without this sweep, fraud entries from these paths show 0 lift on
2817        // the canonical forensic signals (is_round_1000, is_off_hours,
2818        // is_weekend, is_post_close), which is exactly what the SDK-side
2819        // evaluator caught in v3.1 — fraud features had worse lift than
2820        // baseline. See DS-3.1 post-deploy feedback.
2821        {
2822            use datasynth_core::fraud_bias::{
2823                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2824            };
2825            use rand_chacha::rand_core::SeedableRng;
2826            let cfg = FraudBehavioralBiasConfig::default();
2827            if cfg.enabled {
2828                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2829                let mut swept = 0usize;
2830                for entry in entries.iter_mut() {
2831                    if entry.header.is_fraud && !entry.header.is_anomaly {
2832                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2833                        swept += 1;
2834                    }
2835                }
2836                if swept > 0 {
2837                    info!(
2838                        "Applied behavioral biases to {swept} non-anomaly fraud entries \
2839                         (doc-propagated + je_generator intrinsic fraud)"
2840                    );
2841                }
2842            }
2843        }
2844
2845        // Emit anomaly labels to stream sink
2846        self.emit_phase_items(
2847            "anomaly_injection",
2848            "LabeledAnomaly",
2849            &anomaly_labels.labels,
2850        );
2851
2852        // Propagate fraud labels from journal entries to source documents.
2853        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2854        // instead of tracing through document_references.json.
2855        //
2856        // Gated by `fraud.propagate_to_document` (default true) — disable when
2857        // downstream consumers want document fraud flags to reflect only
2858        // document-level injection, not line-level.
2859        if self.config.fraud.propagate_to_document {
2860            use std::collections::HashMap;
2861            // Build a map from document_id -> (is_fraud, fraud_type) from fraudulent JEs.
2862            //
2863            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2864            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2865            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2866            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2867            // we register BOTH the prefixed form (raw reference) AND the bare form
2868            // (post-colon portion) in the map. Also register the JE's document_id
2869            // UUID so documents that set `journal_entry_id` match via that path.
2870            //
2871            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2872            // looked up "foo", silently producing 0 propagations.
2873            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2874            for je in &entries {
2875                if je.header.is_fraud {
2876                    if let Some(ref fraud_type) = je.header.fraud_type {
2877                        if let Some(ref reference) = je.header.reference {
2878                            // Register the full reference ("GR:PO-2024-000001")
2879                            fraud_map.insert(reference.clone(), *fraud_type);
2880                            // Also register the bare document ID ("PO-2024-000001")
2881                            // by stripping the "PREFIX:" if present.
2882                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2883                                if !bare.is_empty() {
2884                                    fraud_map.insert(bare.to_string(), *fraud_type);
2885                                }
2886                            }
2887                        }
2888                        // Also tag via journal_entry_id on document headers
2889                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2890                    }
2891                }
2892            }
2893            if !fraud_map.is_empty() {
2894                let mut propagated = 0usize;
2895                // Use DocumentHeader::propagate_fraud method for each doc type
2896                macro_rules! propagate_to {
2897                    ($collection:expr) => {
2898                        for doc in &mut $collection {
2899                            if doc.header.propagate_fraud(&fraud_map) {
2900                                propagated += 1;
2901                            }
2902                        }
2903                    };
2904                }
2905                propagate_to!(document_flows.purchase_orders);
2906                propagate_to!(document_flows.goods_receipts);
2907                propagate_to!(document_flows.vendor_invoices);
2908                propagate_to!(document_flows.payments);
2909                propagate_to!(document_flows.sales_orders);
2910                propagate_to!(document_flows.deliveries);
2911                propagate_to!(document_flows.customer_invoices);
2912                if propagated > 0 {
2913                    info!(
2914                        "Propagated fraud labels to {} document flow records",
2915                        propagated
2916                    );
2917                }
2918            }
2919        }
2920
2921        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2922        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2923
2924        // Emit red flags to stream sink
2925        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2926
2927        // Phase 26b: Collusion Ring Generation (after red flags)
2928        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2929
2930        // Emit collusion rings to stream sink
2931        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2932
2933        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2934        let balance_validation = self.phase_balance_validation(&entries)?;
2935
2936        // Phase 9a: COA coverage — every gl_account in JEs must exist in the
2937        // chart of accounts. Soft warning by default; hard fail when the
2938        // user passes --validate-coa-coverage / sets the strict flag.
2939        self.validate_coa_coverage(&entries, coa.as_ref())?;
2940
2941        // Phase 9b: GL-to-Subledger Reconciliation
2942        let subledger_reconciliation =
2943            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2944
2945        // Phase 10: Data Quality Injection
2946        let (data_quality_stats, quality_issues) =
2947            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2948
2949        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2950        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2951
2952        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
2953        {
2954            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2955
2956            // Assert 1: Every JE not flagged is_fraud / is_anomaly must individually balance (debits = credits).
2957            // Flagged JEs are skipped because fraud/anomaly injection may leave them intentionally unbalanced.
2958            let mut unbalanced_clean = 0usize;
2959            for je in &entries {
2960                if je.header.is_fraud || je.header.is_anomaly {
2961                    continue;
2962                }
2963                let diff = (je.total_debit() - je.total_credit()).abs();
2964                if diff > tolerance {
2965                    unbalanced_clean += 1;
2966                    if unbalanced_clean <= 3 {
2967                        warn!(
2968                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2969                            je.header.document_id,
2970                            je.total_debit(),
2971                            je.total_credit(),
2972                            diff
2973                        );
2974                    }
2975                }
2976            }
2977            if unbalanced_clean > 0 {
2978                return Err(datasynth_core::error::SynthError::generation(format!(
2979                    "{} non-anomaly JEs are unbalanced (debits != credits). \
2980                     First few logged above. Tolerance={}",
2981                    unbalanced_clean, tolerance
2982                )));
2983            }
2984            debug!(
2985                "Phase 10c: All {} non-anomaly JEs individually balanced",
2986                entries
2987                    .iter()
2988                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2989                    .count()
2990            );
2991
2992            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
2993            let company_codes: Vec<String> = self
2994                .config
2995                .companies
2996                .iter()
2997                .map(|c| c.code.clone())
2998                .collect();
2999            for company_code in &company_codes {
3000                let mut assets = rust_decimal::Decimal::ZERO;
3001                let mut liab_equity = rust_decimal::Decimal::ZERO;
3002
3003                for entry in &entries {
3004                    if entry.header.company_code != *company_code {
3005                        continue;
3006                    }
3007                    for line in &entry.lines {
3008                        let acct = &line.gl_account;
3009                        let net = line.debit_amount - line.credit_amount;
3010                        // Asset accounts (1xxx): normal debit balance
3011                        if acct.starts_with('1') {
3012                            assets += net;
3013                        }
3014                        // Liability (2xxx) + Equity (3xxx): normal credit balance
3015                        else if acct.starts_with('2') || acct.starts_with('3') {
3016                            liab_equity -= net; // credit-normal, so negate debit-net
3017                        }
3018                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
3019                        // so they net to zero after closing entries
3020                    }
3021                }
3022
3023                let bs_diff = (assets - liab_equity).abs();
3024                if bs_diff > tolerance {
3025                    warn!(
3026                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3027                         revenue/expense closing entries may not fully offset",
3028                        company_code, assets, liab_equity, bs_diff
3029                    );
3030                    // Warn rather than error: multi-period datasets may have timing
3031                    // differences from accruals/deferrals that resolve in later periods.
3032                    // The per-JE balance check (Assert 1) is the hard gate.
3033                } else {
3034                    debug!(
3035                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3036                        company_code, assets, liab_equity, bs_diff
3037                    );
3038                }
3039            }
3040
3041            info!("Phase 10c: All generation-time accounting assertions passed");
3042        }
3043
3044        // Phase 11: Audit Data
3045        let audit = self.phase_audit_data(&entries, &mut stats)?;
3046
3047        // Phase 12: Banking KYC/AML Data
3048        let mut banking = self.phase_banking_data(&mut stats)?;
3049
3050        // Phase 12.5: Bridge document-flow Payments → BankTransactions
3051        // Creates coherence between the accounting layer (payments, JEs) and the
3052        // banking layer (bank transactions). A vendor invoice payment now appears
3053        // on both sides with cross-references and fraud labels propagated.
3054        if self.phase_config.generate_banking
3055            && !document_flows.payments.is_empty()
3056            && !banking.accounts.is_empty()
3057        {
3058            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3059            if bridge_rate > 0.0 {
3060                let mut bridge =
3061                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3062                        self.seed,
3063                    );
3064                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3065                    &document_flows.payments,
3066                    &banking.customers,
3067                    &banking.accounts,
3068                    bridge_rate,
3069                );
3070                info!(
3071                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3072                    bridge_stats.bridged_count,
3073                    bridge_stats.transactions_emitted,
3074                    bridge_stats.fraud_propagated,
3075                );
3076                let bridged_count = bridged_txns.len();
3077                banking.transactions.extend(bridged_txns);
3078
3079                // Re-run velocity computation so bridged txns also get features
3080                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
3081                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3082                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
3083                        &mut banking.transactions,
3084                    );
3085                }
3086
3087                // Recompute suspicious count after bridging
3088                banking.suspicious_count = banking
3089                    .transactions
3090                    .iter()
3091                    .filter(|t| t.is_suspicious)
3092                    .count();
3093                stats.banking_transaction_count = banking.transactions.len();
3094                stats.banking_suspicious_count = banking.suspicious_count;
3095            }
3096        }
3097
3098        // Phase 13: Graph Export
3099        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3100
3101        // Phase 14: LLM Enrichment
3102        self.phase_llm_enrichment(&mut stats);
3103
3104        // Phase 15: Diffusion Enhancement
3105        self.phase_diffusion_enhancement(&entries, &mut stats);
3106
3107        // Phase 16: Causal Overlay
3108        self.phase_causal_overlay(&mut stats);
3109
3110        // Phase 17: Bank Reconciliation + Financial Statements
3111        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
3112        // provision data (from accounting_standards / tax snapshots) can be wired in.
3113        let mut financial_reporting = self.phase_financial_reporting(
3114            &document_flows,
3115            &entries,
3116            &coa,
3117            &hr,
3118            &audit,
3119            &mut stats,
3120        )?;
3121
3122        // BS coherence check: assets = liabilities + equity
3123        {
3124            use datasynth_core::models::StatementType;
3125            for stmt in &financial_reporting.consolidated_statements {
3126                if stmt.statement_type == StatementType::BalanceSheet {
3127                    let total_assets: rust_decimal::Decimal = stmt
3128                        .line_items
3129                        .iter()
3130                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
3131                        .map(|li| li.amount)
3132                        .sum();
3133                    let total_le: rust_decimal::Decimal = stmt
3134                        .line_items
3135                        .iter()
3136                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3137                        .map(|li| li.amount)
3138                        .sum();
3139                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3140                        warn!(
3141                            "BS equation imbalance: assets={}, L+E={}",
3142                            total_assets, total_le
3143                        );
3144                    }
3145                }
3146            }
3147        }
3148
3149        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
3150        let accounting_standards =
3151            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3152
3153        // Phase 18a: Merge ECL journal entries into main GL
3154        if !accounting_standards.ecl_journal_entries.is_empty() {
3155            debug!(
3156                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3157                accounting_standards.ecl_journal_entries.len()
3158            );
3159            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3160        }
3161
3162        // Phase 18a: Merge provision journal entries into main GL
3163        if !accounting_standards.provision_journal_entries.is_empty() {
3164            debug!(
3165                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3166                accounting_standards.provision_journal_entries.len()
3167            );
3168            entries.extend(
3169                accounting_standards
3170                    .provision_journal_entries
3171                    .iter()
3172                    .cloned(),
3173            );
3174        }
3175
3176        // Phase 18b: OCPM Events (after all process data is available)
3177        let mut ocpm = self.phase_ocpm_events(
3178            &document_flows,
3179            &sourcing,
3180            &hr,
3181            &manufacturing_snap,
3182            &banking,
3183            &audit,
3184            &financial_reporting,
3185            &mut stats,
3186        )?;
3187
3188        // Emit OCPM events to stream sink
3189        if let Some(ref event_log) = ocpm.event_log {
3190            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3191        }
3192
3193        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
3194        if let Some(ref event_log) = ocpm.event_log {
3195            // Build reverse index: document_ref → indices of matching events in `event_log.events`
3196            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3197                std::collections::HashMap::new();
3198            for (idx, event) in event_log.events.iter().enumerate() {
3199                if let Some(ref doc_ref) = event.document_ref {
3200                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3201                }
3202            }
3203
3204            if !doc_index.is_empty() {
3205                let mut annotated = 0usize;
3206                for entry in &mut entries {
3207                    let doc_id_str = entry.header.document_id.to_string();
3208                    // Collect matching event indices from document_id and reference
3209                    let mut matched_indices: Vec<usize> = Vec::new();
3210                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3211                        matched_indices.extend(indices);
3212                    }
3213                    if let Some(ref reference) = entry.header.reference {
3214                        let bare_ref = reference
3215                            .find(':')
3216                            .map(|i| &reference[i + 1..])
3217                            .unwrap_or(reference.as_str());
3218                        if let Some(indices) = doc_index.get(bare_ref) {
3219                            for &idx in indices {
3220                                if !matched_indices.contains(&idx) {
3221                                    matched_indices.push(idx);
3222                                }
3223                            }
3224                        }
3225                    }
3226                    // Apply matches to JE header
3227                    if !matched_indices.is_empty() {
3228                        for &idx in &matched_indices {
3229                            let event = &event_log.events[idx];
3230                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3231                                entry.header.ocpm_event_ids.push(event.event_id);
3232                            }
3233                            for obj_ref in &event.object_refs {
3234                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3235                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
3236                                }
3237                            }
3238                            if entry.header.ocpm_case_id.is_none() {
3239                                entry.header.ocpm_case_id = event.case_id;
3240                            }
3241                        }
3242                        annotated += 1;
3243                    }
3244                }
3245                debug!(
3246                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3247                    annotated
3248                );
3249            }
3250        }
3251
3252        // Phase 18d: Synthesize OCPM events for orphan JEs (period-close,
3253        // IC eliminations, opening balances, standards-driven entries) so
3254        // every JournalEntry carries at least one `ocpm_event_ids` link.
3255        if let Some(ref mut event_log) = ocpm.event_log {
3256            let synthesized =
3257                datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3258            if synthesized > 0 {
3259                info!(
3260                    "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3261                );
3262            }
3263
3264            // Phase 18e: Mirror JE anomaly / fraud flags onto the linked OCEL
3265            // events and their owning CaseTrace. Without this, every exported
3266            // OCEL event has `is_anomaly = false` even when the underlying JE
3267            // was flagged.
3268            let anomaly_events =
3269                datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3270            if anomaly_events > 0 {
3271                info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3272            }
3273
3274            // Phase 18f: Inject process-variant imperfections (rework, skipped
3275            // steps, out-of-order events) so conformance checkers see
3276            // realistic variant counts and fitness < 1.0. Uses the P2P
3277            // process rates as the single source of truth.
3278            let p2p_cfg = &self.config.ocpm.p2p_process;
3279            let any_imperfection = p2p_cfg.rework_probability > 0.0
3280                || p2p_cfg.skip_step_probability > 0.0
3281                || p2p_cfg.out_of_order_probability > 0.0;
3282            if any_imperfection {
3283                use rand_chacha::rand_core::SeedableRng;
3284                let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3285                    rework_rate: p2p_cfg.rework_probability,
3286                    skip_rate: p2p_cfg.skip_step_probability,
3287                    out_of_order_rate: p2p_cfg.out_of_order_probability,
3288                };
3289                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3290                let stats =
3291                    datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3292                if stats.rework + stats.skipped + stats.out_of_order > 0 {
3293                    info!(
3294                        "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3295                        stats.rework, stats.skipped, stats.out_of_order
3296                    );
3297                }
3298            }
3299        }
3300
3301        // Phase 19: Sales Quotes, Management KPIs, Budgets
3302        let sales_kpi_budgets =
3303            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3304
3305        // Phase 22: Treasury Data Generation
3306        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
3307        // are included in the pre-tax income used by phase_tax_generation.
3308        let treasury =
3309            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3310
3311        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
3312        if !treasury.journal_entries.is_empty() {
3313            debug!(
3314                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3315                treasury.journal_entries.len()
3316            );
3317            entries.extend(treasury.journal_entries.iter().cloned());
3318        }
3319
3320        // Phase 20: Tax Generation
3321        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3322
3323        // Phase 20 JEs: Merge tax posting journal entries into main GL
3324        if !tax.tax_posting_journal_entries.is_empty() {
3325            debug!(
3326                "Merging {} tax posting JEs into GL",
3327                tax.tax_posting_journal_entries.len()
3328            );
3329            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3330        }
3331
3332        // Phase 20b: FINAL fraud behavioral bias sweep.
3333        //
3334        // Many phases AFTER Phase 8b (ECL / provisions / treasury / tax /
3335        // period close) extend `entries` with new journal entries that may
3336        // carry `is_fraud = true` (e.g. tax-provision entries derived from
3337        // already-fraudulent transactions). Those late additions miss the
3338        // Phase 8b sweep and ship without bias applied — which is exactly
3339        // why SDK-team production jobs kept reporting `off_hours 0× lift`
3340        // even after v3.1.1 closed the per-phase gap for early-added JEs.
3341        //
3342        // Running the sweep one more time here guarantees every is_fraud
3343        // entry — regardless of which phase added it — has bias applied.
3344        // `!is_anomaly` gates out anomaly-injector entries (which already
3345        // got biased inline); the sweep is otherwise idempotent-ish:
3346        // weekend / off_hours re-fire to another valid weekend / off-hour,
3347        // post_close is guarded by `!is_post_close`, and round-dollar
3348        // rescaling on an already-round amount is a no-op (ratio = 1).
3349        {
3350            use datasynth_core::fraud_bias::{
3351                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3352            };
3353            use rand_chacha::rand_core::SeedableRng;
3354            let cfg = FraudBehavioralBiasConfig::default();
3355            if cfg.enabled {
3356                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3357                let mut swept = 0usize;
3358                for entry in entries.iter_mut() {
3359                    if entry.header.is_fraud && !entry.header.is_anomaly {
3360                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3361                        swept += 1;
3362                    }
3363                }
3364                if swept > 0 {
3365                    info!(
3366                        "Phase 20b: final behavioral-bias sweep applied to {swept} \
3367                         non-anomaly fraud entries (covers late-added JEs from \
3368                         ECL / provisions / treasury / tax / period-close)"
3369                    );
3370                }
3371            }
3372        }
3373
3374        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
3375        // Build supplementary cash flow items from upstream JE data (depreciation,
3376        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
3377        {
3378            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3379
3380            let framework_str = {
3381                use datasynth_config::schema::AccountingFrameworkConfig;
3382                match self
3383                    .config
3384                    .accounting_standards
3385                    .framework
3386                    .unwrap_or_default()
3387                {
3388                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3389                        "IFRS"
3390                    }
3391                    _ => "US_GAAP",
3392                }
3393            };
3394
3395            // Sum depreciation debits (account 6000) from close JEs
3396            let depreciation_total: rust_decimal::Decimal = entries
3397                .iter()
3398                .filter(|je| je.header.document_type == "CL")
3399                .flat_map(|je| je.lines.iter())
3400                .filter(|l| l.gl_account.starts_with("6000"))
3401                .map(|l| l.debit_amount)
3402                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3403
3404            // Sum interest expense debits (account 7100)
3405            let interest_paid: rust_decimal::Decimal = entries
3406                .iter()
3407                .flat_map(|je| je.lines.iter())
3408                .filter(|l| l.gl_account.starts_with("7100"))
3409                .map(|l| l.debit_amount)
3410                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3411
3412            // Sum tax expense debits (account 8000)
3413            let tax_paid: rust_decimal::Decimal = entries
3414                .iter()
3415                .flat_map(|je| je.lines.iter())
3416                .filter(|l| l.gl_account.starts_with("8000"))
3417                .map(|l| l.debit_amount)
3418                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3419
3420            // Sum capex debits on fixed assets (account 1500)
3421            let capex: rust_decimal::Decimal = entries
3422                .iter()
3423                .flat_map(|je| je.lines.iter())
3424                .filter(|l| l.gl_account.starts_with("1500"))
3425                .map(|l| l.debit_amount)
3426                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3427
3428            // Dividends paid: sum debits on dividends payable (account 2170) from payment JEs
3429            let dividends_paid: rust_decimal::Decimal = entries
3430                .iter()
3431                .flat_map(|je| je.lines.iter())
3432                .filter(|l| l.gl_account == "2170")
3433                .map(|l| l.debit_amount)
3434                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3435
3436            let cf_data = CashFlowSourceData {
3437                depreciation_total,
3438                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
3439                delta_ar: rust_decimal::Decimal::ZERO,
3440                delta_ap: rust_decimal::Decimal::ZERO,
3441                delta_inventory: rust_decimal::Decimal::ZERO,
3442                capex,
3443                debt_issuance: rust_decimal::Decimal::ZERO,
3444                debt_repayment: rust_decimal::Decimal::ZERO,
3445                interest_paid,
3446                tax_paid,
3447                dividends_paid,
3448                framework: framework_str.to_string(),
3449            };
3450
3451            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3452            if !enhanced_cf_items.is_empty() {
3453                // Merge into ALL cash flow statements (standalone + consolidated)
3454                use datasynth_core::models::StatementType;
3455                let merge_count = enhanced_cf_items.len();
3456                for stmt in financial_reporting
3457                    .financial_statements
3458                    .iter_mut()
3459                    .chain(financial_reporting.consolidated_statements.iter_mut())
3460                    .chain(
3461                        financial_reporting
3462                            .standalone_statements
3463                            .values_mut()
3464                            .flat_map(|v| v.iter_mut()),
3465                    )
3466                {
3467                    if stmt.statement_type == StatementType::CashFlowStatement {
3468                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3469                    }
3470                }
3471                info!(
3472                    "Enhanced cash flow: {} supplementary items merged into CF statements",
3473                    merge_count
3474                );
3475            }
3476        }
3477
3478        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
3479        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
3480        self.generate_notes_to_financial_statements(
3481            &mut financial_reporting,
3482            &accounting_standards,
3483            &tax,
3484            &hr,
3485            &audit,
3486            &treasury,
3487        );
3488
3489        // Phase 20b: Supplement segment reports from real JEs (v2.4)
3490        // When we have 2+ companies, derive segment data from actual journal entries
3491        // to complement or replace the FS-generator-based segments.
3492        if self.config.companies.len() >= 2 && !entries.is_empty() {
3493            let companies: Vec<(String, String)> = self
3494                .config
3495                .companies
3496                .iter()
3497                .map(|c| (c.code.clone(), c.name.clone()))
3498                .collect();
3499            let ic_elim: rust_decimal::Decimal =
3500                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3501            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3502                .unwrap_or(NaiveDate::MIN);
3503            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3504            let period_label = format!(
3505                "{}-{:02}",
3506                end_date.year(),
3507                (end_date - chrono::Days::new(1)).month()
3508            );
3509
3510            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3511            let (je_segments, je_recon) =
3512                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3513            if !je_segments.is_empty() {
3514                info!(
3515                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3516                    je_segments.len(),
3517                    ic_elim,
3518                );
3519                // Replace if existing segment_reports were empty; otherwise supplement
3520                if financial_reporting.segment_reports.is_empty() {
3521                    financial_reporting.segment_reports = je_segments;
3522                    financial_reporting.segment_reconciliations = vec![je_recon];
3523                } else {
3524                    financial_reporting.segment_reports.extend(je_segments);
3525                    financial_reporting.segment_reconciliations.push(je_recon);
3526                }
3527            }
3528        }
3529
3530        // Phase 21: ESG Data Generation
3531        let esg_snap =
3532            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3533
3534        // Phase 23: Project Accounting Data Generation
3535        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3536
3537        // Phase 24: Process Evolution + Organizational Events
3538        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3539
3540        // Phase 24b: Disruption Events
3541        let disruption_events = self.phase_disruption_events(&mut stats)?;
3542
3543        // Phase 27: Bi-Temporal Vendor Version Chains
3544        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3545
3546        // Phase 28: Entity Relationship Graph + Cross-Process Links
3547        let (entity_relationship_graph, cross_process_links) =
3548            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3549
3550        // Phase 29: Industry-specific GL accounts
3551        let industry_output = self.phase_industry_data(&mut stats);
3552
3553        // Phase: Compliance regulations (must run before hypergraph so it can be included)
3554        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3555
3556        // Phase: Neural enhancement (config-acknowledged-only in v4.0).
3557        //
3558        // The neural / hybrid diffusion path was a documented L2 stub
3559        // in v3.x; actual neural-network training requires ML
3560        // infrastructure (PyTorch / candle bindings, GPU access,
3561        // training loops) that was never wired through the
3562        // orchestrator. Rather than keep a silently-no-op block that
3563        // misleads users into thinking neural training happens, v4.0
3564        // acknowledges the config — exposing stats so downstream
3565        // tooling can see the request — but emits a clear warning
3566        // when a non-statistical backend is requested. The statistical
3567        // diffusion backend continues to run via
3568        // `phase_diffusion_enhancement`.
3569        //
3570        // Users who need real neural diffusion: track the roadmap item
3571        // in the v4.x backlog and consider contributing the backend
3572        // (the `DiffusionBackend` trait is the integration point).
3573        if self.config.diffusion.enabled
3574            && (self.config.diffusion.backend == "neural"
3575                || self.config.diffusion.backend == "hybrid")
3576        {
3577            let neural = &self.config.diffusion.neural;
3578            let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3579            stats.neural_hybrid_weight = Some(weight);
3580            stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3581            stats.neural_routed_column_count = Some(neural.neural_columns.len());
3582            warn!(
3583                "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3584                 the neural/hybrid training path is not yet shipped. Config \
3585                 is captured in stats (weight={weight:.2}, strategy={}, \
3586                 columns={}) but no neural training runs. Statistical \
3587                 diffusion (backend='statistical') continues to work.",
3588                self.config.diffusion.backend,
3589                neural.hybrid_strategy,
3590                neural.neural_columns.len(),
3591            );
3592        }
3593
3594        // Phase 19b: Hypergraph Export (after all data is available)
3595        self.phase_hypergraph_export(
3596            &coa,
3597            &entries,
3598            &document_flows,
3599            &sourcing,
3600            &hr,
3601            &manufacturing_snap,
3602            &banking,
3603            &audit,
3604            &financial_reporting,
3605            &ocpm,
3606            &compliance_regulations,
3607            &mut stats,
3608        )?;
3609
3610        // Phase 10c: Additional graph builders (approval, entity, banking)
3611        // These run after all data is available since they need banking/IC data.
3612        if self.phase_config.generate_graph_export {
3613            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3614        }
3615
3616        // Log informational messages for config sections not yet fully wired
3617        if self.config.streaming.enabled {
3618            info!("Note: streaming config is enabled but batch mode does not use it");
3619        }
3620        if self.config.vendor_network.enabled {
3621            debug!("Vendor network config available; relationship graph generation is partial");
3622        }
3623        if self.config.customer_segmentation.enabled {
3624            debug!("Customer segmentation config available; segment-aware generation is partial");
3625        }
3626
3627        // Log final resource statistics
3628        let resource_stats = self.resource_guard.stats();
3629        info!(
3630            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3631            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3632            resource_stats.disk.estimated_bytes_written,
3633            resource_stats.degradation_level
3634        );
3635
3636        // Flush any remaining stream sink data
3637        if let Some(ref sink) = self.phase_sink {
3638            if let Err(e) = sink.flush() {
3639                warn!("Stream sink flush failed: {e}");
3640            }
3641        }
3642
3643        // Build data lineage graph
3644        let lineage = self.build_lineage_graph();
3645
3646        // Evaluate quality gates if enabled in config
3647        let gate_result = if self.config.quality_gates.enabled {
3648            let profile_name = &self.config.quality_gates.profile;
3649            match datasynth_eval::gates::get_profile(profile_name) {
3650                Some(profile) => {
3651                    // Build an evaluation populated with actual generation metrics.
3652                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3653
3654                    // Populate balance sheet evaluation from balance validation results
3655                    if balance_validation.validated {
3656                        eval.coherence.balance =
3657                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3658                                equation_balanced: balance_validation.is_balanced,
3659                                max_imbalance: (balance_validation.total_debits
3660                                    - balance_validation.total_credits)
3661                                    .abs(),
3662                                periods_evaluated: 1,
3663                                periods_imbalanced: if balance_validation.is_balanced {
3664                                    0
3665                                } else {
3666                                    1
3667                                },
3668                                period_results: Vec::new(),
3669                                companies_evaluated: self.config.companies.len(),
3670                            });
3671                    }
3672
3673                    // Set coherence passes based on balance validation
3674                    eval.coherence.passes = balance_validation.is_balanced;
3675                    if !balance_validation.is_balanced {
3676                        eval.coherence
3677                            .failures
3678                            .push("Balance sheet equation not satisfied".to_string());
3679                    }
3680
3681                    // Set statistical score based on entry count (basic sanity)
3682                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3683                    eval.statistical.passes = !entries.is_empty();
3684
3685                    // Set quality score from data quality stats
3686                    eval.quality.overall_score = 0.9; // Default high for generated data
3687                    eval.quality.passes = true;
3688
3689                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3690                    info!(
3691                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3692                        profile_name, result.gates_passed, result.gates_total, result.summary
3693                    );
3694                    Some(result)
3695                }
3696                None => {
3697                    warn!(
3698                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3699                        profile_name
3700                    );
3701                    None
3702                }
3703            }
3704        } else {
3705            None
3706        };
3707
3708        // Generate internal controls if enabled
3709        let internal_controls = if self.config.internal_controls.enabled {
3710            InternalControl::standard_controls()
3711        } else {
3712            Vec::new()
3713        };
3714
3715        // v3.3.0: analytics-metadata phase. Runs AFTER all JE-adding
3716        // phases (including fraud-bias sweep at Phase 20b) so derived
3717        // outputs reflect final data.
3718        let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3719
3720        // v3.5.1: statistical validation over the final amount
3721        // distribution. Runs *after* all JE-adding phases so the report
3722        // reflects everything the user will see in the output. Returns
3723        // `None` unless `distributions.validation.enabled = true`.
3724        let statistical_validation = self.phase_statistical_validation(&entries)?;
3725
3726        // v4.1.3+: interconnectivity snapshot — tier assignments,
3727        // value-segment labels, industry-specific metadata. Runs after
3728        // master data is settled so it can index stable IDs.
3729        let interconnectivity = self.phase_interconnectivity();
3730
3731        Ok(EnhancedGenerationResult {
3732            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3733            master_data: std::mem::take(&mut self.master_data),
3734            document_flows,
3735            subledger,
3736            ocpm,
3737            audit,
3738            banking,
3739            graph_export,
3740            sourcing,
3741            financial_reporting,
3742            hr,
3743            accounting_standards,
3744            manufacturing: manufacturing_snap,
3745            sales_kpi_budgets,
3746            tax,
3747            esg: esg_snap,
3748            treasury,
3749            project_accounting,
3750            process_evolution,
3751            organizational_events,
3752            disruption_events,
3753            intercompany,
3754            journal_entries: entries,
3755            anomaly_labels,
3756            balance_validation,
3757            data_quality_stats,
3758            quality_issues,
3759            statistics: stats,
3760            lineage: Some(lineage),
3761            gate_result,
3762            internal_controls,
3763            sod_violations,
3764            opening_balances,
3765            subledger_reconciliation,
3766            counterfactual_pairs,
3767            red_flags,
3768            collusion_rings,
3769            temporal_vendor_chains,
3770            entity_relationship_graph,
3771            cross_process_links,
3772            industry_output,
3773            compliance_regulations,
3774            analytics_metadata,
3775            statistical_validation,
3776            interconnectivity,
3777        })
3778    }
3779
3780    /// v4.1.3+: populate the interconnectivity snapshot from
3781    /// previously-inert schema sections. Empty when all sections are
3782    /// disabled.
3783    fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3784        use rand::{RngExt, SeedableRng};
3785        use rand_chacha::ChaCha8Rng;
3786
3787        let mut snap = InterconnectivitySnapshot::default();
3788        let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3789
3790        // --- Vendor network ---
3791        let vn = &self.config.vendor_network;
3792        if vn.enabled {
3793            let total = self.master_data.vendors.len();
3794            if total > 0 {
3795                let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3796                let remaining_after_t1 = total.saturating_sub(tier1_count);
3797                let depth = vn.depth.clamp(1, 3);
3798                let tier2_count = if depth >= 2 {
3799                    let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3800                    (tier1_count * avg).min(remaining_after_t1)
3801                } else {
3802                    0
3803                };
3804                let tier3_count = total
3805                    .saturating_sub(tier1_count)
3806                    .saturating_sub(tier2_count);
3807
3808                for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3809                    let tier = if idx < tier1_count {
3810                        1
3811                    } else if idx < tier1_count + tier2_count {
3812                        2
3813                    } else {
3814                        3
3815                    };
3816                    snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3817
3818                    // Cluster assignment via configured ratios.
3819                    let cl = &vn.clusters;
3820                    let roll: f64 = rng.random();
3821                    let cluster = if roll < cl.reliable_strategic {
3822                        "reliable_strategic"
3823                    } else if roll < cl.reliable_strategic + cl.standard_operational {
3824                        "standard_operational"
3825                    } else if roll
3826                        < cl.reliable_strategic + cl.standard_operational + cl.transactional
3827                    {
3828                        "transactional"
3829                    } else {
3830                        "problematic"
3831                    };
3832                    snap.vendor_clusters
3833                        .push((vendor.vendor_id.clone(), cluster.to_string()));
3834                }
3835                let _ = tier3_count; // retained for clarity; tier 3 bucket is the remainder
3836            }
3837        }
3838
3839        // --- Customer segmentation ---
3840        let cs = &self.config.customer_segmentation;
3841        if cs.enabled {
3842            let seg = &cs.value_segments;
3843            for customer in &self.master_data.customers {
3844                let roll: f64 = rng.random();
3845                let value_segment = if roll < seg.enterprise.customer_share {
3846                    "enterprise"
3847                } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3848                    "mid_market"
3849                } else if roll
3850                    < seg.enterprise.customer_share
3851                        + seg.mid_market.customer_share
3852                        + seg.smb.customer_share
3853                {
3854                    "smb"
3855                } else {
3856                    "consumer"
3857                };
3858                snap.customer_value_segments
3859                    .push((customer.customer_id.clone(), value_segment.to_string()));
3860
3861                let roll2: f64 = rng.random();
3862                let life = &cs.lifecycle;
3863                let lifecycle = if roll2 < life.prospect_rate {
3864                    "prospect"
3865                } else if roll2 < life.prospect_rate + life.new_rate {
3866                    "new"
3867                } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3868                    "growth"
3869                } else if roll2
3870                    < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3871                {
3872                    "mature"
3873                } else if roll2
3874                    < life.prospect_rate
3875                        + life.new_rate
3876                        + life.growth_rate
3877                        + life.mature_rate
3878                        + life.at_risk_rate
3879                {
3880                    "at_risk"
3881                } else if roll2
3882                    < life.prospect_rate
3883                        + life.new_rate
3884                        + life.growth_rate
3885                        + life.mature_rate
3886                        + life.at_risk_rate
3887                        + life.churned_rate
3888                {
3889                    "churned"
3890                } else {
3891                    "won_back"
3892                };
3893                snap.customer_lifecycle_stages
3894                    .push((customer.customer_id.clone(), lifecycle.to_string()));
3895            }
3896        }
3897
3898        // --- Industry-specific metadata (minimal) ---
3899        let is = &self.config.industry_specific;
3900        if is.enabled {
3901            snap.industry_metadata.push(format!(
3902                "industry_specific.enabled=true (industry={:?})",
3903                self.config.global.industry
3904            ));
3905        }
3906
3907        snap
3908    }
3909
3910    // ========================================================================
3911    // Generation Phase Methods
3912    // ========================================================================
3913
3914    /// Phase 1: Generate Chart of Accounts and update statistics.
3915    fn phase_chart_of_accounts(
3916        &mut self,
3917        stats: &mut EnhancedGenerationStatistics,
3918    ) -> SynthResult<Arc<ChartOfAccounts>> {
3919        info!("Phase 1: Generating Chart of Accounts");
3920        let coa = self.generate_coa()?;
3921        stats.accounts_count = coa.account_count();
3922        info!(
3923            "Chart of Accounts generated: {} accounts",
3924            stats.accounts_count
3925        );
3926        self.check_resources_with_log("post-coa")?;
3927        Ok(coa)
3928    }
3929
3930    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
3931    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3932        if self.phase_config.generate_master_data {
3933            info!("Phase 2: Generating Master Data");
3934            self.generate_master_data()?;
3935            stats.vendor_count = self.master_data.vendors.len();
3936            stats.customer_count = self.master_data.customers.len();
3937            stats.material_count = self.master_data.materials.len();
3938            stats.asset_count = self.master_data.assets.len();
3939            stats.employee_count = self.master_data.employees.len();
3940            info!(
3941                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3942                stats.vendor_count, stats.customer_count, stats.material_count,
3943                stats.asset_count, stats.employee_count
3944            );
3945            self.check_resources_with_log("post-master-data")?;
3946        } else {
3947            debug!("Phase 2: Skipped (master data generation disabled)");
3948        }
3949        Ok(())
3950    }
3951
3952    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
3953    fn phase_document_flows(
3954        &mut self,
3955        stats: &mut EnhancedGenerationStatistics,
3956    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3957        let mut document_flows = DocumentFlowSnapshot::default();
3958        let mut subledger = SubledgerSnapshot::default();
3959        // Dunning JEs (interest + charges) accumulated here and merged into the
3960        // main FA-JE list below so they appear in the GL.
3961        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3962
3963        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3964            info!("Phase 3: Generating Document Flows");
3965            self.generate_document_flows(&mut document_flows)?;
3966            stats.p2p_chain_count = document_flows.p2p_chains.len();
3967            stats.o2c_chain_count = document_flows.o2c_chains.len();
3968            info!(
3969                "Document flows generated: {} P2P chains, {} O2C chains",
3970                stats.p2p_chain_count, stats.o2c_chain_count
3971            );
3972
3973            // Phase 3b: Link document flows to subledgers (for data coherence)
3974            debug!("Phase 3b: Linking document flows to subledgers");
3975            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3976            stats.ap_invoice_count = subledger.ap_invoices.len();
3977            stats.ar_invoice_count = subledger.ar_invoices.len();
3978            debug!(
3979                "Subledgers linked: {} AP invoices, {} AR invoices",
3980                stats.ap_invoice_count, stats.ar_invoice_count
3981            );
3982
3983            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
3984            // Without this step the subledger is systematically overstated because
3985            // amount_remaining is set at invoice creation and never reduced by
3986            // the payments that were generated in the document-flow phase.
3987            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3988            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3989            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3990            debug!("Payment settlements applied to AP and AR subledgers");
3991
3992            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
3993            // The as-of date is the last day of the configured period.
3994            if let Ok(start_date) =
3995                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3996            {
3997                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3998                    - chrono::Days::new(1);
3999                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4000                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
4001                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
4002                // derived from JE-level aggregation and will typically differ. This is a known
4003                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
4004                // generated independently. A future reconciliation phase should align them by
4005                // using subledger totals as the authoritative source for BS Receivables.
4006                for company in &self.config.companies {
4007                    let ar_report = ARAgingReport::from_invoices(
4008                        company.code.clone(),
4009                        &subledger.ar_invoices,
4010                        as_of_date,
4011                    );
4012                    subledger.ar_aging_reports.push(ar_report);
4013
4014                    let ap_report = APAgingReport::from_invoices(
4015                        company.code.clone(),
4016                        &subledger.ap_invoices,
4017                        as_of_date,
4018                    );
4019                    subledger.ap_aging_reports.push(ap_report);
4020                }
4021                debug!(
4022                    "AR/AP aging reports built: {} AR, {} AP",
4023                    subledger.ar_aging_reports.len(),
4024                    subledger.ap_aging_reports.len()
4025                );
4026
4027                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
4028                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4029                {
4030                    use datasynth_generators::DunningGenerator;
4031                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4032                    for company in &self.config.companies {
4033                        let currency = company.currency.as_str();
4034                        // Collect mutable references to AR invoices for this company
4035                        // (dunning generator updates dunning_info on invoices in-place).
4036                        let mut company_invoices: Vec<
4037                            datasynth_core::models::subledger::ar::ARInvoice,
4038                        > = subledger
4039                            .ar_invoices
4040                            .iter()
4041                            .filter(|inv| inv.company_code == company.code)
4042                            .cloned()
4043                            .collect();
4044
4045                        if company_invoices.is_empty() {
4046                            continue;
4047                        }
4048
4049                        let result = dunning_gen.execute_dunning_run(
4050                            &company.code,
4051                            as_of_date,
4052                            &mut company_invoices,
4053                            currency,
4054                        );
4055
4056                        // Write back updated dunning info to the main AR invoice list
4057                        for updated in &company_invoices {
4058                            if let Some(orig) = subledger
4059                                .ar_invoices
4060                                .iter_mut()
4061                                .find(|i| i.invoice_number == updated.invoice_number)
4062                            {
4063                                orig.dunning_info = updated.dunning_info.clone();
4064                            }
4065                        }
4066
4067                        subledger.dunning_runs.push(result.dunning_run);
4068                        subledger.dunning_letters.extend(result.letters);
4069                        // Dunning JEs (interest + charges) collected into local buffer.
4070                        dunning_journal_entries.extend(result.journal_entries);
4071                    }
4072                    debug!(
4073                        "Dunning runs complete: {} runs, {} letters",
4074                        subledger.dunning_runs.len(),
4075                        subledger.dunning_letters.len()
4076                    );
4077                }
4078            }
4079
4080            self.check_resources_with_log("post-document-flows")?;
4081        } else {
4082            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4083        }
4084
4085        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
4086        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4087        if !self.master_data.assets.is_empty() {
4088            debug!("Generating FA subledger records");
4089            let company_code = self
4090                .config
4091                .companies
4092                .first()
4093                .map(|c| c.code.as_str())
4094                .unwrap_or("1000");
4095            let currency = self
4096                .config
4097                .companies
4098                .first()
4099                .map(|c| c.currency.as_str())
4100                .unwrap_or("USD");
4101
4102            let mut fa_gen = datasynth_generators::FAGenerator::new(
4103                datasynth_generators::FAGeneratorConfig::default(),
4104                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4105            );
4106
4107            for asset in &self.master_data.assets {
4108                let (record, je) = fa_gen.generate_asset_acquisition(
4109                    company_code,
4110                    &format!("{:?}", asset.asset_class),
4111                    &asset.description,
4112                    asset.acquisition_date,
4113                    currency,
4114                    asset.cost_center.as_deref(),
4115                );
4116                subledger.fa_records.push(record);
4117                fa_journal_entries.push(je);
4118            }
4119
4120            stats.fa_subledger_count = subledger.fa_records.len();
4121            debug!(
4122                "FA subledger records generated: {} (with {} acquisition JEs)",
4123                stats.fa_subledger_count,
4124                fa_journal_entries.len()
4125            );
4126        }
4127
4128        // Generate Inventory subledger records from master data materials
4129        if !self.master_data.materials.is_empty() {
4130            debug!("Generating Inventory subledger records");
4131            let first_company = self.config.companies.first();
4132            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4133            let inv_currency = first_company
4134                .map(|c| c.currency.clone())
4135                .unwrap_or_else(|| "USD".to_string());
4136
4137            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4138                datasynth_generators::InventoryGeneratorConfig::default(),
4139                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4140                inv_currency.clone(),
4141            );
4142
4143            for (i, material) in self.master_data.materials.iter().enumerate() {
4144                let plant = format!("PLANT{:02}", (i % 3) + 1);
4145                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4146                let initial_qty = rust_decimal::Decimal::from(
4147                    material
4148                        .safety_stock
4149                        .to_string()
4150                        .parse::<i64>()
4151                        .unwrap_or(100),
4152                );
4153
4154                let position = inv_gen.generate_position(
4155                    company_code,
4156                    &plant,
4157                    &storage_loc,
4158                    &material.material_id,
4159                    &material.description,
4160                    initial_qty,
4161                    Some(material.standard_cost),
4162                    &inv_currency,
4163                );
4164                subledger.inventory_positions.push(position);
4165            }
4166
4167            stats.inventory_subledger_count = subledger.inventory_positions.len();
4168            debug!(
4169                "Inventory subledger records generated: {}",
4170                stats.inventory_subledger_count
4171            );
4172        }
4173
4174        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
4175        if !subledger.fa_records.is_empty() {
4176            if let Ok(start_date) =
4177                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4178            {
4179                let company_code = self
4180                    .config
4181                    .companies
4182                    .first()
4183                    .map(|c| c.code.as_str())
4184                    .unwrap_or("1000");
4185                let fiscal_year = start_date.year();
4186                let start_period = start_date.month();
4187                let end_period =
4188                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4189
4190                let depr_cfg = FaDepreciationScheduleConfig {
4191                    fiscal_year,
4192                    start_period,
4193                    end_period,
4194                    seed_offset: 800,
4195                };
4196                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4197                let runs = depr_gen.generate(company_code, &subledger.fa_records);
4198                let run_count = runs.len();
4199                subledger.depreciation_runs = runs;
4200                debug!(
4201                    "Depreciation runs generated: {} runs for {} periods",
4202                    run_count, self.config.global.period_months
4203                );
4204            }
4205        }
4206
4207        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
4208        if !subledger.inventory_positions.is_empty() {
4209            if let Ok(start_date) =
4210                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4211            {
4212                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4213                    - chrono::Days::new(1);
4214
4215                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4216                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4217
4218                for company in &self.config.companies {
4219                    let result = inv_val_gen.generate(
4220                        &company.code,
4221                        &subledger.inventory_positions,
4222                        as_of_date,
4223                    );
4224                    subledger.inventory_valuations.push(result);
4225                }
4226                debug!(
4227                    "Inventory valuations generated: {} company reports",
4228                    subledger.inventory_valuations.len()
4229                );
4230            }
4231        }
4232
4233        Ok((document_flows, subledger, fa_journal_entries))
4234    }
4235
4236    /// Phase 3c: Generate OCPM events from document flows.
4237    #[allow(clippy::too_many_arguments)]
4238    fn phase_ocpm_events(
4239        &mut self,
4240        document_flows: &DocumentFlowSnapshot,
4241        sourcing: &SourcingSnapshot,
4242        hr: &HrSnapshot,
4243        manufacturing: &ManufacturingSnapshot,
4244        banking: &BankingSnapshot,
4245        audit: &AuditSnapshot,
4246        financial_reporting: &FinancialReportingSnapshot,
4247        stats: &mut EnhancedGenerationStatistics,
4248    ) -> SynthResult<OcpmSnapshot> {
4249        let degradation = self.check_resources()?;
4250        if degradation >= DegradationLevel::Reduced {
4251            debug!(
4252                "Phase skipped due to resource pressure (degradation: {:?})",
4253                degradation
4254            );
4255            return Ok(OcpmSnapshot::default());
4256        }
4257        if self.phase_config.generate_ocpm_events {
4258            info!("Phase 3c: Generating OCPM Events");
4259            let ocpm_snapshot = self.generate_ocpm_events(
4260                document_flows,
4261                sourcing,
4262                hr,
4263                manufacturing,
4264                banking,
4265                audit,
4266                financial_reporting,
4267            )?;
4268            stats.ocpm_event_count = ocpm_snapshot.event_count;
4269            stats.ocpm_object_count = ocpm_snapshot.object_count;
4270            stats.ocpm_case_count = ocpm_snapshot.case_count;
4271            info!(
4272                "OCPM events generated: {} events, {} objects, {} cases",
4273                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4274            );
4275            self.check_resources_with_log("post-ocpm")?;
4276            Ok(ocpm_snapshot)
4277        } else {
4278            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4279            Ok(OcpmSnapshot::default())
4280        }
4281    }
4282
4283    /// Phase 4: Generate journal entries from document flows and standalone generation.
4284    fn phase_journal_entries(
4285        &mut self,
4286        coa: &Arc<ChartOfAccounts>,
4287        document_flows: &DocumentFlowSnapshot,
4288        _stats: &mut EnhancedGenerationStatistics,
4289    ) -> SynthResult<Vec<JournalEntry>> {
4290        let mut entries = Vec::new();
4291
4292        // Phase 4a: Generate JEs from document flows (for data coherence)
4293        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4294            debug!("Phase 4a: Generating JEs from document flows");
4295            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4296            debug!("Generated {} JEs from document flows", flow_entries.len());
4297            entries.extend(flow_entries);
4298        }
4299
4300        // Phase 4b: Generate standalone journal entries
4301        if self.phase_config.generate_journal_entries {
4302            info!("Phase 4: Generating Journal Entries");
4303            let je_entries = self.generate_journal_entries(coa)?;
4304            info!("Generated {} standalone journal entries", je_entries.len());
4305            entries.extend(je_entries);
4306        } else {
4307            debug!("Phase 4: Skipped (journal entry generation disabled)");
4308        }
4309
4310        // Phase 4c (shard mode): inject pre-built IC journal entries from
4311        // `ShardContext`. When running standalone (no group engine), this
4312        // is a no-op. See crate::shard_context::ShardContext for rationale.
4313        if let Some(ctx) = &self.shard_context {
4314            if !ctx.extra_journal_entries.is_empty() {
4315                debug!(
4316                    "Phase 4c: appending {} shard-mode IC journal entries",
4317                    ctx.extra_journal_entries.len()
4318                );
4319                entries.extend(ctx.extra_journal_entries.iter().cloned());
4320            }
4321        }
4322
4323        if !entries.is_empty() {
4324            // Note: stats.total_entries/total_line_items are set in generate()
4325            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
4326            self.check_resources_with_log("post-journal-entries")?;
4327        }
4328
4329        Ok(entries)
4330    }
4331
4332    /// Phase 5: Inject anomalies into journal entries.
4333    fn phase_anomaly_injection(
4334        &mut self,
4335        entries: &mut [JournalEntry],
4336        actions: &DegradationActions,
4337        stats: &mut EnhancedGenerationStatistics,
4338    ) -> SynthResult<AnomalyLabels> {
4339        if self.phase_config.inject_anomalies
4340            && !entries.is_empty()
4341            && !actions.skip_anomaly_injection
4342        {
4343            info!("Phase 5: Injecting Anomalies");
4344            let result = self.inject_anomalies(entries)?;
4345            stats.anomalies_injected = result.labels.len();
4346            info!("Injected {} anomalies", stats.anomalies_injected);
4347            self.check_resources_with_log("post-anomaly-injection")?;
4348            Ok(result)
4349        } else if actions.skip_anomaly_injection {
4350            warn!("Phase 5: Skipped due to resource degradation");
4351            Ok(AnomalyLabels::default())
4352        } else {
4353            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4354            Ok(AnomalyLabels::default())
4355        }
4356    }
4357
4358    /// Phase 6: Validate balance sheet equation on journal entries.
4359    fn phase_balance_validation(
4360        &mut self,
4361        entries: &[JournalEntry],
4362    ) -> SynthResult<BalanceValidationResult> {
4363        if self.phase_config.validate_balances && !entries.is_empty() {
4364            debug!("Phase 6: Validating Balances");
4365            let balance_validation = self.validate_journal_entries(entries)?;
4366            if balance_validation.is_balanced {
4367                debug!("Balance validation passed");
4368            } else {
4369                warn!(
4370                    "Balance validation found {} errors",
4371                    balance_validation.validation_errors.len()
4372                );
4373            }
4374            Ok(balance_validation)
4375        } else {
4376            Ok(BalanceValidationResult::default())
4377        }
4378    }
4379
4380    /// Validate that every `gl_account` referenced in `entries` exists in the
4381    /// chart of accounts.
4382    ///
4383    /// Always emits a warn-level log when the COA is missing accounts; in
4384    /// strict mode (`phase_config.validate_coa_coverage_strict`) returns
4385    /// `SynthError::generation` so the caller can fail fast.
4386    fn validate_coa_coverage(
4387        &self,
4388        entries: &[JournalEntry],
4389        coa: &ChartOfAccounts,
4390    ) -> SynthResult<()> {
4391        if entries.is_empty() {
4392            return Ok(());
4393        }
4394        let coa_set: std::collections::HashSet<&str> = coa
4395            .accounts
4396            .iter()
4397            .map(|a| a.account_number.as_str())
4398            .collect();
4399        let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4400        for je in entries {
4401            for line in je.lines.iter() {
4402                if !coa_set.contains(line.gl_account.as_str()) {
4403                    missing.insert(line.gl_account.clone());
4404                }
4405            }
4406        }
4407        if missing.is_empty() {
4408            debug!("COA coverage validation passed");
4409            return Ok(());
4410        }
4411        let msg = format!(
4412            "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4413            missing.len(),
4414            missing.iter().take(10).collect::<Vec<_>>()
4415        );
4416        if self.phase_config.validate_coa_coverage_strict {
4417            Err(SynthError::generation(msg))
4418        } else {
4419            warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4420            Ok(())
4421        }
4422    }
4423
4424    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
4425    fn phase_data_quality_injection(
4426        &mut self,
4427        entries: &mut [JournalEntry],
4428        actions: &DegradationActions,
4429        stats: &mut EnhancedGenerationStatistics,
4430    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4431        if self.phase_config.inject_data_quality
4432            && !entries.is_empty()
4433            && !actions.skip_data_quality
4434        {
4435            info!("Phase 7: Injecting Data Quality Variations");
4436            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4437            stats.data_quality_issues = dq_stats.records_with_issues;
4438            info!("Injected {} data quality issues", stats.data_quality_issues);
4439            self.check_resources_with_log("post-data-quality")?;
4440            Ok((dq_stats, quality_issues))
4441        } else if actions.skip_data_quality {
4442            warn!("Phase 7: Skipped due to resource degradation");
4443            // v4.4.1: report the denominator (entries seen) even when
4444            // injection is skipped, so downstream consumers can tell
4445            // "skipped, 0/N" apart from "ran but found nothing".
4446            Ok((stats_with_denominator(entries.len()), Vec::new()))
4447        } else {
4448            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4449            Ok((stats_with_denominator(entries.len()), Vec::new()))
4450        }
4451    }
4452
4453    /// Phase 10b: Generate period-close journal entries.
4454    ///
4455    /// Generates:
4456    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
4457    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
4458    ///    for the configured period.
4459    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
4460    /// 3. Income statement closing JE per company: transfer net income after tax to retained
4461    ///    earnings via the Income Summary (3600) clearing account.
4462    fn phase_period_close(
4463        &mut self,
4464        entries: &mut Vec<JournalEntry>,
4465        subledger: &SubledgerSnapshot,
4466        stats: &mut EnhancedGenerationStatistics,
4467    ) -> SynthResult<()> {
4468        if !self.phase_config.generate_period_close || entries.is_empty() {
4469            debug!("Phase 10b: Skipped (period close disabled or no entries)");
4470            return Ok(());
4471        }
4472
4473        info!("Phase 10b: Generating period-close journal entries");
4474
4475        use datasynth_core::accounts::{
4476            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4477        };
4478        use rust_decimal::Decimal;
4479
4480        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4481            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4482        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4483        // Posting date for close entries is the last day of the period
4484        let close_date = end_date - chrono::Days::new(1);
4485
4486        // Statutory tax rate (21% — configurable rates come in later tiers)
4487        let tax_rate = Decimal::new(21, 2); // 0.21
4488
4489        // Collect company codes from config
4490        let company_codes: Vec<String> = self
4491            .config
4492            .companies
4493            .iter()
4494            .map(|c| c.code.clone())
4495            .collect();
4496
4497        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
4498        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4499        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4500
4501        // --- Depreciation JEs (per asset) ---
4502        // Compute period depreciation for each active fixed asset using straight-line method.
4503        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
4504        let period_months = self.config.global.period_months;
4505        for asset in &subledger.fa_records {
4506            // Skip assets that are inactive / fully depreciated / non-depreciable
4507            use datasynth_core::models::subledger::fa::AssetStatus;
4508            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4509                continue;
4510            }
4511            let useful_life_months = asset.useful_life_months();
4512            if useful_life_months == 0 {
4513                // Land or CIP — not depreciated
4514                continue;
4515            }
4516            let salvage_value = asset.salvage_value();
4517            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4518            if depreciable_base == Decimal::ZERO {
4519                continue;
4520            }
4521            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4522                * Decimal::from(period_months))
4523            .round_dp(2);
4524            if period_depr <= Decimal::ZERO {
4525                continue;
4526            }
4527
4528            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4529            depr_header.document_type = "CL".to_string();
4530            depr_header.header_text = Some(format!(
4531                "Depreciation - {} {}",
4532                asset.asset_number, asset.description
4533            ));
4534            depr_header.created_by = "CLOSE_ENGINE".to_string();
4535            depr_header.source = TransactionSource::Automated;
4536            depr_header.business_process = Some(BusinessProcess::R2R);
4537
4538            let doc_id = depr_header.document_id;
4539            let mut depr_je = JournalEntry::new(depr_header);
4540
4541            // DR Depreciation Expense (6000)
4542            depr_je.add_line(JournalEntryLine::debit(
4543                doc_id,
4544                1,
4545                expense_accounts::DEPRECIATION.to_string(),
4546                period_depr,
4547            ));
4548            // CR Accumulated Depreciation (1510)
4549            depr_je.add_line(JournalEntryLine::credit(
4550                doc_id,
4551                2,
4552                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4553                period_depr,
4554            ));
4555
4556            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4557            close_jes.push(depr_je);
4558        }
4559
4560        if !subledger.fa_records.is_empty() {
4561            debug!(
4562                "Generated {} depreciation JEs from {} FA records",
4563                close_jes.len(),
4564                subledger.fa_records.len()
4565            );
4566        }
4567
4568        // --- Accrual entries (standard period-end accruals per company) ---
4569        // Generate standard accrued expense entries (utilities, rent, interest) using
4570        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
4571        {
4572            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4573            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4574            // v3.4.3: snap reversal dates to business days. No-op when
4575            // temporal_patterns.business_days is disabled.
4576            if let Some(ctx) = &self.temporal_context {
4577                accrual_gen.set_temporal_context(Arc::clone(ctx));
4578            }
4579
4580            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
4581            let accrual_items: &[(&str, &str, &str)] = &[
4582                ("Accrued Utilities", "6200", "2100"),
4583                ("Accrued Rent", "6300", "2100"),
4584                ("Accrued Interest", "6100", "2150"),
4585            ];
4586
4587            for company_code in &company_codes {
4588                // Estimate company revenue from existing JEs
4589                let company_revenue: Decimal = entries
4590                    .iter()
4591                    .filter(|e| e.header.company_code == *company_code)
4592                    .flat_map(|e| e.lines.iter())
4593                    .filter(|l| l.gl_account.starts_with('4'))
4594                    .map(|l| l.credit_amount - l.debit_amount)
4595                    .fold(Decimal::ZERO, |acc, v| acc + v);
4596
4597                if company_revenue <= Decimal::ZERO {
4598                    continue;
4599                }
4600
4601                // Use 0.5% of period revenue per accrual item as a proxy
4602                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4603                if accrual_base <= Decimal::ZERO {
4604                    continue;
4605                }
4606
4607                for (description, expense_acct, liability_acct) in accrual_items {
4608                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4609                        company_code,
4610                        description,
4611                        accrual_base,
4612                        expense_acct,
4613                        liability_acct,
4614                        close_date,
4615                        None,
4616                    );
4617                    close_jes.push(accrual_je);
4618                    if let Some(rev_je) = reversal_je {
4619                        close_jes.push(rev_je);
4620                    }
4621                }
4622            }
4623
4624            debug!(
4625                "Generated accrual entries for {} companies",
4626                company_codes.len()
4627            );
4628        }
4629
4630        for company_code in &company_codes {
4631            // Calculate net income for this company from existing JEs:
4632            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
4633            // Revenue (4xxx): credit-normal, so net = credits - debits
4634            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
4635            let mut total_revenue = Decimal::ZERO;
4636            let mut total_expenses = Decimal::ZERO;
4637
4638            for entry in entries.iter() {
4639                if entry.header.company_code != *company_code {
4640                    continue;
4641                }
4642                for line in &entry.lines {
4643                    let category = AccountCategory::from_account(&line.gl_account);
4644                    match category {
4645                        AccountCategory::Revenue => {
4646                            // Revenue is credit-normal: net revenue = credits - debits
4647                            total_revenue += line.credit_amount - line.debit_amount;
4648                        }
4649                        AccountCategory::Cogs
4650                        | AccountCategory::OperatingExpense
4651                        | AccountCategory::OtherIncomeExpense
4652                        | AccountCategory::Tax => {
4653                            // Expenses are debit-normal: net expense = debits - credits
4654                            total_expenses += line.debit_amount - line.credit_amount;
4655                        }
4656                        _ => {}
4657                    }
4658                }
4659            }
4660
4661            let pre_tax_income = total_revenue - total_expenses;
4662
4663            // Skip if no income statement activity
4664            if pre_tax_income == Decimal::ZERO {
4665                debug!(
4666                    "Company {}: no pre-tax income, skipping period close",
4667                    company_code
4668                );
4669                continue;
4670            }
4671
4672            // --- Tax provision / DTA JE ---
4673            if pre_tax_income > Decimal::ZERO {
4674                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4675                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4676
4677                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4678                tax_header.document_type = "CL".to_string();
4679                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4680                tax_header.created_by = "CLOSE_ENGINE".to_string();
4681                tax_header.source = TransactionSource::Automated;
4682                tax_header.business_process = Some(BusinessProcess::R2R);
4683
4684                let doc_id = tax_header.document_id;
4685                let mut tax_je = JournalEntry::new(tax_header);
4686
4687                // DR Tax Expense (8000)
4688                tax_je.add_line(JournalEntryLine::debit(
4689                    doc_id,
4690                    1,
4691                    tax_accounts::TAX_EXPENSE.to_string(),
4692                    tax_amount,
4693                ));
4694                // CR Income Tax Payable (2130)
4695                tax_je.add_line(JournalEntryLine::credit(
4696                    doc_id,
4697                    2,
4698                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4699                    tax_amount,
4700                ));
4701
4702                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4703                close_jes.push(tax_je);
4704            } else {
4705                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
4706                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
4707                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4708                if dta_amount > Decimal::ZERO {
4709                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4710                    dta_header.document_type = "CL".to_string();
4711                    dta_header.header_text =
4712                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
4713                    dta_header.created_by = "CLOSE_ENGINE".to_string();
4714                    dta_header.source = TransactionSource::Automated;
4715                    dta_header.business_process = Some(BusinessProcess::R2R);
4716
4717                    let doc_id = dta_header.document_id;
4718                    let mut dta_je = JournalEntry::new(dta_header);
4719
4720                    // DR Deferred Tax Asset (1600)
4721                    dta_je.add_line(JournalEntryLine::debit(
4722                        doc_id,
4723                        1,
4724                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4725                        dta_amount,
4726                    ));
4727                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
4728                    // reflecting the benefit of the future deductible temporary difference.
4729                    dta_je.add_line(JournalEntryLine::credit(
4730                        doc_id,
4731                        2,
4732                        tax_accounts::TAX_EXPENSE.to_string(),
4733                        dta_amount,
4734                    ));
4735
4736                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4737                    close_jes.push(dta_je);
4738                    debug!(
4739                        "Company {}: loss year — recognised DTA of {}",
4740                        company_code, dta_amount
4741                    );
4742                }
4743            }
4744
4745            // --- Dividend JEs (v2.4) ---
4746            // If the entity is profitable after tax, declare a 10% dividend payout.
4747            // This runs AFTER tax provision so the dividend is based on post-tax income
4748            // but BEFORE the retained earnings close so the RE transfer reflects the
4749            // reduced balance.
4750            let tax_provision = if pre_tax_income > Decimal::ZERO {
4751                (pre_tax_income * tax_rate).round_dp(2)
4752            } else {
4753                Decimal::ZERO
4754            };
4755            let net_income = pre_tax_income - tax_provision;
4756
4757            if net_income > Decimal::ZERO {
4758                use datasynth_generators::DividendGenerator;
4759                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
4760                let mut div_gen = DividendGenerator::new(self.seed + 460);
4761                let currency_str = self
4762                    .config
4763                    .companies
4764                    .iter()
4765                    .find(|c| c.code == *company_code)
4766                    .map(|c| c.currency.as_str())
4767                    .unwrap_or("USD");
4768                let div_result = div_gen.generate(
4769                    company_code,
4770                    close_date,
4771                    Decimal::new(1, 0), // $1 per share placeholder
4772                    dividend_amount,
4773                    currency_str,
4774                );
4775                let div_je_count = div_result.journal_entries.len();
4776                close_jes.extend(div_result.journal_entries);
4777                debug!(
4778                    "Company {}: declared dividend of {} ({} JEs)",
4779                    company_code, dividend_amount, div_je_count
4780                );
4781            }
4782
4783            // --- Income statement closing JE ---
4784            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
4785            // For a loss year the DTA JE above already recognises the deferred benefit; here we
4786            // close the pre-tax loss into Retained Earnings as-is.
4787            if net_income != Decimal::ZERO {
4788                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4789                close_header.document_type = "CL".to_string();
4790                close_header.header_text =
4791                    Some(format!("Income statement close - {}", company_code));
4792                close_header.created_by = "CLOSE_ENGINE".to_string();
4793                close_header.source = TransactionSource::Automated;
4794                close_header.business_process = Some(BusinessProcess::R2R);
4795
4796                let doc_id = close_header.document_id;
4797                let mut close_je = JournalEntry::new(close_header);
4798
4799                let abs_net_income = net_income.abs();
4800
4801                if net_income > Decimal::ZERO {
4802                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
4803                    close_je.add_line(JournalEntryLine::debit(
4804                        doc_id,
4805                        1,
4806                        equity_accounts::INCOME_SUMMARY.to_string(),
4807                        abs_net_income,
4808                    ));
4809                    close_je.add_line(JournalEntryLine::credit(
4810                        doc_id,
4811                        2,
4812                        equity_accounts::RETAINED_EARNINGS.to_string(),
4813                        abs_net_income,
4814                    ));
4815                } else {
4816                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
4817                    close_je.add_line(JournalEntryLine::debit(
4818                        doc_id,
4819                        1,
4820                        equity_accounts::RETAINED_EARNINGS.to_string(),
4821                        abs_net_income,
4822                    ));
4823                    close_je.add_line(JournalEntryLine::credit(
4824                        doc_id,
4825                        2,
4826                        equity_accounts::INCOME_SUMMARY.to_string(),
4827                        abs_net_income,
4828                    ));
4829                }
4830
4831                debug_assert!(
4832                    close_je.is_balanced(),
4833                    "Income statement closing JE must be balanced"
4834                );
4835                close_jes.push(close_je);
4836            }
4837        }
4838
4839        let close_count = close_jes.len();
4840        if close_count > 0 {
4841            info!("Generated {} period-close journal entries", close_count);
4842            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4843            entries.extend(close_jes);
4844            stats.period_close_je_count = close_count;
4845
4846            // Update total entry/line-item stats
4847            stats.total_entries = entries.len() as u64;
4848            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4849        } else {
4850            debug!("No period-close entries generated (no income statement activity)");
4851        }
4852
4853        Ok(())
4854    }
4855
4856    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
4857    fn phase_audit_data(
4858        &mut self,
4859        entries: &[JournalEntry],
4860        stats: &mut EnhancedGenerationStatistics,
4861    ) -> SynthResult<AuditSnapshot> {
4862        if self.phase_config.generate_audit {
4863            info!("Phase 8: Generating Audit Data");
4864            let audit_snapshot = self.generate_audit_data(entries)?;
4865            stats.audit_engagement_count = audit_snapshot.engagements.len();
4866            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4867            stats.audit_evidence_count = audit_snapshot.evidence.len();
4868            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4869            stats.audit_finding_count = audit_snapshot.findings.len();
4870            stats.audit_judgment_count = audit_snapshot.judgments.len();
4871            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4872            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4873            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4874            stats.audit_sample_count = audit_snapshot.samples.len();
4875            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4876            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4877            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4878            stats.audit_related_party_count = audit_snapshot.related_parties.len();
4879            stats.audit_related_party_transaction_count =
4880                audit_snapshot.related_party_transactions.len();
4881            info!(
4882                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4883                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4884                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4885                 {} RP transactions",
4886                stats.audit_engagement_count,
4887                stats.audit_workpaper_count,
4888                stats.audit_evidence_count,
4889                stats.audit_risk_count,
4890                stats.audit_finding_count,
4891                stats.audit_judgment_count,
4892                stats.audit_confirmation_count,
4893                stats.audit_procedure_step_count,
4894                stats.audit_sample_count,
4895                stats.audit_analytical_result_count,
4896                stats.audit_ia_function_count,
4897                stats.audit_ia_report_count,
4898                stats.audit_related_party_count,
4899                stats.audit_related_party_transaction_count,
4900            );
4901            self.check_resources_with_log("post-audit")?;
4902            Ok(audit_snapshot)
4903        } else {
4904            debug!("Phase 8: Skipped (audit generation disabled)");
4905            Ok(AuditSnapshot::default())
4906        }
4907    }
4908
4909    /// Phase 9: Generate banking KYC/AML data.
4910    fn phase_banking_data(
4911        &mut self,
4912        stats: &mut EnhancedGenerationStatistics,
4913    ) -> SynthResult<BankingSnapshot> {
4914        if self.phase_config.generate_banking {
4915            info!("Phase 9: Generating Banking KYC/AML Data");
4916            let banking_snapshot = self.generate_banking_data()?;
4917            stats.banking_customer_count = banking_snapshot.customers.len();
4918            stats.banking_account_count = banking_snapshot.accounts.len();
4919            stats.banking_transaction_count = banking_snapshot.transactions.len();
4920            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4921            info!(
4922                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4923                stats.banking_customer_count, stats.banking_account_count,
4924                stats.banking_transaction_count, stats.banking_suspicious_count
4925            );
4926            self.check_resources_with_log("post-banking")?;
4927            Ok(banking_snapshot)
4928        } else {
4929            debug!("Phase 9: Skipped (banking generation disabled)");
4930            Ok(BankingSnapshot::default())
4931        }
4932    }
4933
4934    /// Phase 10: Export accounting network graphs for ML training.
4935    fn phase_graph_export(
4936        &mut self,
4937        entries: &[JournalEntry],
4938        coa: &Arc<ChartOfAccounts>,
4939        stats: &mut EnhancedGenerationStatistics,
4940    ) -> SynthResult<GraphExportSnapshot> {
4941        if self.phase_config.generate_graph_export && !entries.is_empty() {
4942            info!("Phase 10: Exporting Accounting Network Graphs");
4943            match self.export_graphs(entries, coa, stats) {
4944                Ok(snapshot) => {
4945                    info!(
4946                        "Graph export complete: {} graphs ({} nodes, {} edges)",
4947                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4948                    );
4949                    Ok(snapshot)
4950                }
4951                Err(e) => {
4952                    warn!("Phase 10: Graph export failed: {}", e);
4953                    Ok(GraphExportSnapshot::default())
4954                }
4955            }
4956        } else {
4957            debug!("Phase 10: Skipped (graph export disabled or no entries)");
4958            Ok(GraphExportSnapshot::default())
4959        }
4960    }
4961
4962    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
4963    #[allow(clippy::too_many_arguments)]
4964    fn phase_hypergraph_export(
4965        &self,
4966        coa: &Arc<ChartOfAccounts>,
4967        entries: &[JournalEntry],
4968        document_flows: &DocumentFlowSnapshot,
4969        sourcing: &SourcingSnapshot,
4970        hr: &HrSnapshot,
4971        manufacturing: &ManufacturingSnapshot,
4972        banking: &BankingSnapshot,
4973        audit: &AuditSnapshot,
4974        financial_reporting: &FinancialReportingSnapshot,
4975        ocpm: &OcpmSnapshot,
4976        compliance: &ComplianceRegulationsSnapshot,
4977        stats: &mut EnhancedGenerationStatistics,
4978    ) -> SynthResult<()> {
4979        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4980            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4981            match self.export_hypergraph(
4982                coa,
4983                entries,
4984                document_flows,
4985                sourcing,
4986                hr,
4987                manufacturing,
4988                banking,
4989                audit,
4990                financial_reporting,
4991                ocpm,
4992                compliance,
4993                stats,
4994            ) {
4995                Ok(info) => {
4996                    info!(
4997                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4998                        info.node_count, info.edge_count, info.hyperedge_count
4999                    );
5000                }
5001                Err(e) => {
5002                    warn!("Phase 10b: Hypergraph export failed: {}", e);
5003                }
5004            }
5005        } else {
5006            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5007        }
5008        Ok(())
5009    }
5010
5011    /// Phase 11: LLM Enrichment.
5012    ///
5013    /// Uses an LLM provider (mock by default) to enrich vendor names with
5014    /// realistic, context-aware names. This phase is non-blocking: failures
5015    /// log a warning but do not stop the generation pipeline.
5016    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5017        if !self.config.llm.enabled {
5018            debug!("Phase 11: Skipped (LLM enrichment disabled)");
5019            return;
5020        }
5021
5022        info!("Phase 11: Starting LLM Enrichment");
5023        let start = std::time::Instant::now();
5024
5025        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5026            // Select provider: use HttpLlmProvider when a non-mock provider is configured
5027            // and the corresponding API key environment variable is present.
5028            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5029                let schema_provider = &self.config.llm.provider;
5030                let api_key_env = match schema_provider.as_str() {
5031                    "openai" => Some("OPENAI_API_KEY"),
5032                    "anthropic" => Some("ANTHROPIC_API_KEY"),
5033                    "custom" => Some("LLM_API_KEY"),
5034                    _ => None,
5035                };
5036                if let Some(key_env) = api_key_env {
5037                    if std::env::var(key_env).is_ok() {
5038                        let llm_config = datasynth_core::llm::LlmConfig {
5039                            model: self.config.llm.model.clone(),
5040                            api_key_env: key_env.to_string(),
5041                            ..datasynth_core::llm::LlmConfig::default()
5042                        };
5043                        match HttpLlmProvider::new(llm_config) {
5044                            Ok(p) => Arc::new(p),
5045                            Err(e) => {
5046                                warn!(
5047                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
5048                                    e
5049                                );
5050                                Arc::new(MockLlmProvider::new(self.seed))
5051                            }
5052                        }
5053                    } else {
5054                        Arc::new(MockLlmProvider::new(self.seed))
5055                    }
5056                } else {
5057                    Arc::new(MockLlmProvider::new(self.seed))
5058                }
5059            };
5060            // v4.1.1+: multi-category enrichment. Vendors remain the
5061            // default path; customers and materials opt in via
5062            // `llm.enrich_customers` / `llm.enrich_materials` flags.
5063            let industry = format!("{:?}", self.config.global.industry);
5064
5065            let vendor_enricher =
5066                datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5067            let max_vendors = self
5068                .config
5069                .llm
5070                .max_vendor_enrichments
5071                .min(self.master_data.vendors.len());
5072            let mut vendors_enriched = 0usize;
5073            for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5074                match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5075                    Ok(name) => {
5076                        vendor.name = name;
5077                        vendors_enriched += 1;
5078                    }
5079                    Err(e) => warn!(
5080                        "LLM vendor enrichment failed for {}: {}",
5081                        vendor.vendor_id, e
5082                    ),
5083                }
5084            }
5085
5086            let mut customers_enriched = 0usize;
5087            if self.config.llm.enrich_customers {
5088                let customer_enricher =
5089                    datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5090                        &provider,
5091                    ));
5092                let max_customers = self
5093                    .config
5094                    .llm
5095                    .max_customer_enrichments
5096                    .min(self.master_data.customers.len());
5097                for customer in self.master_data.customers.iter_mut().take(max_customers) {
5098                    match customer_enricher.enrich_customer_name(
5099                        &industry,
5100                        "general",
5101                        &customer.country,
5102                    ) {
5103                        Ok(name) => {
5104                            customer.name = name;
5105                            customers_enriched += 1;
5106                        }
5107                        Err(e) => warn!(
5108                            "LLM customer enrichment failed for {}: {}",
5109                            customer.customer_id, e
5110                        ),
5111                    }
5112                }
5113            }
5114
5115            let mut materials_enriched = 0usize;
5116            if self.config.llm.enrich_materials {
5117                let material_enricher =
5118                    datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5119                        &provider,
5120                    ));
5121                let max_materials = self
5122                    .config
5123                    .llm
5124                    .max_material_enrichments
5125                    .min(self.master_data.materials.len());
5126                for material in self.master_data.materials.iter_mut().take(max_materials) {
5127                    let material_type = format!("{:?}", material.material_type);
5128                    match material_enricher.enrich_material_description(&material_type, &industry) {
5129                        Ok(desc) => {
5130                            material.description = desc;
5131                            materials_enriched += 1;
5132                        }
5133                        Err(e) => warn!(
5134                            "LLM material enrichment failed for {}: {}",
5135                            material.material_id, e
5136                        ),
5137                    }
5138                }
5139            }
5140
5141            (vendors_enriched, customers_enriched, materials_enriched)
5142        }));
5143
5144        match result {
5145            Ok((v, c, m)) => {
5146                stats.llm_vendors_enriched = v;
5147                stats.llm_customers_enriched = c;
5148                stats.llm_materials_enriched = m;
5149                let elapsed = start.elapsed();
5150                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5151                info!(
5152                    "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5153                    v, c, m, stats.llm_enrichment_ms
5154                );
5155            }
5156            Err(_) => {
5157                let elapsed = start.elapsed();
5158                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5159                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5160            }
5161        }
5162    }
5163
5164    /// Phase 12: Diffusion Enhancement.
5165    ///
5166    /// Generates a sample set matching distribution properties from the
5167    /// generated data. v4.4.0+ honours `config.diffusion.backend`:
5168    /// - `"statistical"` (default) — moment-matching backend, always fast.
5169    /// - `"neural"` / `"hybrid"` — candle-based score network. Requires
5170    ///   the `neural` Cargo feature; falls back to statistical when the
5171    ///   feature isn't compiled in, with a loud warning.
5172    ///
5173    /// This phase is non-blocking: failures log a warning but do not
5174    /// stop the pipeline.
5175    fn phase_diffusion_enhancement(
5176        &self,
5177        #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5178        stats: &mut EnhancedGenerationStatistics,
5179    ) {
5180        if !self.config.diffusion.enabled {
5181            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5182            return;
5183        }
5184
5185        info!("Phase 12: Starting Diffusion Enhancement");
5186        let start = std::time::Instant::now();
5187
5188        let backend_choice = self.config.diffusion.backend.as_str();
5189        let use_neural = matches!(backend_choice, "neural" | "hybrid");
5190
5191        if use_neural {
5192            #[cfg(feature = "neural")]
5193            {
5194                match self.run_neural_diffusion_phase(entries) {
5195                    Ok(sample_count) => {
5196                        stats.diffusion_samples_generated = sample_count;
5197                        let elapsed = start.elapsed();
5198                        stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5199                        info!(
5200                            "Phase 12 complete ({}): {} samples in {}ms",
5201                            backend_choice, sample_count, stats.diffusion_enhancement_ms
5202                        );
5203                        return;
5204                    }
5205                    Err(e) => {
5206                        warn!(
5207                            "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5208                        );
5209                        // Fall through to statistical path below.
5210                    }
5211                }
5212            }
5213            #[cfg(not(feature = "neural"))]
5214            {
5215                warn!(
5216                    "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5217                     not compiled in — falling back to statistical. Rebuild with \
5218                     `--features neural` (or `neural-cuda` for GPU) to enable.",
5219                    backend_choice
5220                );
5221            }
5222        } else if !matches!(backend_choice, "statistical" | "") {
5223            warn!(
5224                "Phase 12: unknown backend '{}', falling back to statistical",
5225                backend_choice
5226            );
5227        }
5228
5229        // Statistical path (default + fallback).
5230        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5231            let means = vec![5000.0, 3.0, 2.0];
5232            let stds = vec![2000.0, 1.5, 1.0];
5233
5234            let diffusion_config = DiffusionConfig {
5235                n_steps: self.config.diffusion.n_steps,
5236                seed: self.seed,
5237                ..Default::default()
5238            };
5239
5240            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5241            let n_samples = self.config.diffusion.sample_size;
5242            let n_features = 3;
5243            backend.generate(n_samples, n_features, self.seed).len()
5244        }));
5245
5246        match result {
5247            Ok(sample_count) => {
5248                stats.diffusion_samples_generated = sample_count;
5249                let elapsed = start.elapsed();
5250                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5251                info!(
5252                    "Phase 12 complete (statistical): {} samples in {}ms",
5253                    sample_count, stats.diffusion_enhancement_ms
5254                );
5255            }
5256            Err(_) => {
5257                let elapsed = start.elapsed();
5258                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5259                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5260            }
5261        }
5262    }
5263
5264    /// Neural-backend execution — either load a pre-trained checkpoint
5265    /// (when `config.diffusion.neural.checkpoint_path` is set) or train
5266    /// from the first batch of JE amounts. Returns the sample count
5267    /// produced; any error bubbles up to the statistical fallback.
5268    #[cfg(feature = "neural")]
5269    fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
5270        use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
5271
5272        if entries.is_empty() {
5273            return Err(SynthError::generation(
5274                "neural diffusion: no journal entries available as training data",
5275            ));
5276        }
5277
5278        let training_data: Vec<Vec<f64>> = entries
5279            .iter()
5280            .take(5000)
5281            .map(|je| {
5282                let total_amount: f64 = je
5283                    .lines
5284                    .iter()
5285                    .filter(|l| l.debit_amount > rust_decimal::Decimal::ZERO)
5286                    .map(|l| {
5287                        use rust_decimal::prelude::ToPrimitive;
5288                        l.debit_amount.to_f64().unwrap_or(0.0)
5289                    })
5290                    .sum();
5291                let line_count = je.lines.len() as f64;
5292                // Use the approval-workflow depth as the third feature
5293                // (proxy for complexity / risk). `None` → 1.
5294                let approval_level = je
5295                    .header
5296                    .approval_workflow
5297                    .as_ref()
5298                    .map(|w| w.required_levels as f64)
5299                    .unwrap_or(1.0);
5300                vec![total_amount, line_count, approval_level]
5301            })
5302            .collect();
5303
5304        let n_features = training_data.first().map(|r| r.len()).unwrap_or(3);
5305
5306        let cfg = &self.config.diffusion;
5307        let neural_cfg = &cfg.neural;
5308
5309        let backend: NeuralDiffusionBackend = if let Some(ckpt_path) =
5310            neural_cfg.checkpoint_path.as_ref()
5311        {
5312            let path = std::path::Path::new(ckpt_path);
5313            info!(
5314                "  Neural diffusion: loading checkpoint from {}",
5315                path.display()
5316            );
5317            NeuralDiffusionBackend::load(path)
5318                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
5319        } else {
5320            use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
5321            info!(
5322                "  Neural diffusion: training score network on {} rows × {} features, \
5323                     {} epochs, hidden_dims={:?}",
5324                training_data.len(),
5325                n_features,
5326                neural_cfg.training_epochs,
5327                neural_cfg.hidden_dims
5328            );
5329            let training_config = NeuralTrainingConfig {
5330                n_steps: cfg.n_steps,
5331                schedule: cfg.schedule.clone(),
5332                hidden_dims: neural_cfg.hidden_dims.clone(),
5333                timestep_embed_dim: neural_cfg.timestep_embed_dim,
5334                learning_rate: neural_cfg.learning_rate,
5335                epochs: neural_cfg.training_epochs,
5336                batch_size: neural_cfg.batch_size,
5337            };
5338            let (backend, report) =
5339                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
5340                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
5341            info!(
5342                "  Neural diffusion: training done — {} epochs, final_loss={:.4}",
5343                report.epochs_completed, report.final_loss
5344            );
5345            backend
5346        };
5347
5348        let samples = backend.generate(cfg.sample_size, n_features, self.seed);
5349        Ok(samples.len())
5350    }
5351
5352    /// Phase 13: Causal Overlay.
5353    ///
5354    /// Builds a structural causal model from a built-in template (e.g.,
5355    /// fraud_detection) and generates causal samples. Optionally validates
5356    /// that the output respects the causal structure. This phase is
5357    /// non-blocking: failures log a warning but do not stop the pipeline.
5358    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5359        if !self.config.causal.enabled {
5360            debug!("Phase 13: Skipped (causal generation disabled)");
5361            return;
5362        }
5363
5364        info!("Phase 13: Starting Causal Overlay");
5365        let start = std::time::Instant::now();
5366
5367        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5368            // Select template based on config
5369            let graph = match self.config.causal.template.as_str() {
5370                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5371                _ => CausalGraph::fraud_detection_template(),
5372            };
5373
5374            let scm = StructuralCausalModel::new(graph.clone())
5375                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5376
5377            let n_samples = self.config.causal.sample_size;
5378            let samples = scm
5379                .generate(n_samples, self.seed)
5380                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5381
5382            // Optionally validate causal structure
5383            let validation_passed = if self.config.causal.validate {
5384                let report = CausalValidator::validate_causal_structure(&samples, &graph);
5385                if report.valid {
5386                    info!(
5387                        "Causal validation passed: all {} checks OK",
5388                        report.checks.len()
5389                    );
5390                } else {
5391                    warn!(
5392                        "Causal validation: {} violations detected: {:?}",
5393                        report.violations.len(),
5394                        report.violations
5395                    );
5396                }
5397                Some(report.valid)
5398            } else {
5399                None
5400            };
5401
5402            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5403        }));
5404
5405        match result {
5406            Ok(Ok((sample_count, validation_passed))) => {
5407                stats.causal_samples_generated = sample_count;
5408                stats.causal_validation_passed = validation_passed;
5409                let elapsed = start.elapsed();
5410                stats.causal_generation_ms = elapsed.as_millis() as u64;
5411                info!(
5412                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5413                    sample_count, stats.causal_generation_ms, validation_passed,
5414                );
5415            }
5416            Ok(Err(e)) => {
5417                let elapsed = start.elapsed();
5418                stats.causal_generation_ms = elapsed.as_millis() as u64;
5419                warn!("Phase 13: Causal generation failed: {}", e);
5420            }
5421            Err(_) => {
5422                let elapsed = start.elapsed();
5423                stats.causal_generation_ms = elapsed.as_millis() as u64;
5424                warn!("Phase 13: Causal generation failed (panic caught), continuing");
5425            }
5426        }
5427    }
5428
5429    /// Phase 14: Generate S2C sourcing data.
5430    fn phase_sourcing_data(
5431        &mut self,
5432        stats: &mut EnhancedGenerationStatistics,
5433    ) -> SynthResult<SourcingSnapshot> {
5434        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5435            debug!("Phase 14: Skipped (sourcing generation disabled)");
5436            return Ok(SourcingSnapshot::default());
5437        }
5438        let degradation = self.check_resources()?;
5439        if degradation >= DegradationLevel::Reduced {
5440            debug!(
5441                "Phase skipped due to resource pressure (degradation: {:?})",
5442                degradation
5443            );
5444            return Ok(SourcingSnapshot::default());
5445        }
5446
5447        info!("Phase 14: Generating S2C Sourcing Data");
5448        let seed = self.seed;
5449
5450        // Gather vendor data from master data
5451        let vendor_ids: Vec<String> = self
5452            .master_data
5453            .vendors
5454            .iter()
5455            .map(|v| v.vendor_id.clone())
5456            .collect();
5457        if vendor_ids.is_empty() {
5458            debug!("Phase 14: Skipped (no vendors available)");
5459            return Ok(SourcingSnapshot::default());
5460        }
5461
5462        let categories: Vec<(String, String)> = vec![
5463            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5464            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5465            ("CAT-IT".to_string(), "IT Equipment".to_string()),
5466            ("CAT-SVC".to_string(), "Professional Services".to_string()),
5467            ("CAT-LOG".to_string(), "Logistics".to_string()),
5468        ];
5469        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5470            .iter()
5471            .map(|(id, name)| {
5472                (
5473                    id.clone(),
5474                    name.clone(),
5475                    rust_decimal::Decimal::from(100_000),
5476                )
5477            })
5478            .collect();
5479
5480        let company_code = self
5481            .config
5482            .companies
5483            .first()
5484            .map(|c| c.code.as_str())
5485            .unwrap_or("1000");
5486        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5487            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5488        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5489        let fiscal_year = start_date.year() as u16;
5490        let owner_ids: Vec<String> = self
5491            .master_data
5492            .employees
5493            .iter()
5494            .take(5)
5495            .map(|e| e.employee_id.clone())
5496            .collect();
5497        let owner_id = owner_ids
5498            .first()
5499            .map(std::string::String::as_str)
5500            .unwrap_or("BUYER-001");
5501
5502        // Step 1: Spend Analysis
5503        let mut spend_gen = SpendAnalysisGenerator::new(seed);
5504        let spend_analyses =
5505            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5506
5507        // Step 2: Sourcing Projects
5508        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5509        let sourcing_projects = if owner_ids.is_empty() {
5510            Vec::new()
5511        } else {
5512            project_gen.generate(
5513                company_code,
5514                &categories_with_spend,
5515                &owner_ids,
5516                start_date,
5517                self.config.global.period_months,
5518            )
5519        };
5520        stats.sourcing_project_count = sourcing_projects.len();
5521
5522        // Step 3: Qualifications
5523        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5524        let mut qual_gen = QualificationGenerator::new(seed + 2);
5525        let qualifications = qual_gen.generate(
5526            company_code,
5527            &qual_vendor_ids,
5528            sourcing_projects.first().map(|p| p.project_id.as_str()),
5529            owner_id,
5530            start_date,
5531        );
5532
5533        // Step 4: RFx Events
5534        let mut rfx_gen = RfxGenerator::new(seed + 3);
5535        let rfx_events: Vec<RfxEvent> = sourcing_projects
5536            .iter()
5537            .map(|proj| {
5538                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5539                rfx_gen.generate(
5540                    company_code,
5541                    &proj.project_id,
5542                    &proj.category_id,
5543                    &qualified_vids,
5544                    owner_id,
5545                    start_date,
5546                    50000.0,
5547                )
5548            })
5549            .collect();
5550        stats.rfx_event_count = rfx_events.len();
5551
5552        // Step 5: Bids
5553        let mut bid_gen = BidGenerator::new(seed + 4);
5554        let mut all_bids = Vec::new();
5555        for rfx in &rfx_events {
5556            let bidder_count = vendor_ids.len().clamp(2, 5);
5557            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5558            let bids = bid_gen.generate(rfx, &responding, start_date);
5559            all_bids.extend(bids);
5560        }
5561        stats.bid_count = all_bids.len();
5562
5563        // Step 6: Bid Evaluations
5564        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5565        let bid_evaluations: Vec<BidEvaluation> = rfx_events
5566            .iter()
5567            .map(|rfx| {
5568                let rfx_bids: Vec<SupplierBid> = all_bids
5569                    .iter()
5570                    .filter(|b| b.rfx_id == rfx.rfx_id)
5571                    .cloned()
5572                    .collect();
5573                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5574            })
5575            .collect();
5576
5577        // Step 7: Contracts from winning bids
5578        let mut contract_gen = ContractGenerator::new(seed + 6);
5579        let contracts: Vec<ProcurementContract> = bid_evaluations
5580            .iter()
5581            .zip(rfx_events.iter())
5582            .filter_map(|(eval, rfx)| {
5583                eval.ranked_bids.first().and_then(|winner| {
5584                    all_bids
5585                        .iter()
5586                        .find(|b| b.bid_id == winner.bid_id)
5587                        .map(|winning_bid| {
5588                            contract_gen.generate_from_bid(
5589                                winning_bid,
5590                                Some(&rfx.sourcing_project_id),
5591                                &rfx.category_id,
5592                                owner_id,
5593                                start_date,
5594                            )
5595                        })
5596                })
5597            })
5598            .collect();
5599        stats.contract_count = contracts.len();
5600
5601        // Step 8: Catalog Items
5602        let mut catalog_gen = CatalogGenerator::new(seed + 7);
5603        let catalog_items = catalog_gen.generate(&contracts);
5604        stats.catalog_item_count = catalog_items.len();
5605
5606        // Step 9: Scorecards
5607        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5608        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5609            .iter()
5610            .fold(
5611                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5612                |mut acc, c| {
5613                    acc.entry(c.vendor_id.clone()).or_default().push(c);
5614                    acc
5615                },
5616            )
5617            .into_iter()
5618            .collect();
5619        let scorecards = scorecard_gen.generate(
5620            company_code,
5621            &vendor_contracts,
5622            start_date,
5623            end_date,
5624            owner_id,
5625        );
5626        stats.scorecard_count = scorecards.len();
5627
5628        // Back-populate cross-references on sourcing projects (Task 35)
5629        // Link each project to its RFx events, contracts, and spend analyses
5630        let mut sourcing_projects = sourcing_projects;
5631        for project in &mut sourcing_projects {
5632            // Link RFx events generated for this project
5633            project.rfx_ids = rfx_events
5634                .iter()
5635                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5636                .map(|rfx| rfx.rfx_id.clone())
5637                .collect();
5638
5639            // Link contract awarded from this project's RFx
5640            project.contract_id = contracts
5641                .iter()
5642                .find(|c| {
5643                    c.sourcing_project_id
5644                        .as_deref()
5645                        .is_some_and(|sp| sp == project.project_id)
5646                })
5647                .map(|c| c.contract_id.clone());
5648
5649            // Link spend analysis for matching category (use category_id as the reference)
5650            project.spend_analysis_id = spend_analyses
5651                .iter()
5652                .find(|sa| sa.category_id == project.category_id)
5653                .map(|sa| sa.category_id.clone());
5654        }
5655
5656        info!(
5657            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5658            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5659            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5660        );
5661        self.check_resources_with_log("post-sourcing")?;
5662
5663        Ok(SourcingSnapshot {
5664            spend_analyses,
5665            sourcing_projects,
5666            qualifications,
5667            rfx_events,
5668            bids: all_bids,
5669            bid_evaluations,
5670            contracts,
5671            catalog_items,
5672            scorecards,
5673        })
5674    }
5675
5676    /// Build a [`GroupStructure`] from the current company configuration.
5677    ///
5678    /// The first company in the configuration is treated as the ultimate parent.
5679    /// All remaining companies become wholly-owned (100 %) subsidiaries with
5680    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
5681    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5682        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5683
5684        let parent_code = self
5685            .config
5686            .companies
5687            .first()
5688            .map(|c| c.code.clone())
5689            .unwrap_or_else(|| "PARENT".to_string());
5690
5691        let mut group = GroupStructure::new(parent_code);
5692
5693        for company in self.config.companies.iter().skip(1) {
5694            let sub =
5695                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5696            group.add_subsidiary(sub);
5697        }
5698
5699        group
5700    }
5701
5702    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
5703    fn phase_intercompany(
5704        &mut self,
5705        journal_entries: &[JournalEntry],
5706        stats: &mut EnhancedGenerationStatistics,
5707    ) -> SynthResult<IntercompanySnapshot> {
5708        // Skip if intercompany is disabled in config
5709        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5710            debug!("Phase 14b: Skipped (intercompany generation disabled)");
5711            return Ok(IntercompanySnapshot::default());
5712        }
5713
5714        // Intercompany requires at least 2 companies
5715        if self.config.companies.len() < 2 {
5716            debug!(
5717                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5718                self.config.companies.len()
5719            );
5720            return Ok(IntercompanySnapshot::default());
5721        }
5722
5723        info!("Phase 14b: Generating Intercompany Transactions");
5724
5725        // Build the group structure early — used by ISA 600 component auditor scope
5726        // and consolidated financial statement generators downstream.
5727        let group_structure = self.build_group_structure();
5728        debug!(
5729            "Group structure built: parent={}, subsidiaries={}",
5730            group_structure.parent_entity,
5731            group_structure.subsidiaries.len()
5732        );
5733
5734        let seed = self.seed;
5735        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5736            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5737        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5738
5739        // Build ownership structure from company configs
5740        // First company is treated as the parent, remaining are subsidiaries
5741        let parent_code = self.config.companies[0].code.clone();
5742        let mut ownership_structure =
5743            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5744
5745        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5746            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5747                format!("REL{:03}", i + 1),
5748                parent_code.clone(),
5749                company.code.clone(),
5750                rust_decimal::Decimal::from(100), // Default 100% ownership
5751                start_date,
5752            );
5753            ownership_structure.add_relationship(relationship);
5754        }
5755
5756        // Convert config transfer pricing method to core model enum
5757        let tp_method = match self.config.intercompany.transfer_pricing_method {
5758            datasynth_config::schema::TransferPricingMethod::CostPlus => {
5759                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5760            }
5761            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5762                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5763            }
5764            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5765                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5766            }
5767            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5768                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5769            }
5770            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5771                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5772            }
5773        };
5774
5775        // Build IC generator config from schema config
5776        let ic_currency = self
5777            .config
5778            .companies
5779            .first()
5780            .map(|c| c.currency.clone())
5781            .unwrap_or_else(|| "USD".to_string());
5782        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5783            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5784            transfer_pricing_method: tp_method,
5785            markup_percent: rust_decimal::Decimal::from_f64_retain(
5786                self.config.intercompany.markup_percent,
5787            )
5788            .unwrap_or(rust_decimal::Decimal::from(5)),
5789            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5790            default_currency: ic_currency,
5791            ..Default::default()
5792        };
5793
5794        // Create IC generator
5795        let mut ic_generator = datasynth_generators::ICGenerator::new(
5796            ic_gen_config,
5797            ownership_structure.clone(),
5798            seed + 50,
5799        );
5800
5801        // Generate IC transactions for the period
5802        // Use ~3 transactions per day as a reasonable default
5803        let transactions_per_day = 3;
5804        let matched_pairs = ic_generator.generate_transactions_for_period(
5805            start_date,
5806            end_date,
5807            transactions_per_day,
5808        );
5809
5810        // Generate IC source P2P/O2C documents
5811        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5812        debug!(
5813            "Generated {} IC seller invoices, {} IC buyer POs",
5814            ic_doc_chains.seller_invoices.len(),
5815            ic_doc_chains.buyer_orders.len()
5816        );
5817
5818        // Generate journal entries from matched pairs
5819        let mut seller_entries = Vec::new();
5820        let mut buyer_entries = Vec::new();
5821        let fiscal_year = start_date.year();
5822
5823        for pair in &matched_pairs {
5824            let fiscal_period = pair.posting_date.month();
5825            let (seller_je, buyer_je) =
5826                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5827            seller_entries.push(seller_je);
5828            buyer_entries.push(buyer_je);
5829        }
5830
5831        // Run matching engine
5832        let matching_config = datasynth_generators::ICMatchingConfig {
5833            base_currency: self
5834                .config
5835                .companies
5836                .first()
5837                .map(|c| c.currency.clone())
5838                .unwrap_or_else(|| "USD".to_string()),
5839            ..Default::default()
5840        };
5841        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5842        matching_engine.load_matched_pairs(&matched_pairs);
5843        let matching_result = matching_engine.run_matching(end_date);
5844
5845        // Generate elimination entries if configured
5846        let mut elimination_entries = Vec::new();
5847        if self.config.intercompany.generate_eliminations {
5848            let elim_config = datasynth_generators::EliminationConfig {
5849                consolidation_entity: "GROUP".to_string(),
5850                base_currency: self
5851                    .config
5852                    .companies
5853                    .first()
5854                    .map(|c| c.currency.clone())
5855                    .unwrap_or_else(|| "USD".to_string()),
5856                ..Default::default()
5857            };
5858
5859            let mut elim_generator =
5860                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5861
5862            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5863            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5864                matching_result
5865                    .matched_balances
5866                    .iter()
5867                    .chain(matching_result.unmatched_balances.iter())
5868                    .cloned()
5869                    .collect();
5870
5871            // Build investment and equity maps from the group structure so that the
5872            // elimination generator can produce equity-investment elimination entries
5873            // (parent's investment in subsidiary vs. subsidiary's equity capital).
5874            //
5875            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
5876            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
5877            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
5878            //
5879            // Net assets are derived from the journal entries using account-range heuristics:
5880            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
5881            // no JE data is available (IC phase runs early in the generation pipeline).
5882            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5883                std::collections::HashMap::new();
5884            let mut equity_amounts: std::collections::HashMap<
5885                String,
5886                std::collections::HashMap<String, rust_decimal::Decimal>,
5887            > = std::collections::HashMap::new();
5888            {
5889                use rust_decimal::Decimal;
5890                let hundred = Decimal::from(100u32);
5891                let ten_pct = Decimal::new(10, 2); // 0.10
5892                let thirty_pct = Decimal::new(30, 2); // 0.30
5893                let sixty_pct = Decimal::new(60, 2); // 0.60
5894                let parent_code = &group_structure.parent_entity;
5895                for sub in &group_structure.subsidiaries {
5896                    let net_assets = {
5897                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5898                        if na > Decimal::ZERO {
5899                            na
5900                        } else {
5901                            Decimal::from(1_000_000u64)
5902                        }
5903                    };
5904                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
5905                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5906                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5907
5908                    // Split subsidiary equity into conventional components:
5909                    // 10 % share capital / 30 % APIC / 60 % retained earnings
5910                    let mut eq_map = std::collections::HashMap::new();
5911                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5912                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5913                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5914                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
5915                }
5916            }
5917
5918            let journal = elim_generator.generate_eliminations(
5919                &fiscal_period,
5920                end_date,
5921                &all_balances,
5922                &matched_pairs,
5923                &investment_amounts,
5924                &equity_amounts,
5925            );
5926
5927            elimination_entries = journal.entries.clone();
5928        }
5929
5930        let matched_pair_count = matched_pairs.len();
5931        let elimination_entry_count = elimination_entries.len();
5932        let match_rate = matching_result.match_rate;
5933
5934        stats.ic_matched_pair_count = matched_pair_count;
5935        stats.ic_elimination_count = elimination_entry_count;
5936        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5937
5938        info!(
5939            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5940            matched_pair_count,
5941            stats.ic_transaction_count,
5942            seller_entries.len(),
5943            buyer_entries.len(),
5944            elimination_entry_count,
5945            match_rate * 100.0
5946        );
5947        self.check_resources_with_log("post-intercompany")?;
5948
5949        // ----------------------------------------------------------------
5950        // NCI measurements: derive from group structure ownership percentages
5951        // ----------------------------------------------------------------
5952        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5953            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5954            use rust_decimal::Decimal;
5955
5956            let eight_pct = Decimal::new(8, 2); // 0.08
5957
5958            group_structure
5959                .subsidiaries
5960                .iter()
5961                .filter(|sub| {
5962                    sub.nci_percentage > Decimal::ZERO
5963                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5964                })
5965                .map(|sub| {
5966                    // Compute net assets from actual journal entries for this subsidiary.
5967                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
5968                    // IC phase runs before the main JE batch has been populated).
5969                    let net_assets_from_jes =
5970                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5971
5972                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
5973                        net_assets_from_jes.round_dp(2)
5974                    } else {
5975                        // Fallback: use a plausible base amount
5976                        Decimal::from(1_000_000u64)
5977                    };
5978
5979                    // Net income approximated as 8% of net assets
5980                    let net_income = (net_assets * eight_pct).round_dp(2);
5981
5982                    NciMeasurement::compute(
5983                        sub.entity_code.clone(),
5984                        sub.nci_percentage,
5985                        net_assets,
5986                        net_income,
5987                    )
5988                })
5989                .collect()
5990        };
5991
5992        if !nci_measurements.is_empty() {
5993            info!(
5994                "NCI measurements: {} subsidiaries with non-controlling interests",
5995                nci_measurements.len()
5996            );
5997        }
5998
5999        Ok(IntercompanySnapshot {
6000            group_structure: Some(group_structure),
6001            matched_pairs,
6002            seller_journal_entries: seller_entries,
6003            buyer_journal_entries: buyer_entries,
6004            elimination_entries,
6005            nci_measurements,
6006            ic_document_chains: Some(ic_doc_chains),
6007            matched_pair_count,
6008            elimination_entry_count,
6009            match_rate,
6010        })
6011    }
6012
6013    /// Phase 15: Generate bank reconciliations and financial statements.
6014    fn phase_financial_reporting(
6015        &mut self,
6016        document_flows: &DocumentFlowSnapshot,
6017        journal_entries: &[JournalEntry],
6018        coa: &Arc<ChartOfAccounts>,
6019        _hr: &HrSnapshot,
6020        _audit: &AuditSnapshot,
6021        stats: &mut EnhancedGenerationStatistics,
6022    ) -> SynthResult<FinancialReportingSnapshot> {
6023        let fs_enabled = self.phase_config.generate_financial_statements
6024            || self.config.financial_reporting.enabled;
6025        let br_enabled = self.phase_config.generate_bank_reconciliation;
6026
6027        if !fs_enabled && !br_enabled {
6028            debug!("Phase 15: Skipped (financial reporting disabled)");
6029            return Ok(FinancialReportingSnapshot::default());
6030        }
6031
6032        info!("Phase 15: Generating Financial Reporting Data");
6033
6034        let seed = self.seed;
6035        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6036            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6037
6038        let mut financial_statements = Vec::new();
6039        let mut bank_reconciliations = Vec::new();
6040        let mut trial_balances = Vec::new();
6041        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6042        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6043            Vec::new();
6044        // Standalone statements keyed by entity code
6045        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6046            std::collections::HashMap::new();
6047        // Consolidated statements (one per period)
6048        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6049        // Consolidation schedules (one per period)
6050        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6051
6052        // Generate financial statements from JE-derived trial balances.
6053        //
6054        // When journal entries are available, we use cumulative trial balances for
6055        // balance sheet accounts and current-period trial balances for income
6056        // statement accounts. We also track prior-period trial balances so the
6057        // generator can produce comparative amounts, and we build a proper
6058        // cash flow statement from working capital changes rather than random data.
6059        if fs_enabled {
6060            let has_journal_entries = !journal_entries.is_empty();
6061
6062            // Use FinancialStatementGenerator for balance sheet and income statement,
6063            // but build cash flow ourselves from TB data when JEs are available.
6064            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6065            // Separate generator for consolidated statements (different seed offset)
6066            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6067
6068            // Collect elimination JEs once (reused across periods)
6069            let elimination_entries: Vec<&JournalEntry> = journal_entries
6070                .iter()
6071                .filter(|je| je.header.is_elimination)
6072                .collect();
6073
6074            // Generate one set of statements per period, per entity
6075            for period in 0..self.config.global.period_months {
6076                let period_start = start_date + chrono::Months::new(period);
6077                let period_end =
6078                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6079                let fiscal_year = period_end.year() as u16;
6080                let fiscal_period = period_end.month() as u8;
6081                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6082
6083                // Build per-entity trial balances for this period (non-elimination JEs)
6084                // We accumulate them for the consolidation step.
6085                let mut entity_tb_map: std::collections::HashMap<
6086                    String,
6087                    std::collections::HashMap<String, rust_decimal::Decimal>,
6088                > = std::collections::HashMap::new();
6089
6090                // --- Standalone: one set of statements per company ---
6091                for (company_idx, company) in self.config.companies.iter().enumerate() {
6092                    let company_code = company.code.as_str();
6093                    let currency = company.currency.as_str();
6094                    // Use a unique seed offset per company to keep statements deterministic
6095                    // and distinct across companies
6096                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6097                    let mut company_fs_gen =
6098                        FinancialStatementGenerator::new(seed + company_seed_offset);
6099
6100                    if has_journal_entries {
6101                        let tb_entries = Self::build_cumulative_trial_balance(
6102                            journal_entries,
6103                            coa,
6104                            company_code,
6105                            start_date,
6106                            period_end,
6107                            fiscal_year,
6108                            fiscal_period,
6109                        );
6110
6111                        // Accumulate per-entity category balances for consolidation
6112                        let entity_cat_map =
6113                            entity_tb_map.entry(company_code.to_string()).or_default();
6114                        for tb_entry in &tb_entries {
6115                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
6116                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6117                        }
6118
6119                        let stmts = company_fs_gen.generate(
6120                            company_code,
6121                            currency,
6122                            &tb_entries,
6123                            period_start,
6124                            period_end,
6125                            fiscal_year,
6126                            fiscal_period,
6127                            None,
6128                            "SYS-AUTOCLOSE",
6129                        );
6130
6131                        let mut entity_stmts = Vec::new();
6132                        for stmt in stmts {
6133                            if stmt.statement_type == StatementType::CashFlowStatement {
6134                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6135                                let cf_items = Self::build_cash_flow_from_trial_balances(
6136                                    &tb_entries,
6137                                    None,
6138                                    net_income,
6139                                );
6140                                entity_stmts.push(FinancialStatement {
6141                                    cash_flow_items: cf_items,
6142                                    ..stmt
6143                                });
6144                            } else {
6145                                entity_stmts.push(stmt);
6146                            }
6147                        }
6148
6149                        // Add to the flat financial_statements list (used by KPI/budget)
6150                        financial_statements.extend(entity_stmts.clone());
6151
6152                        // Store standalone per-entity
6153                        standalone_statements
6154                            .entry(company_code.to_string())
6155                            .or_default()
6156                            .extend(entity_stmts);
6157
6158                        // Only store trial balance for the first company in the period
6159                        // to avoid duplicates in the trial_balances list
6160                        if company_idx == 0 {
6161                            trial_balances.push(PeriodTrialBalance {
6162                                fiscal_year,
6163                                fiscal_period,
6164                                period_start,
6165                                period_end,
6166                                entries: tb_entries,
6167                            });
6168                        }
6169                    } else {
6170                        // Fallback: no JEs available
6171                        let tb_entries = Self::build_trial_balance_from_entries(
6172                            journal_entries,
6173                            coa,
6174                            company_code,
6175                            fiscal_year,
6176                            fiscal_period,
6177                        );
6178
6179                        let stmts = company_fs_gen.generate(
6180                            company_code,
6181                            currency,
6182                            &tb_entries,
6183                            period_start,
6184                            period_end,
6185                            fiscal_year,
6186                            fiscal_period,
6187                            None,
6188                            "SYS-AUTOCLOSE",
6189                        );
6190                        financial_statements.extend(stmts.clone());
6191                        standalone_statements
6192                            .entry(company_code.to_string())
6193                            .or_default()
6194                            .extend(stmts);
6195
6196                        if company_idx == 0 && !tb_entries.is_empty() {
6197                            trial_balances.push(PeriodTrialBalance {
6198                                fiscal_year,
6199                                fiscal_period,
6200                                period_start,
6201                                period_end,
6202                                entries: tb_entries,
6203                            });
6204                        }
6205                    }
6206                }
6207
6208                // --- Consolidated: aggregate all entities + apply eliminations ---
6209                // Use the primary (first) company's currency for the consolidated statement
6210                let group_currency = self
6211                    .config
6212                    .companies
6213                    .first()
6214                    .map(|c| c.currency.as_str())
6215                    .unwrap_or("USD");
6216
6217                // Build owned elimination entries for this period
6218                let period_eliminations: Vec<JournalEntry> = elimination_entries
6219                    .iter()
6220                    .filter(|je| {
6221                        je.header.fiscal_year == fiscal_year
6222                            && je.header.fiscal_period == fiscal_period
6223                    })
6224                    .map(|je| (*je).clone())
6225                    .collect();
6226
6227                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6228                    &entity_tb_map,
6229                    &period_eliminations,
6230                    &period_label,
6231                );
6232
6233                // Build a pseudo trial balance from consolidated line items for the
6234                // FinancialStatementGenerator to use (only for cash flow direction).
6235                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6236                    .line_items
6237                    .iter()
6238                    .map(|li| {
6239                        let net = li.post_elimination_total;
6240                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6241                            (net, rust_decimal::Decimal::ZERO)
6242                        } else {
6243                            (rust_decimal::Decimal::ZERO, -net)
6244                        };
6245                        datasynth_generators::TrialBalanceEntry {
6246                            account_code: li.account_category.clone(),
6247                            account_name: li.account_category.clone(),
6248                            category: li.account_category.clone(),
6249                            debit_balance: debit,
6250                            credit_balance: credit,
6251                        }
6252                    })
6253                    .collect();
6254
6255                let mut cons_stmts = cons_gen.generate(
6256                    "GROUP",
6257                    group_currency,
6258                    &cons_tb,
6259                    period_start,
6260                    period_end,
6261                    fiscal_year,
6262                    fiscal_period,
6263                    None,
6264                    "SYS-AUTOCLOSE",
6265                );
6266
6267                // Split consolidated line items by statement type.
6268                // The consolidation generator returns BS items first, then IS items,
6269                // identified by their CONS- prefix and category.
6270                let bs_categories: &[&str] = &[
6271                    "CASH",
6272                    "RECEIVABLES",
6273                    "INVENTORY",
6274                    "FIXEDASSETS",
6275                    "PAYABLES",
6276                    "ACCRUEDLIABILITIES",
6277                    "LONGTERMDEBT",
6278                    "EQUITY",
6279                ];
6280                let (bs_items, is_items): (Vec<_>, Vec<_>) =
6281                    cons_line_items.into_iter().partition(|li| {
6282                        let upper = li.label.to_uppercase();
6283                        bs_categories.iter().any(|c| upper == *c)
6284                    });
6285
6286                for stmt in &mut cons_stmts {
6287                    stmt.is_consolidated = true;
6288                    match stmt.statement_type {
6289                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6290                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6291                        _ => {} // CF and equity change statements keep generator output
6292                    }
6293                }
6294
6295                consolidated_statements.extend(cons_stmts);
6296                consolidation_schedules.push(schedule);
6297            }
6298
6299            // Backward compat: if only 1 company, use existing code path logic
6300            // (prior_cumulative_tb for comparative amounts). Already handled above;
6301            // the prior_ref is omitted to keep this change minimal.
6302            let _ = &mut fs_gen; // suppress unused warning
6303
6304            stats.financial_statement_count = financial_statements.len();
6305            info!(
6306                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6307                stats.financial_statement_count,
6308                consolidated_statements.len(),
6309                has_journal_entries
6310            );
6311
6312            // ----------------------------------------------------------------
6313            // IFRS 8 / ASC 280: Operating Segment Reporting
6314            // ----------------------------------------------------------------
6315            // Build entity seeds from the company configuration.
6316            let entity_seeds: Vec<SegmentSeed> = self
6317                .config
6318                .companies
6319                .iter()
6320                .map(|c| SegmentSeed {
6321                    code: c.code.clone(),
6322                    name: c.name.clone(),
6323                    currency: c.currency.clone(),
6324                })
6325                .collect();
6326
6327            let mut seg_gen = SegmentGenerator::new(seed + 30);
6328
6329            // Generate one set of segment reports per period.
6330            // We extract consolidated revenue / profit / assets from the consolidated
6331            // financial statements produced above, falling back to simple sums when
6332            // no consolidated statements were generated (single-entity path).
6333            for period in 0..self.config.global.period_months {
6334                let period_end =
6335                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6336                let fiscal_year = period_end.year() as u16;
6337                let fiscal_period = period_end.month() as u8;
6338                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6339
6340                use datasynth_core::models::StatementType;
6341
6342                // Try to find consolidated income statement for this period
6343                let cons_is = consolidated_statements.iter().find(|s| {
6344                    s.fiscal_year == fiscal_year
6345                        && s.fiscal_period == fiscal_period
6346                        && s.statement_type == StatementType::IncomeStatement
6347                });
6348                let cons_bs = consolidated_statements.iter().find(|s| {
6349                    s.fiscal_year == fiscal_year
6350                        && s.fiscal_period == fiscal_period
6351                        && s.statement_type == StatementType::BalanceSheet
6352                });
6353
6354                // If consolidated statements not available fall back to the flat list
6355                let is_stmt = cons_is.or_else(|| {
6356                    financial_statements.iter().find(|s| {
6357                        s.fiscal_year == fiscal_year
6358                            && s.fiscal_period == fiscal_period
6359                            && s.statement_type == StatementType::IncomeStatement
6360                    })
6361                });
6362                let bs_stmt = cons_bs.or_else(|| {
6363                    financial_statements.iter().find(|s| {
6364                        s.fiscal_year == fiscal_year
6365                            && s.fiscal_period == fiscal_period
6366                            && s.statement_type == StatementType::BalanceSheet
6367                    })
6368                });
6369
6370                let consolidated_revenue = is_stmt
6371                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6372                    .map(|li| -li.amount) // revenue is stored as negative in IS
6373                    .unwrap_or(rust_decimal::Decimal::ZERO);
6374
6375                let consolidated_profit = is_stmt
6376                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6377                    .map(|li| li.amount)
6378                    .unwrap_or(rust_decimal::Decimal::ZERO);
6379
6380                let consolidated_assets = bs_stmt
6381                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6382                    .map(|li| li.amount)
6383                    .unwrap_or(rust_decimal::Decimal::ZERO);
6384
6385                // Skip periods where we have no financial data
6386                if consolidated_revenue == rust_decimal::Decimal::ZERO
6387                    && consolidated_assets == rust_decimal::Decimal::ZERO
6388                {
6389                    continue;
6390                }
6391
6392                let group_code = self
6393                    .config
6394                    .companies
6395                    .first()
6396                    .map(|c| c.code.as_str())
6397                    .unwrap_or("GROUP");
6398
6399                // Compute period depreciation from JEs with document type "CL" hitting account
6400                // 6000 (depreciation expense).  These are generated by phase_period_close.
6401                let total_depr: rust_decimal::Decimal = journal_entries
6402                    .iter()
6403                    .filter(|je| je.header.document_type == "CL")
6404                    .flat_map(|je| je.lines.iter())
6405                    .filter(|l| l.gl_account.starts_with("6000"))
6406                    .map(|l| l.debit_amount)
6407                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6408                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6409                    Some(total_depr)
6410                } else {
6411                    None
6412                };
6413
6414                let (segs, recon) = seg_gen.generate(
6415                    group_code,
6416                    &period_label,
6417                    consolidated_revenue,
6418                    consolidated_profit,
6419                    consolidated_assets,
6420                    &entity_seeds,
6421                    depr_param,
6422                );
6423                segment_reports.extend(segs);
6424                segment_reconciliations.push(recon);
6425            }
6426
6427            info!(
6428                "Segment reports generated: {} segments, {} reconciliations",
6429                segment_reports.len(),
6430                segment_reconciliations.len()
6431            );
6432        }
6433
6434        // Generate bank reconciliations from payment data
6435        if br_enabled && !document_flows.payments.is_empty() {
6436            let employee_ids: Vec<String> = self
6437                .master_data
6438                .employees
6439                .iter()
6440                .map(|e| e.employee_id.clone())
6441                .collect();
6442            let mut br_gen =
6443                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6444
6445            // Group payments by company code and period
6446            for company in &self.config.companies {
6447                let company_payments: Vec<PaymentReference> = document_flows
6448                    .payments
6449                    .iter()
6450                    .filter(|p| p.header.company_code == company.code)
6451                    .map(|p| PaymentReference {
6452                        id: p.header.document_id.clone(),
6453                        amount: if p.is_vendor { p.amount } else { -p.amount },
6454                        date: p.header.document_date,
6455                        reference: p
6456                            .check_number
6457                            .clone()
6458                            .or_else(|| p.wire_reference.clone())
6459                            .unwrap_or_else(|| p.header.document_id.clone()),
6460                    })
6461                    .collect();
6462
6463                if company_payments.is_empty() {
6464                    continue;
6465                }
6466
6467                let bank_account_id = format!("{}-MAIN", company.code);
6468
6469                // Generate one reconciliation per period
6470                for period in 0..self.config.global.period_months {
6471                    let period_start = start_date + chrono::Months::new(period);
6472                    let period_end =
6473                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6474
6475                    let period_payments: Vec<PaymentReference> = company_payments
6476                        .iter()
6477                        .filter(|p| p.date >= period_start && p.date <= period_end)
6478                        .cloned()
6479                        .collect();
6480
6481                    let recon = br_gen.generate(
6482                        &company.code,
6483                        &bank_account_id,
6484                        period_start,
6485                        period_end,
6486                        &company.currency,
6487                        &period_payments,
6488                    );
6489                    bank_reconciliations.push(recon);
6490                }
6491            }
6492            info!(
6493                "Bank reconciliations generated: {} reconciliations",
6494                bank_reconciliations.len()
6495            );
6496        }
6497
6498        stats.bank_reconciliation_count = bank_reconciliations.len();
6499        self.check_resources_with_log("post-financial-reporting")?;
6500
6501        if !trial_balances.is_empty() {
6502            info!(
6503                "Period-close trial balances captured: {} periods",
6504                trial_balances.len()
6505            );
6506        }
6507
6508        // Notes to financial statements are generated in a separate post-processing step
6509        // (generate_notes_to_financial_statements) called after accounting_standards and tax
6510        // phases have completed, so that deferred tax and provision data can be wired in.
6511        let notes_to_financial_statements = Vec::new();
6512
6513        Ok(FinancialReportingSnapshot {
6514            financial_statements,
6515            standalone_statements,
6516            consolidated_statements,
6517            consolidation_schedules,
6518            bank_reconciliations,
6519            trial_balances,
6520            segment_reports,
6521            segment_reconciliations,
6522            notes_to_financial_statements,
6523        })
6524    }
6525
6526    /// Populate notes to financial statements using fully-resolved snapshots.
6527    ///
6528    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
6529    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
6530    /// can be wired into the notes context.  The method mutates
6531    /// `financial_reporting.notes_to_financial_statements` in-place.
6532    fn generate_notes_to_financial_statements(
6533        &self,
6534        financial_reporting: &mut FinancialReportingSnapshot,
6535        accounting_standards: &AccountingStandardsSnapshot,
6536        tax: &TaxSnapshot,
6537        hr: &HrSnapshot,
6538        audit: &AuditSnapshot,
6539        treasury: &TreasurySnapshot,
6540    ) {
6541        use datasynth_config::schema::AccountingFrameworkConfig;
6542        use datasynth_core::models::StatementType;
6543        use datasynth_generators::period_close::notes_generator::{
6544            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6545        };
6546
6547        let seed = self.seed;
6548        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6549        {
6550            Ok(d) => d,
6551            Err(_) => return,
6552        };
6553
6554        let mut notes_gen = NotesGenerator::new(seed + 4235);
6555
6556        for company in &self.config.companies {
6557            let last_period_end = start_date
6558                + chrono::Months::new(self.config.global.period_months)
6559                - chrono::Days::new(1);
6560            let fiscal_year = last_period_end.year() as u16;
6561
6562            // Extract relevant amounts from the already-generated financial statements
6563            let entity_is = financial_reporting
6564                .standalone_statements
6565                .get(&company.code)
6566                .and_then(|stmts| {
6567                    stmts.iter().find(|s| {
6568                        s.fiscal_year == fiscal_year
6569                            && s.statement_type == StatementType::IncomeStatement
6570                    })
6571                });
6572            let entity_bs = financial_reporting
6573                .standalone_statements
6574                .get(&company.code)
6575                .and_then(|stmts| {
6576                    stmts.iter().find(|s| {
6577                        s.fiscal_year == fiscal_year
6578                            && s.statement_type == StatementType::BalanceSheet
6579                    })
6580                });
6581
6582            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
6583            let revenue_amount = entity_is
6584                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6585                .map(|li| li.amount);
6586            let ppe_gross = entity_bs
6587                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6588                .map(|li| li.amount);
6589
6590            let framework = match self
6591                .config
6592                .accounting_standards
6593                .framework
6594                .unwrap_or_default()
6595            {
6596                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6597                    "IFRS".to_string()
6598                }
6599                _ => "US GAAP".to_string(),
6600            };
6601
6602            // ---- Deferred tax (IAS 12 / ASC 740) ----
6603            // Sum closing DTA and DTL from rollforward entries for this entity.
6604            let (entity_dta, entity_dtl) = {
6605                let mut dta = rust_decimal::Decimal::ZERO;
6606                let mut dtl = rust_decimal::Decimal::ZERO;
6607                for rf in &tax.deferred_tax.rollforwards {
6608                    if rf.entity_code == company.code {
6609                        dta += rf.closing_dta;
6610                        dtl += rf.closing_dtl;
6611                    }
6612                }
6613                (
6614                    if dta > rust_decimal::Decimal::ZERO {
6615                        Some(dta)
6616                    } else {
6617                        None
6618                    },
6619                    if dtl > rust_decimal::Decimal::ZERO {
6620                        Some(dtl)
6621                    } else {
6622                        None
6623                    },
6624                )
6625            };
6626
6627            // ---- Provisions (IAS 37 / ASC 450) ----
6628            // Filter provisions to this entity; sum best_estimate amounts.
6629            let entity_provisions: Vec<_> = accounting_standards
6630                .provisions
6631                .iter()
6632                .filter(|p| p.entity_code == company.code)
6633                .collect();
6634            let provision_count = entity_provisions.len();
6635            let total_provisions = if provision_count > 0 {
6636                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6637            } else {
6638                None
6639            };
6640
6641            // ---- Pension data from HR snapshot ----
6642            let entity_pension_plan_count = hr
6643                .pension_plans
6644                .iter()
6645                .filter(|p| p.entity_code == company.code)
6646                .count();
6647            let entity_total_dbo: Option<rust_decimal::Decimal> = {
6648                let sum: rust_decimal::Decimal = hr
6649                    .pension_disclosures
6650                    .iter()
6651                    .filter(|d| {
6652                        hr.pension_plans
6653                            .iter()
6654                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6655                    })
6656                    .map(|d| d.net_pension_liability)
6657                    .sum();
6658                let plan_assets_sum: rust_decimal::Decimal = hr
6659                    .pension_plan_assets
6660                    .iter()
6661                    .filter(|a| {
6662                        hr.pension_plans
6663                            .iter()
6664                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6665                    })
6666                    .map(|a| a.fair_value_closing)
6667                    .sum();
6668                if entity_pension_plan_count > 0 {
6669                    Some(sum + plan_assets_sum)
6670                } else {
6671                    None
6672                }
6673            };
6674            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6675                let sum: rust_decimal::Decimal = hr
6676                    .pension_plan_assets
6677                    .iter()
6678                    .filter(|a| {
6679                        hr.pension_plans
6680                            .iter()
6681                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6682                    })
6683                    .map(|a| a.fair_value_closing)
6684                    .sum();
6685                if entity_pension_plan_count > 0 {
6686                    Some(sum)
6687                } else {
6688                    None
6689                }
6690            };
6691
6692            // ---- Audit data: related parties + subsequent events ----
6693            // Audit snapshot covers all entities; use total counts (common case = single entity).
6694            let rp_count = audit.related_party_transactions.len();
6695            let se_count = audit.subsequent_events.len();
6696            let adjusting_count = audit
6697                .subsequent_events
6698                .iter()
6699                .filter(|e| {
6700                    matches!(
6701                        e.classification,
6702                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6703                    )
6704                })
6705                .count();
6706
6707            let ctx = NotesGeneratorContext {
6708                entity_code: company.code.clone(),
6709                framework,
6710                period: format!("FY{}", fiscal_year),
6711                period_end: last_period_end,
6712                currency: company.currency.clone(),
6713                revenue_amount,
6714                total_ppe_gross: ppe_gross,
6715                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6716                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
6717                deferred_tax_asset: entity_dta,
6718                deferred_tax_liability: entity_dtl,
6719                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
6720                provision_count,
6721                total_provisions,
6722                // Pension data from HR snapshot
6723                pension_plan_count: entity_pension_plan_count,
6724                total_dbo: entity_total_dbo,
6725                total_plan_assets: entity_total_plan_assets,
6726                // Audit data
6727                related_party_transaction_count: rp_count,
6728                subsequent_event_count: se_count,
6729                adjusting_event_count: adjusting_count,
6730                ..NotesGeneratorContext::default()
6731            };
6732
6733            let entity_notes = notes_gen.generate(&ctx);
6734            let standard_note_count = entity_notes.len() as u32;
6735            info!(
6736                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6737                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6738            );
6739            financial_reporting
6740                .notes_to_financial_statements
6741                .extend(entity_notes);
6742
6743            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
6744            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6745                .debt_instruments
6746                .iter()
6747                .filter(|d| d.entity_id == company.code)
6748                .map(|d| {
6749                    (
6750                        format!("{:?}", d.instrument_type),
6751                        d.principal,
6752                        d.maturity_date.to_string(),
6753                    )
6754                })
6755                .collect();
6756
6757            let hedge_count = treasury.hedge_relationships.len();
6758            let effective_hedges = treasury
6759                .hedge_relationships
6760                .iter()
6761                .filter(|h| h.is_effective)
6762                .count();
6763            let total_notional: rust_decimal::Decimal = treasury
6764                .hedging_instruments
6765                .iter()
6766                .map(|h| h.notional_amount)
6767                .sum();
6768            let total_fair_value: rust_decimal::Decimal = treasury
6769                .hedging_instruments
6770                .iter()
6771                .map(|h| h.fair_value)
6772                .sum();
6773
6774            // Join provision_movements with provisions to get entity/type info
6775            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6776                .provisions
6777                .iter()
6778                .filter(|p| p.entity_code == company.code)
6779                .map(|p| p.id.as_str())
6780                .collect();
6781            let provision_movements: Vec<(
6782                String,
6783                rust_decimal::Decimal,
6784                rust_decimal::Decimal,
6785                rust_decimal::Decimal,
6786            )> = accounting_standards
6787                .provision_movements
6788                .iter()
6789                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6790                .map(|m| {
6791                    let prov_type = accounting_standards
6792                        .provisions
6793                        .iter()
6794                        .find(|p| p.id == m.provision_id)
6795                        .map(|p| format!("{:?}", p.provision_type))
6796                        .unwrap_or_else(|| "Unknown".to_string());
6797                    (prov_type, m.opening, m.additions, m.closing)
6798                })
6799                .collect();
6800
6801            let enhanced_ctx = EnhancedNotesContext {
6802                entity_code: company.code.clone(),
6803                period: format!("FY{}", fiscal_year),
6804                currency: company.currency.clone(),
6805                // Inventory breakdown: best-effort using zero (would need balance tracker)
6806                finished_goods_value: rust_decimal::Decimal::ZERO,
6807                wip_value: rust_decimal::Decimal::ZERO,
6808                raw_materials_value: rust_decimal::Decimal::ZERO,
6809                debt_instruments,
6810                hedge_count,
6811                effective_hedges,
6812                total_notional,
6813                total_fair_value,
6814                provision_movements,
6815            };
6816
6817            let enhanced_notes =
6818                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6819            if !enhanced_notes.is_empty() {
6820                info!(
6821                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6822                    company.code,
6823                    enhanced_notes.len(),
6824                    enhanced_ctx.debt_instruments.len(),
6825                    hedge_count,
6826                    enhanced_ctx.provision_movements.len(),
6827                );
6828                financial_reporting
6829                    .notes_to_financial_statements
6830                    .extend(enhanced_notes);
6831            }
6832        }
6833    }
6834
6835    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
6836    ///
6837    /// This ensures the trial balance is coherent with the JEs: every debit and credit
6838    /// posted in the journal entries flows through to the trial balance, using the real
6839    /// GL account numbers from the CoA.
6840    fn build_trial_balance_from_entries(
6841        journal_entries: &[JournalEntry],
6842        coa: &ChartOfAccounts,
6843        company_code: &str,
6844        fiscal_year: u16,
6845        fiscal_period: u8,
6846    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6847        use rust_decimal::Decimal;
6848
6849        // Accumulate total debits and credits per GL account
6850        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6851        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6852
6853        for je in journal_entries {
6854            // Filter to matching company, fiscal year, and period
6855            if je.header.company_code != company_code
6856                || je.header.fiscal_year != fiscal_year
6857                || je.header.fiscal_period != fiscal_period
6858            {
6859                continue;
6860            }
6861
6862            for line in &je.lines {
6863                let acct = &line.gl_account;
6864                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6865                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6866            }
6867        }
6868
6869        // Build a TrialBalanceEntry for each account that had activity
6870        let mut all_accounts: Vec<&String> = account_debits
6871            .keys()
6872            .chain(account_credits.keys())
6873            .collect::<std::collections::HashSet<_>>()
6874            .into_iter()
6875            .collect();
6876        all_accounts.sort();
6877
6878        let mut entries = Vec::new();
6879
6880        for acct_number in all_accounts {
6881            let debit = account_debits
6882                .get(acct_number)
6883                .copied()
6884                .unwrap_or(Decimal::ZERO);
6885            let credit = account_credits
6886                .get(acct_number)
6887                .copied()
6888                .unwrap_or(Decimal::ZERO);
6889
6890            if debit.is_zero() && credit.is_zero() {
6891                continue;
6892            }
6893
6894            // Look up account name from CoA, fall back to "Account {code}"
6895            let account_name = coa
6896                .get_account(acct_number)
6897                .map(|gl| gl.short_description.clone())
6898                .unwrap_or_else(|| format!("Account {acct_number}"));
6899
6900            // Map account code prefix to the category strings expected by
6901            // FinancialStatementGenerator (Cash, Receivables, Inventory,
6902            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
6903            // OperatingExpenses).
6904            let category = Self::category_from_account_code(acct_number);
6905
6906            entries.push(datasynth_generators::TrialBalanceEntry {
6907                account_code: acct_number.clone(),
6908                account_name,
6909                category,
6910                debit_balance: debit,
6911                credit_balance: credit,
6912            });
6913        }
6914
6915        entries
6916    }
6917
6918    /// Build a cumulative trial balance by aggregating all JEs from the start up to
6919    /// (and including) the given period end date.
6920    ///
6921    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
6922    /// while income statement accounts (revenue, expenses) show only the current period.
6923    /// The two are merged into a single Vec for the FinancialStatementGenerator.
6924    fn build_cumulative_trial_balance(
6925        journal_entries: &[JournalEntry],
6926        coa: &ChartOfAccounts,
6927        company_code: &str,
6928        start_date: NaiveDate,
6929        period_end: NaiveDate,
6930        fiscal_year: u16,
6931        fiscal_period: u8,
6932    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6933        use rust_decimal::Decimal;
6934
6935        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
6936        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6937        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6938
6939        // Accumulate debits/credits for income statement accounts (current period only)
6940        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6941        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6942
6943        for je in journal_entries {
6944            if je.header.company_code != company_code {
6945                continue;
6946            }
6947
6948            for line in &je.lines {
6949                let acct = &line.gl_account;
6950                let category = Self::category_from_account_code(acct);
6951                let is_bs_account = matches!(
6952                    category.as_str(),
6953                    "Cash"
6954                        | "Receivables"
6955                        | "Inventory"
6956                        | "FixedAssets"
6957                        | "Payables"
6958                        | "AccruedLiabilities"
6959                        | "LongTermDebt"
6960                        | "Equity"
6961                );
6962
6963                if is_bs_account {
6964                    // Balance sheet: accumulate from start through period_end
6965                    if je.header.document_date <= period_end
6966                        && je.header.document_date >= start_date
6967                    {
6968                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6969                            line.debit_amount;
6970                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6971                            line.credit_amount;
6972                    }
6973                } else {
6974                    // Income statement: current period only
6975                    if je.header.fiscal_year == fiscal_year
6976                        && je.header.fiscal_period == fiscal_period
6977                    {
6978                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6979                            line.debit_amount;
6980                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6981                            line.credit_amount;
6982                    }
6983                }
6984            }
6985        }
6986
6987        // Merge all accounts
6988        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6989        all_accounts.extend(bs_debits.keys().cloned());
6990        all_accounts.extend(bs_credits.keys().cloned());
6991        all_accounts.extend(is_debits.keys().cloned());
6992        all_accounts.extend(is_credits.keys().cloned());
6993
6994        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6995        sorted_accounts.sort();
6996
6997        let mut entries = Vec::new();
6998
6999        for acct_number in &sorted_accounts {
7000            let category = Self::category_from_account_code(acct_number);
7001            let is_bs_account = matches!(
7002                category.as_str(),
7003                "Cash"
7004                    | "Receivables"
7005                    | "Inventory"
7006                    | "FixedAssets"
7007                    | "Payables"
7008                    | "AccruedLiabilities"
7009                    | "LongTermDebt"
7010                    | "Equity"
7011            );
7012
7013            let (debit, credit) = if is_bs_account {
7014                (
7015                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7016                    bs_credits
7017                        .get(acct_number)
7018                        .copied()
7019                        .unwrap_or(Decimal::ZERO),
7020                )
7021            } else {
7022                (
7023                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7024                    is_credits
7025                        .get(acct_number)
7026                        .copied()
7027                        .unwrap_or(Decimal::ZERO),
7028                )
7029            };
7030
7031            if debit.is_zero() && credit.is_zero() {
7032                continue;
7033            }
7034
7035            let account_name = coa
7036                .get_account(acct_number)
7037                .map(|gl| gl.short_description.clone())
7038                .unwrap_or_else(|| format!("Account {acct_number}"));
7039
7040            entries.push(datasynth_generators::TrialBalanceEntry {
7041                account_code: acct_number.clone(),
7042                account_name,
7043                category,
7044                debit_balance: debit,
7045                credit_balance: credit,
7046            });
7047        }
7048
7049        entries
7050    }
7051
7052    /// Build a JE-derived cash flow statement using the indirect method.
7053    ///
7054    /// Compares current and prior cumulative trial balances to derive working capital
7055    /// changes, producing a coherent cash flow statement tied to actual journal entries.
7056    fn build_cash_flow_from_trial_balances(
7057        current_tb: &[datasynth_generators::TrialBalanceEntry],
7058        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7059        net_income: rust_decimal::Decimal,
7060    ) -> Vec<CashFlowItem> {
7061        use rust_decimal::Decimal;
7062
7063        // Helper: aggregate a TB by category and return net (debit - credit)
7064        let aggregate =
7065            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7066                let mut map: HashMap<String, Decimal> = HashMap::new();
7067                for entry in tb {
7068                    let net = entry.debit_balance - entry.credit_balance;
7069                    *map.entry(entry.category.clone()).or_default() += net;
7070                }
7071                map
7072            };
7073
7074        let current = aggregate(current_tb);
7075        let prior = prior_tb.map(aggregate);
7076
7077        // Get balance for a category, defaulting to zero
7078        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7079            *map.get(key).unwrap_or(&Decimal::ZERO)
7080        };
7081
7082        // Compute change: current - prior (or current if no prior)
7083        let change = |key: &str| -> Decimal {
7084            let curr = get(&current, key);
7085            match &prior {
7086                Some(p) => curr - get(p, key),
7087                None => curr,
7088            }
7089        };
7090
7091        // Operating activities (indirect method)
7092        // Depreciation add-back: approximate from FixedAssets decrease
7093        let fixed_asset_change = change("FixedAssets");
7094        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7095            -fixed_asset_change
7096        } else {
7097            Decimal::ZERO
7098        };
7099
7100        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
7101        let ar_change = change("Receivables");
7102        let inventory_change = change("Inventory");
7103        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
7104        let ap_change = change("Payables");
7105        let accrued_change = change("AccruedLiabilities");
7106
7107        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7108            + (-ap_change)
7109            + (-accrued_change);
7110
7111        // Investing activities
7112        let capex = if fixed_asset_change > Decimal::ZERO {
7113            -fixed_asset_change
7114        } else {
7115            Decimal::ZERO
7116        };
7117        let investing_cf = capex;
7118
7119        // Financing activities
7120        let debt_change = -change("LongTermDebt");
7121        let equity_change = -change("Equity");
7122        let financing_cf = debt_change + equity_change;
7123
7124        let net_change = operating_cf + investing_cf + financing_cf;
7125
7126        vec![
7127            CashFlowItem {
7128                item_code: "CF-NI".to_string(),
7129                label: "Net Income".to_string(),
7130                category: CashFlowCategory::Operating,
7131                amount: net_income,
7132                amount_prior: None,
7133                sort_order: 1,
7134                is_total: false,
7135            },
7136            CashFlowItem {
7137                item_code: "CF-DEP".to_string(),
7138                label: "Depreciation & Amortization".to_string(),
7139                category: CashFlowCategory::Operating,
7140                amount: depreciation_addback,
7141                amount_prior: None,
7142                sort_order: 2,
7143                is_total: false,
7144            },
7145            CashFlowItem {
7146                item_code: "CF-AR".to_string(),
7147                label: "Change in Accounts Receivable".to_string(),
7148                category: CashFlowCategory::Operating,
7149                amount: -ar_change,
7150                amount_prior: None,
7151                sort_order: 3,
7152                is_total: false,
7153            },
7154            CashFlowItem {
7155                item_code: "CF-AP".to_string(),
7156                label: "Change in Accounts Payable".to_string(),
7157                category: CashFlowCategory::Operating,
7158                amount: -ap_change,
7159                amount_prior: None,
7160                sort_order: 4,
7161                is_total: false,
7162            },
7163            CashFlowItem {
7164                item_code: "CF-INV".to_string(),
7165                label: "Change in Inventory".to_string(),
7166                category: CashFlowCategory::Operating,
7167                amount: -inventory_change,
7168                amount_prior: None,
7169                sort_order: 5,
7170                is_total: false,
7171            },
7172            CashFlowItem {
7173                item_code: "CF-OP".to_string(),
7174                label: "Net Cash from Operating Activities".to_string(),
7175                category: CashFlowCategory::Operating,
7176                amount: operating_cf,
7177                amount_prior: None,
7178                sort_order: 6,
7179                is_total: true,
7180            },
7181            CashFlowItem {
7182                item_code: "CF-CAPEX".to_string(),
7183                label: "Capital Expenditures".to_string(),
7184                category: CashFlowCategory::Investing,
7185                amount: capex,
7186                amount_prior: None,
7187                sort_order: 7,
7188                is_total: false,
7189            },
7190            CashFlowItem {
7191                item_code: "CF-INV-T".to_string(),
7192                label: "Net Cash from Investing Activities".to_string(),
7193                category: CashFlowCategory::Investing,
7194                amount: investing_cf,
7195                amount_prior: None,
7196                sort_order: 8,
7197                is_total: true,
7198            },
7199            CashFlowItem {
7200                item_code: "CF-DEBT".to_string(),
7201                label: "Net Borrowings / (Repayments)".to_string(),
7202                category: CashFlowCategory::Financing,
7203                amount: debt_change,
7204                amount_prior: None,
7205                sort_order: 9,
7206                is_total: false,
7207            },
7208            CashFlowItem {
7209                item_code: "CF-EQ".to_string(),
7210                label: "Equity Changes".to_string(),
7211                category: CashFlowCategory::Financing,
7212                amount: equity_change,
7213                amount_prior: None,
7214                sort_order: 10,
7215                is_total: false,
7216            },
7217            CashFlowItem {
7218                item_code: "CF-FIN-T".to_string(),
7219                label: "Net Cash from Financing Activities".to_string(),
7220                category: CashFlowCategory::Financing,
7221                amount: financing_cf,
7222                amount_prior: None,
7223                sort_order: 11,
7224                is_total: true,
7225            },
7226            CashFlowItem {
7227                item_code: "CF-NET".to_string(),
7228                label: "Net Change in Cash".to_string(),
7229                category: CashFlowCategory::Operating,
7230                amount: net_change,
7231                amount_prior: None,
7232                sort_order: 12,
7233                is_total: true,
7234            },
7235        ]
7236    }
7237
7238    /// Calculate net income from a set of trial balance entries.
7239    ///
7240    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
7241    fn calculate_net_income_from_tb(
7242        tb: &[datasynth_generators::TrialBalanceEntry],
7243    ) -> rust_decimal::Decimal {
7244        use rust_decimal::Decimal;
7245
7246        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7247        for entry in tb {
7248            let net = entry.debit_balance - entry.credit_balance;
7249            *aggregated.entry(entry.category.clone()).or_default() += net;
7250        }
7251
7252        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7253        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7254        let opex = *aggregated
7255            .get("OperatingExpenses")
7256            .unwrap_or(&Decimal::ZERO);
7257        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7258        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7259
7260        // revenue is negative (credit-normal), expenses are positive (debit-normal)
7261        // other_income is typically negative (credit), other_expenses is typically positive
7262        let operating_income = revenue - cogs - opex - other_expenses - other_income;
7263        let tax_rate = Decimal::new(25, 2); // 0.25
7264        let tax = operating_income * tax_rate;
7265        operating_income - tax
7266    }
7267
7268    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
7269    ///
7270    /// Uses the first two digits of the account code to classify into the categories
7271    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
7272    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
7273    /// OperatingExpenses, OtherIncome, OtherExpenses.
7274    fn category_from_account_code(code: &str) -> String {
7275        let prefix: String = code.chars().take(2).collect();
7276        match prefix.as_str() {
7277            "10" => "Cash",
7278            "11" => "Receivables",
7279            "12" | "13" | "14" => "Inventory",
7280            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
7281            "20" => "Payables",
7282            "21" | "22" | "23" | "24" => "AccruedLiabilities",
7283            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
7284            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
7285            "40" | "41" | "42" | "43" | "44" => "Revenue",
7286            "50" | "51" | "52" => "CostOfSales",
7287            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
7288                "OperatingExpenses"
7289            }
7290            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
7291            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
7292            _ => "OperatingExpenses",
7293        }
7294        .to_string()
7295    }
7296
7297    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
7298    fn phase_hr_data(
7299        &mut self,
7300        stats: &mut EnhancedGenerationStatistics,
7301    ) -> SynthResult<HrSnapshot> {
7302        if !self.phase_config.generate_hr {
7303            debug!("Phase 16: Skipped (HR generation disabled)");
7304            return Ok(HrSnapshot::default());
7305        }
7306
7307        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7308
7309        let seed = self.seed;
7310        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7311            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7312        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7313        let company_code = self
7314            .config
7315            .companies
7316            .first()
7317            .map(|c| c.code.as_str())
7318            .unwrap_or("1000");
7319        let currency = self
7320            .config
7321            .companies
7322            .first()
7323            .map(|c| c.currency.as_str())
7324            .unwrap_or("USD");
7325
7326        let employee_ids: Vec<String> = self
7327            .master_data
7328            .employees
7329            .iter()
7330            .map(|e| e.employee_id.clone())
7331            .collect();
7332
7333        if employee_ids.is_empty() {
7334            debug!("Phase 16: Skipped (no employees available)");
7335            return Ok(HrSnapshot::default());
7336        }
7337
7338        // Extract cost-center pool from master data employees for cross-reference
7339        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
7340        let cost_center_ids: Vec<String> = self
7341            .master_data
7342            .employees
7343            .iter()
7344            .filter_map(|e| e.cost_center.clone())
7345            .collect::<std::collections::HashSet<_>>()
7346            .into_iter()
7347            .collect();
7348
7349        let mut snapshot = HrSnapshot::default();
7350
7351        // Generate payroll runs (one per month)
7352        if self.config.hr.payroll.enabled {
7353            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7354                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7355
7356            // Look up country pack for payroll deductions and labels
7357            let payroll_pack = self.primary_pack();
7358
7359            // Store the pack on the generator so generate() resolves
7360            // localized deduction rates and labels from it.
7361            payroll_gen.set_country_pack(payroll_pack.clone());
7362
7363            let employees_with_salary: Vec<(
7364                String,
7365                rust_decimal::Decimal,
7366                Option<String>,
7367                Option<String>,
7368            )> = self
7369                .master_data
7370                .employees
7371                .iter()
7372                .map(|e| {
7373                    // Use the employee's actual annual base salary.
7374                    // Fall back to $60,000 / yr if somehow zero.
7375                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7376                        e.base_salary
7377                    } else {
7378                        rust_decimal::Decimal::from(60_000)
7379                    };
7380                    (
7381                        e.employee_id.clone(),
7382                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
7383                        e.cost_center.clone(),
7384                        e.department_id.clone(),
7385                    )
7386                })
7387                .collect();
7388
7389            // Use generate_with_changes when employee change history is available
7390            // so that salary adjustments, transfers, etc. are reflected in payroll.
7391            let change_history = &self.master_data.employee_change_history;
7392            let has_changes = !change_history.is_empty();
7393            if has_changes {
7394                debug!(
7395                    "Payroll will incorporate {} employee change events",
7396                    change_history.len()
7397                );
7398            }
7399
7400            for month in 0..self.config.global.period_months {
7401                let period_start = start_date + chrono::Months::new(month);
7402                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7403                let (run, items) = if has_changes {
7404                    payroll_gen.generate_with_changes(
7405                        company_code,
7406                        &employees_with_salary,
7407                        period_start,
7408                        period_end,
7409                        currency,
7410                        change_history,
7411                    )
7412                } else {
7413                    payroll_gen.generate(
7414                        company_code,
7415                        &employees_with_salary,
7416                        period_start,
7417                        period_end,
7418                        currency,
7419                    )
7420                };
7421                snapshot.payroll_runs.push(run);
7422                snapshot.payroll_run_count += 1;
7423                snapshot.payroll_line_item_count += items.len();
7424                snapshot.payroll_line_items.extend(items);
7425            }
7426        }
7427
7428        // Generate time entries
7429        if self.config.hr.time_attendance.enabled {
7430            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7431                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7432            // v3.4.2: when a temporal context is configured, time entries
7433            // respect holidays (not just weekends) and submitted_at lag
7434            // snaps to business days.
7435            if let Some(ctx) = &self.temporal_context {
7436                time_gen.set_temporal_context(Arc::clone(ctx));
7437            }
7438            let entries = time_gen.generate(
7439                &employee_ids,
7440                start_date,
7441                end_date,
7442                &self.config.hr.time_attendance,
7443            );
7444            snapshot.time_entry_count = entries.len();
7445            snapshot.time_entries = entries;
7446        }
7447
7448        // Generate expense reports
7449        if self.config.hr.expenses.enabled {
7450            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7451                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7452            expense_gen.set_country_pack(self.primary_pack().clone());
7453            // v3.4.2: snap submission / approval / paid / line-item dates
7454            // to business days when temporal_context is present.
7455            if let Some(ctx) = &self.temporal_context {
7456                expense_gen.set_temporal_context(Arc::clone(ctx));
7457            }
7458            let company_currency = self
7459                .config
7460                .companies
7461                .first()
7462                .map(|c| c.currency.as_str())
7463                .unwrap_or("USD");
7464            let reports = expense_gen.generate_with_currency(
7465                &employee_ids,
7466                start_date,
7467                end_date,
7468                &self.config.hr.expenses,
7469                company_currency,
7470            );
7471            snapshot.expense_report_count = reports.len();
7472            snapshot.expense_reports = reports;
7473        }
7474
7475        // Generate benefit enrollments (gated on payroll, since benefits require employees)
7476        if self.config.hr.payroll.enabled {
7477            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7478            let employee_pairs: Vec<(String, String)> = self
7479                .master_data
7480                .employees
7481                .iter()
7482                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7483                .collect();
7484            let enrollments =
7485                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7486            snapshot.benefit_enrollment_count = enrollments.len();
7487            snapshot.benefit_enrollments = enrollments;
7488        }
7489
7490        // Generate defined benefit pension plans (IAS 19 / ASC 715)
7491        if self.phase_config.generate_hr {
7492            let entity_name = self
7493                .config
7494                .companies
7495                .first()
7496                .map(|c| c.name.as_str())
7497                .unwrap_or("Entity");
7498            let period_months = self.config.global.period_months;
7499            let period_label = {
7500                let y = start_date.year();
7501                let m = start_date.month();
7502                if period_months >= 12 {
7503                    format!("FY{y}")
7504                } else {
7505                    format!("{y}-{m:02}")
7506                }
7507            };
7508            let reporting_date =
7509                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7510
7511            // Compute average annual salary from actual payroll data when available.
7512            // PayrollRun.total_gross covers all employees for one pay period; we sum
7513            // across all runs and divide by employee_count to get per-employee total,
7514            // then annualise for sub-annual periods.
7515            let avg_salary: Option<rust_decimal::Decimal> = {
7516                let employee_count = employee_ids.len();
7517                if self.config.hr.payroll.enabled
7518                    && employee_count > 0
7519                    && !snapshot.payroll_runs.is_empty()
7520                {
7521                    // Sum total gross pay across all payroll runs for this company
7522                    let total_gross: rust_decimal::Decimal = snapshot
7523                        .payroll_runs
7524                        .iter()
7525                        .filter(|r| r.company_code == company_code)
7526                        .map(|r| r.total_gross)
7527                        .sum();
7528                    if total_gross > rust_decimal::Decimal::ZERO {
7529                        // Annualise: total_gross covers `period_months` months of pay
7530                        let annual_total = if period_months > 0 && period_months < 12 {
7531                            total_gross * rust_decimal::Decimal::from(12u32)
7532                                / rust_decimal::Decimal::from(period_months)
7533                        } else {
7534                            total_gross
7535                        };
7536                        Some(
7537                            (annual_total / rust_decimal::Decimal::from(employee_count))
7538                                .round_dp(2),
7539                        )
7540                    } else {
7541                        None
7542                    }
7543                } else {
7544                    None
7545                }
7546            };
7547
7548            let mut pension_gen =
7549                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7550            let pension_snap = pension_gen.generate(
7551                company_code,
7552                entity_name,
7553                &period_label,
7554                reporting_date,
7555                employee_ids.len(),
7556                currency,
7557                avg_salary,
7558                period_months,
7559            );
7560            snapshot.pension_plan_count = pension_snap.plans.len();
7561            snapshot.pension_plans = pension_snap.plans;
7562            snapshot.pension_obligations = pension_snap.obligations;
7563            snapshot.pension_plan_assets = pension_snap.plan_assets;
7564            snapshot.pension_disclosures = pension_snap.disclosures;
7565            // Pension JEs are returned here so they can be added to entries
7566            // in the caller (stored temporarily on snapshot for transfer).
7567            // We embed them in the hr snapshot for simplicity; the orchestrator
7568            // will extract and extend `entries`.
7569            snapshot.pension_journal_entries = pension_snap.journal_entries;
7570        }
7571
7572        // Generate stock-based compensation (ASC 718 / IFRS 2)
7573        if self.phase_config.generate_hr && !employee_ids.is_empty() {
7574            let period_months = self.config.global.period_months;
7575            let period_label = {
7576                let y = start_date.year();
7577                let m = start_date.month();
7578                if period_months >= 12 {
7579                    format!("FY{y}")
7580                } else {
7581                    format!("{y}-{m:02}")
7582                }
7583            };
7584            let reporting_date =
7585                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7586
7587            let mut stock_comp_gen =
7588                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7589            let stock_snap = stock_comp_gen.generate(
7590                company_code,
7591                &employee_ids,
7592                start_date,
7593                &period_label,
7594                reporting_date,
7595                currency,
7596            );
7597            snapshot.stock_grant_count = stock_snap.grants.len();
7598            snapshot.stock_grants = stock_snap.grants;
7599            snapshot.stock_comp_expenses = stock_snap.expenses;
7600            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7601        }
7602
7603        stats.payroll_run_count = snapshot.payroll_run_count;
7604        stats.time_entry_count = snapshot.time_entry_count;
7605        stats.expense_report_count = snapshot.expense_report_count;
7606        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7607        stats.pension_plan_count = snapshot.pension_plan_count;
7608        stats.stock_grant_count = snapshot.stock_grant_count;
7609
7610        info!(
7611            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7612            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7613            snapshot.time_entry_count, snapshot.expense_report_count,
7614            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7615            snapshot.stock_grant_count
7616        );
7617        self.check_resources_with_log("post-hr")?;
7618
7619        Ok(snapshot)
7620    }
7621
7622    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
7623    fn phase_accounting_standards(
7624        &mut self,
7625        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7626        journal_entries: &[JournalEntry],
7627        stats: &mut EnhancedGenerationStatistics,
7628    ) -> SynthResult<AccountingStandardsSnapshot> {
7629        if !self.phase_config.generate_accounting_standards {
7630            debug!("Phase 17: Skipped (accounting standards generation disabled)");
7631            return Ok(AccountingStandardsSnapshot::default());
7632        }
7633        info!("Phase 17: Generating Accounting Standards Data");
7634
7635        let seed = self.seed;
7636        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7637            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7638        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7639        let company_code = self
7640            .config
7641            .companies
7642            .first()
7643            .map(|c| c.code.as_str())
7644            .unwrap_or("1000");
7645        let currency = self
7646            .config
7647            .companies
7648            .first()
7649            .map(|c| c.currency.as_str())
7650            .unwrap_or("USD");
7651
7652        // Convert config framework to standards framework.
7653        // If the user explicitly set a framework in the YAML config, use that.
7654        // Otherwise, fall back to the country pack's accounting.framework field,
7655        // and if that is also absent or unrecognised, default to US GAAP.
7656        let framework = match self.config.accounting_standards.framework {
7657            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7658                datasynth_standards::framework::AccountingFramework::UsGaap
7659            }
7660            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7661                datasynth_standards::framework::AccountingFramework::Ifrs
7662            }
7663            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7664                datasynth_standards::framework::AccountingFramework::DualReporting
7665            }
7666            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7667                datasynth_standards::framework::AccountingFramework::FrenchGaap
7668            }
7669            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7670                datasynth_standards::framework::AccountingFramework::GermanGaap
7671            }
7672            None => {
7673                // Derive framework from the primary company's country pack
7674                let pack = self.primary_pack();
7675                let pack_fw = pack.accounting.framework.as_str();
7676                match pack_fw {
7677                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7678                    "dual_reporting" => {
7679                        datasynth_standards::framework::AccountingFramework::DualReporting
7680                    }
7681                    "french_gaap" => {
7682                        datasynth_standards::framework::AccountingFramework::FrenchGaap
7683                    }
7684                    "german_gaap" | "hgb" => {
7685                        datasynth_standards::framework::AccountingFramework::GermanGaap
7686                    }
7687                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
7688                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7689                }
7690            }
7691        };
7692
7693        let mut snapshot = AccountingStandardsSnapshot::default();
7694
7695        // Revenue recognition
7696        if self.config.accounting_standards.revenue_recognition.enabled {
7697            let customer_ids: Vec<String> = self
7698                .master_data
7699                .customers
7700                .iter()
7701                .map(|c| c.customer_id.clone())
7702                .collect();
7703
7704            if !customer_ids.is_empty() {
7705                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7706                let contracts = rev_gen.generate(
7707                    company_code,
7708                    &customer_ids,
7709                    start_date,
7710                    end_date,
7711                    currency,
7712                    &self.config.accounting_standards.revenue_recognition,
7713                    framework,
7714                );
7715                snapshot.revenue_contract_count = contracts.len();
7716                snapshot.contracts = contracts;
7717            }
7718        }
7719
7720        // Impairment testing
7721        if self.config.accounting_standards.impairment.enabled {
7722            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7723                .master_data
7724                .assets
7725                .iter()
7726                .map(|a| {
7727                    (
7728                        a.asset_id.clone(),
7729                        a.description.clone(),
7730                        a.acquisition_cost,
7731                    )
7732                })
7733                .collect();
7734
7735            if !asset_data.is_empty() {
7736                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7737                let tests = imp_gen.generate(
7738                    company_code,
7739                    &asset_data,
7740                    end_date,
7741                    &self.config.accounting_standards.impairment,
7742                    framework,
7743                );
7744                snapshot.impairment_test_count = tests.len();
7745                snapshot.impairment_tests = tests;
7746            }
7747        }
7748
7749        // Business combinations (IFRS 3 / ASC 805)
7750        if self
7751            .config
7752            .accounting_standards
7753            .business_combinations
7754            .enabled
7755        {
7756            let bc_config = &self.config.accounting_standards.business_combinations;
7757            let framework_str = match framework {
7758                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7759                _ => "US_GAAP",
7760            };
7761            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7762            let bc_snap = bc_gen.generate(
7763                company_code,
7764                currency,
7765                start_date,
7766                end_date,
7767                bc_config.acquisition_count,
7768                framework_str,
7769            );
7770            snapshot.business_combination_count = bc_snap.combinations.len();
7771            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7772            snapshot.business_combinations = bc_snap.combinations;
7773        }
7774
7775        // Expected Credit Loss (IFRS 9 / ASC 326)
7776        if self
7777            .config
7778            .accounting_standards
7779            .expected_credit_loss
7780            .enabled
7781        {
7782            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7783            let framework_str = match framework {
7784                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7785                _ => "ASC_326",
7786            };
7787
7788            // Use AR aging data from the subledger snapshot if available;
7789            // otherwise generate synthetic bucket exposures.
7790            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7791
7792            let mut ecl_gen = EclGenerator::new(seed + 43);
7793
7794            // Collect combined bucket totals across all company AR aging reports.
7795            let bucket_exposures: Vec<(
7796                datasynth_core::models::subledger::ar::AgingBucket,
7797                rust_decimal::Decimal,
7798            )> = if ar_aging_reports.is_empty() {
7799                // No AR aging data — synthesise plausible bucket exposures.
7800                use datasynth_core::models::subledger::ar::AgingBucket;
7801                vec![
7802                    (
7803                        AgingBucket::Current,
7804                        rust_decimal::Decimal::from(500_000_u32),
7805                    ),
7806                    (
7807                        AgingBucket::Days1To30,
7808                        rust_decimal::Decimal::from(120_000_u32),
7809                    ),
7810                    (
7811                        AgingBucket::Days31To60,
7812                        rust_decimal::Decimal::from(45_000_u32),
7813                    ),
7814                    (
7815                        AgingBucket::Days61To90,
7816                        rust_decimal::Decimal::from(15_000_u32),
7817                    ),
7818                    (
7819                        AgingBucket::Over90Days,
7820                        rust_decimal::Decimal::from(8_000_u32),
7821                    ),
7822                ]
7823            } else {
7824                use datasynth_core::models::subledger::ar::AgingBucket;
7825                // Sum bucket totals from all reports.
7826                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7827                    std::collections::HashMap::new();
7828                for report in ar_aging_reports {
7829                    for (bucket, amount) in &report.bucket_totals {
7830                        *totals.entry(*bucket).or_default() += amount;
7831                    }
7832                }
7833                AgingBucket::all()
7834                    .into_iter()
7835                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7836                    .collect()
7837            };
7838
7839            let ecl_snap = ecl_gen.generate(
7840                company_code,
7841                end_date,
7842                &bucket_exposures,
7843                ecl_config,
7844                &period_label,
7845                framework_str,
7846            );
7847
7848            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7849            snapshot.ecl_models = ecl_snap.ecl_models;
7850            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7851            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7852        }
7853
7854        // Provisions and contingencies (IAS 37 / ASC 450)
7855        {
7856            let framework_str = match framework {
7857                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7858                _ => "US_GAAP",
7859            };
7860
7861            // Compute actual revenue from the journal entries generated so far.
7862            // The `journal_entries` slice passed to this phase contains all GL entries
7863            // up to and including Period Close. Fall back to a minimum of 100_000 to
7864            // avoid degenerate zero-based provision amounts on first-period datasets.
7865            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7866                .max(rust_decimal::Decimal::from(100_000_u32));
7867
7868            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7869
7870            let mut prov_gen = ProvisionGenerator::new(seed + 44);
7871            let prov_snap = prov_gen.generate(
7872                company_code,
7873                currency,
7874                revenue_proxy,
7875                end_date,
7876                &period_label,
7877                framework_str,
7878                None, // prior_opening: no carry-forward data in single-period runs
7879            );
7880
7881            snapshot.provision_count = prov_snap.provisions.len();
7882            snapshot.provisions = prov_snap.provisions;
7883            snapshot.provision_movements = prov_snap.movements;
7884            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7885            snapshot.provision_journal_entries = prov_snap.journal_entries;
7886        }
7887
7888        // IAS 21 Functional Currency Translation
7889        // For each company whose functional currency differs from the presentation
7890        // currency, generate a CurrencyTranslationResult with CTA (OCI).
7891        {
7892            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7893
7894            let presentation_currency = self
7895                .config
7896                .global
7897                .presentation_currency
7898                .clone()
7899                .unwrap_or_else(|| self.config.global.group_currency.clone());
7900
7901            // Build a minimal rate table populated with approximate rates from
7902            // the FX model base rates (USD-based) so we can do the translation.
7903            let mut rate_table = FxRateTable::new(&presentation_currency);
7904
7905            // Populate with base rates against USD; if presentation_currency is
7906            // not USD we do a best-effort two-step conversion using the table's
7907            // triangulation support.
7908            let base_rates = base_rates_usd();
7909            for (ccy, rate) in &base_rates {
7910                rate_table.add_rate(FxRate::new(
7911                    ccy,
7912                    "USD",
7913                    RateType::Closing,
7914                    end_date,
7915                    *rate,
7916                    "SYNTHETIC",
7917                ));
7918                // Average rate = 98% of closing (approximation).
7919                // 0.98 = 98/100 = Decimal::new(98, 2)
7920                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7921                rate_table.add_rate(FxRate::new(
7922                    ccy,
7923                    "USD",
7924                    RateType::Average,
7925                    end_date,
7926                    avg,
7927                    "SYNTHETIC",
7928                ));
7929            }
7930
7931            let mut translation_results = Vec::new();
7932            for company in &self.config.companies {
7933                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
7934                // to ensure the translation produces non-trivial CTA amounts.
7935                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7936                    .max(rust_decimal::Decimal::from(100_000_u32));
7937
7938                let func_ccy = company
7939                    .functional_currency
7940                    .clone()
7941                    .unwrap_or_else(|| company.currency.clone());
7942
7943                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7944                    &company.code,
7945                    &func_ccy,
7946                    &presentation_currency,
7947                    &ias21_period_label,
7948                    end_date,
7949                    company_revenue,
7950                    &rate_table,
7951                );
7952                translation_results.push(result);
7953            }
7954
7955            snapshot.currency_translation_count = translation_results.len();
7956            snapshot.currency_translation_results = translation_results;
7957        }
7958
7959        stats.revenue_contract_count = snapshot.revenue_contract_count;
7960        stats.impairment_test_count = snapshot.impairment_test_count;
7961        stats.business_combination_count = snapshot.business_combination_count;
7962        stats.ecl_model_count = snapshot.ecl_model_count;
7963        stats.provision_count = snapshot.provision_count;
7964
7965        // ------------------------------------------------------------
7966        // v3.3.1: Lease accounting (IFRS 16 / ASC 842)
7967        // ------------------------------------------------------------
7968        if self.config.accounting_standards.leases.enabled {
7969            use datasynth_generators::standards::LeaseGenerator;
7970            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7971                .unwrap_or_else(|_| {
7972                    NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
7973                });
7974            let framework =
7975                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7976            let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
7977            for company in &self.config.companies {
7978                let leases = lease_gen.generate(
7979                    &company.code,
7980                    start_date,
7981                    &self.config.accounting_standards.leases,
7982                    framework,
7983                );
7984                snapshot.lease_count += leases.len();
7985                snapshot.leases.extend(leases);
7986            }
7987            info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
7988        }
7989
7990        // ------------------------------------------------------------
7991        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
7992        // ------------------------------------------------------------
7993        if self.config.accounting_standards.fair_value.enabled {
7994            use datasynth_generators::standards::FairValueGenerator;
7995            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7996                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7997                + chrono::Months::new(self.config.global.period_months);
7998            let framework =
7999                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8000            let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8001            for company in &self.config.companies {
8002                let measurements = fv_gen.generate(
8003                    &company.code,
8004                    end_date,
8005                    &company.currency,
8006                    &self.config.accounting_standards.fair_value,
8007                    framework,
8008                );
8009                snapshot.fair_value_measurement_count += measurements.len();
8010                snapshot.fair_value_measurements.extend(measurements);
8011            }
8012            info!(
8013                "v3.3.1 fair value measurements: {}",
8014                snapshot.fair_value_measurement_count
8015            );
8016        }
8017
8018        // ------------------------------------------------------------
8019        // v3.3.1: Framework reconciliation (dual reporting only)
8020        // ------------------------------------------------------------
8021        if self.config.accounting_standards.generate_differences
8022            && matches!(
8023                self.config.accounting_standards.framework,
8024                Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8025            )
8026        {
8027            use datasynth_generators::standards::FrameworkReconciliationGenerator;
8028            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8029                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8030                + chrono::Months::new(self.config.global.period_months);
8031            let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8032            for company in &self.config.companies {
8033                let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8034                snapshot.framework_difference_count += records.len();
8035                snapshot.framework_differences.extend(records);
8036                snapshot.framework_reconciliations.push(reconciliation);
8037            }
8038            info!(
8039                "v3.3.1 framework reconciliation: {} differences across {} entities",
8040                snapshot.framework_difference_count,
8041                snapshot.framework_reconciliations.len()
8042            );
8043        }
8044
8045        info!(
8046            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8047            snapshot.revenue_contract_count,
8048            snapshot.impairment_test_count,
8049            snapshot.business_combination_count,
8050            snapshot.ecl_model_count,
8051            snapshot.provision_count,
8052            snapshot.currency_translation_count,
8053            snapshot.lease_count,
8054            snapshot.fair_value_measurement_count,
8055            snapshot.framework_difference_count,
8056        );
8057        self.check_resources_with_log("post-accounting-standards")?;
8058
8059        Ok(snapshot)
8060    }
8061
8062    /// v3.3.1: helper to resolve the accounting-standards framework enum
8063    /// from config into the `datasynth_standards::framework::AccountingFramework`
8064    /// type expected by standards generators. Falls back to US GAAP.
8065    fn resolve_accounting_framework(
8066        cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8067    ) -> datasynth_standards::framework::AccountingFramework {
8068        use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8069        use datasynth_standards::framework::AccountingFramework as Fw;
8070        match cfg {
8071            Some(Cfg::Ifrs) => Fw::Ifrs,
8072            Some(Cfg::DualReporting) => Fw::DualReporting,
8073            Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8074            Some(Cfg::GermanGaap) => Fw::GermanGaap,
8075            _ => Fw::UsGaap,
8076        }
8077    }
8078
8079    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
8080    fn phase_manufacturing(
8081        &mut self,
8082        stats: &mut EnhancedGenerationStatistics,
8083    ) -> SynthResult<ManufacturingSnapshot> {
8084        if !self.phase_config.generate_manufacturing {
8085            debug!("Phase 18: Skipped (manufacturing generation disabled)");
8086            return Ok(ManufacturingSnapshot::default());
8087        }
8088        info!("Phase 18: Generating Manufacturing Data");
8089
8090        let seed = self.seed;
8091        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8092            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8093        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8094        let company_code = self
8095            .config
8096            .companies
8097            .first()
8098            .map(|c| c.code.as_str())
8099            .unwrap_or("1000");
8100
8101        let material_data: Vec<(String, String)> = self
8102            .master_data
8103            .materials
8104            .iter()
8105            .map(|m| (m.material_id.clone(), m.description.clone()))
8106            .collect();
8107
8108        if material_data.is_empty() {
8109            debug!("Phase 18: Skipped (no materials available)");
8110            return Ok(ManufacturingSnapshot::default());
8111        }
8112
8113        let mut snapshot = ManufacturingSnapshot::default();
8114
8115        // Generate production orders
8116        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8117        // v3.4.3: snap planned / actual / operation dates to business days.
8118        if let Some(ctx) = &self.temporal_context {
8119            prod_gen.set_temporal_context(Arc::clone(ctx));
8120        }
8121        let production_orders = prod_gen.generate(
8122            company_code,
8123            &material_data,
8124            start_date,
8125            end_date,
8126            &self.config.manufacturing.production_orders,
8127            &self.config.manufacturing.costing,
8128            &self.config.manufacturing.routing,
8129        );
8130        snapshot.production_order_count = production_orders.len();
8131
8132        // Generate quality inspections from production orders
8133        let inspection_data: Vec<(String, String, String)> = production_orders
8134            .iter()
8135            .map(|po| {
8136                (
8137                    po.order_id.clone(),
8138                    po.material_id.clone(),
8139                    po.material_description.clone(),
8140                )
8141            })
8142            .collect();
8143
8144        snapshot.production_orders = production_orders;
8145
8146        if !inspection_data.is_empty() {
8147            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8148            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8149            snapshot.quality_inspection_count = inspections.len();
8150            snapshot.quality_inspections = inspections;
8151        }
8152
8153        // Generate cycle counts (one per month)
8154        let storage_locations: Vec<(String, String)> = material_data
8155            .iter()
8156            .enumerate()
8157            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8158            .collect();
8159
8160        let employee_ids: Vec<String> = self
8161            .master_data
8162            .employees
8163            .iter()
8164            .map(|e| e.employee_id.clone())
8165            .collect();
8166        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8167            .with_employee_pool(employee_ids);
8168        let mut cycle_count_total = 0usize;
8169        for month in 0..self.config.global.period_months {
8170            let count_date = start_date + chrono::Months::new(month);
8171            let items_per_count = storage_locations.len().clamp(10, 50);
8172            let cc = cc_gen.generate(
8173                company_code,
8174                &storage_locations,
8175                count_date,
8176                items_per_count,
8177            );
8178            snapshot.cycle_counts.push(cc);
8179            cycle_count_total += 1;
8180        }
8181        snapshot.cycle_count_count = cycle_count_total;
8182
8183        // Generate BOM components
8184        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8185        let bom_components = bom_gen.generate(company_code, &material_data);
8186        snapshot.bom_component_count = bom_components.len();
8187        snapshot.bom_components = bom_components;
8188
8189        // Generate inventory movements — link GoodsIssue movements to real production order IDs
8190        let currency = self
8191            .config
8192            .companies
8193            .first()
8194            .map(|c| c.currency.as_str())
8195            .unwrap_or("USD");
8196        let production_order_ids: Vec<String> = snapshot
8197            .production_orders
8198            .iter()
8199            .map(|po| po.order_id.clone())
8200            .collect();
8201        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8202        let inventory_movements = inv_mov_gen.generate_with_production_orders(
8203            company_code,
8204            &material_data,
8205            start_date,
8206            end_date,
8207            2,
8208            currency,
8209            &production_order_ids,
8210        );
8211        snapshot.inventory_movement_count = inventory_movements.len();
8212        snapshot.inventory_movements = inventory_movements;
8213
8214        stats.production_order_count = snapshot.production_order_count;
8215        stats.quality_inspection_count = snapshot.quality_inspection_count;
8216        stats.cycle_count_count = snapshot.cycle_count_count;
8217        stats.bom_component_count = snapshot.bom_component_count;
8218        stats.inventory_movement_count = snapshot.inventory_movement_count;
8219
8220        info!(
8221            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8222            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8223            snapshot.bom_component_count, snapshot.inventory_movement_count
8224        );
8225        self.check_resources_with_log("post-manufacturing")?;
8226
8227        Ok(snapshot)
8228    }
8229
8230    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
8231    fn phase_sales_kpi_budgets(
8232        &mut self,
8233        coa: &Arc<ChartOfAccounts>,
8234        financial_reporting: &FinancialReportingSnapshot,
8235        stats: &mut EnhancedGenerationStatistics,
8236    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8237        if !self.phase_config.generate_sales_kpi_budgets {
8238            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8239            return Ok(SalesKpiBudgetsSnapshot::default());
8240        }
8241        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8242
8243        let seed = self.seed;
8244        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8245            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8246        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8247        let company_code = self
8248            .config
8249            .companies
8250            .first()
8251            .map(|c| c.code.as_str())
8252            .unwrap_or("1000");
8253
8254        let mut snapshot = SalesKpiBudgetsSnapshot::default();
8255
8256        // Sales Quotes
8257        if self.config.sales_quotes.enabled {
8258            let customer_data: Vec<(String, String)> = self
8259                .master_data
8260                .customers
8261                .iter()
8262                .map(|c| (c.customer_id.clone(), c.name.clone()))
8263                .collect();
8264            let material_data: Vec<(String, String)> = self
8265                .master_data
8266                .materials
8267                .iter()
8268                .map(|m| (m.material_id.clone(), m.description.clone()))
8269                .collect();
8270
8271            if !customer_data.is_empty() && !material_data.is_empty() {
8272                let employee_ids: Vec<String> = self
8273                    .master_data
8274                    .employees
8275                    .iter()
8276                    .map(|e| e.employee_id.clone())
8277                    .collect();
8278                let customer_ids: Vec<String> = self
8279                    .master_data
8280                    .customers
8281                    .iter()
8282                    .map(|c| c.customer_id.clone())
8283                    .collect();
8284                let company_currency = self
8285                    .config
8286                    .companies
8287                    .first()
8288                    .map(|c| c.currency.as_str())
8289                    .unwrap_or("USD");
8290
8291                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8292                    .with_pools(employee_ids, customer_ids);
8293                let quotes = quote_gen.generate_with_currency(
8294                    company_code,
8295                    &customer_data,
8296                    &material_data,
8297                    start_date,
8298                    end_date,
8299                    &self.config.sales_quotes,
8300                    company_currency,
8301                );
8302                snapshot.sales_quote_count = quotes.len();
8303                snapshot.sales_quotes = quotes;
8304            }
8305        }
8306
8307        // Management KPIs
8308        if self.config.financial_reporting.management_kpis.enabled {
8309            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8310            let mut kpis = kpi_gen.generate(
8311                company_code,
8312                start_date,
8313                end_date,
8314                &self.config.financial_reporting.management_kpis,
8315            );
8316
8317            // Override financial KPIs with actual data from financial statements
8318            {
8319                use rust_decimal::Decimal;
8320
8321                if let Some(income_stmt) =
8322                    financial_reporting.financial_statements.iter().find(|fs| {
8323                        fs.statement_type == StatementType::IncomeStatement
8324                            && fs.company_code == company_code
8325                    })
8326                {
8327                    // Extract revenue and COGS from income statement line items
8328                    let total_revenue: Decimal = income_stmt
8329                        .line_items
8330                        .iter()
8331                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
8332                        .map(|li| li.amount)
8333                        .sum();
8334                    let total_cogs: Decimal = income_stmt
8335                        .line_items
8336                        .iter()
8337                        .filter(|li| {
8338                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8339                                && !li.is_total
8340                        })
8341                        .map(|li| li.amount.abs())
8342                        .sum();
8343                    let total_opex: Decimal = income_stmt
8344                        .line_items
8345                        .iter()
8346                        .filter(|li| {
8347                            li.section.contains("Expense")
8348                                && !li.is_total
8349                                && !li.section.contains("Cost")
8350                        })
8351                        .map(|li| li.amount.abs())
8352                        .sum();
8353
8354                    if total_revenue > Decimal::ZERO {
8355                        let hundred = Decimal::from(100);
8356                        let gross_margin_pct =
8357                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8358                        let operating_income = total_revenue - total_cogs - total_opex;
8359                        let op_margin_pct =
8360                            (operating_income * hundred / total_revenue).round_dp(2);
8361
8362                        // Override gross margin and operating margin KPIs
8363                        for kpi in &mut kpis {
8364                            if kpi.name == "Gross Margin" {
8365                                kpi.value = gross_margin_pct;
8366                            } else if kpi.name == "Operating Margin" {
8367                                kpi.value = op_margin_pct;
8368                            }
8369                        }
8370                    }
8371                }
8372
8373                // Override Current Ratio from balance sheet
8374                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8375                    fs.statement_type == StatementType::BalanceSheet
8376                        && fs.company_code == company_code
8377                }) {
8378                    let current_assets: Decimal = bs
8379                        .line_items
8380                        .iter()
8381                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8382                        .map(|li| li.amount)
8383                        .sum();
8384                    let current_liabilities: Decimal = bs
8385                        .line_items
8386                        .iter()
8387                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8388                        .map(|li| li.amount.abs())
8389                        .sum();
8390
8391                    if current_liabilities > Decimal::ZERO {
8392                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
8393                        for kpi in &mut kpis {
8394                            if kpi.name == "Current Ratio" {
8395                                kpi.value = current_ratio;
8396                            }
8397                        }
8398                    }
8399                }
8400            }
8401
8402            snapshot.kpi_count = kpis.len();
8403            snapshot.kpis = kpis;
8404        }
8405
8406        // Budgets
8407        if self.config.financial_reporting.budgets.enabled {
8408            let account_data: Vec<(String, String)> = coa
8409                .accounts
8410                .iter()
8411                .map(|a| (a.account_number.clone(), a.short_description.clone()))
8412                .collect();
8413
8414            if !account_data.is_empty() {
8415                let fiscal_year = start_date.year() as u32;
8416                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8417                let budget = budget_gen.generate(
8418                    company_code,
8419                    fiscal_year,
8420                    &account_data,
8421                    &self.config.financial_reporting.budgets,
8422                );
8423                snapshot.budget_line_count = budget.line_items.len();
8424                snapshot.budgets.push(budget);
8425            }
8426        }
8427
8428        stats.sales_quote_count = snapshot.sales_quote_count;
8429        stats.kpi_count = snapshot.kpi_count;
8430        stats.budget_line_count = snapshot.budget_line_count;
8431
8432        info!(
8433            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8434            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8435        );
8436        self.check_resources_with_log("post-sales-kpi-budgets")?;
8437
8438        Ok(snapshot)
8439    }
8440
8441    /// Compute pre-tax income for a single company from actual journal entries.
8442    ///
8443    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
8444    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
8445    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
8446    /// and the period-close engine so that all three use a consistent definition.
8447    fn compute_pre_tax_income(
8448        company_code: &str,
8449        journal_entries: &[JournalEntry],
8450    ) -> rust_decimal::Decimal {
8451        use datasynth_core::accounts::AccountCategory;
8452        use rust_decimal::Decimal;
8453
8454        let mut total_revenue = Decimal::ZERO;
8455        let mut total_expenses = Decimal::ZERO;
8456
8457        for je in journal_entries {
8458            if je.header.company_code != company_code {
8459                continue;
8460            }
8461            for line in &je.lines {
8462                let cat = AccountCategory::from_account(&line.gl_account);
8463                match cat {
8464                    AccountCategory::Revenue => {
8465                        total_revenue += line.credit_amount - line.debit_amount;
8466                    }
8467                    AccountCategory::Cogs
8468                    | AccountCategory::OperatingExpense
8469                    | AccountCategory::OtherIncomeExpense => {
8470                        total_expenses += line.debit_amount - line.credit_amount;
8471                    }
8472                    _ => {}
8473                }
8474            }
8475        }
8476
8477        let pti = (total_revenue - total_expenses).round_dp(2);
8478        if pti == rust_decimal::Decimal::ZERO {
8479            // No income statement activity yet — fall back to a synthetic value so the
8480            // tax provision generator can still produce meaningful output.
8481            rust_decimal::Decimal::from(1_000_000u32)
8482        } else {
8483            pti
8484        }
8485    }
8486
8487    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
8488    fn phase_tax_generation(
8489        &mut self,
8490        document_flows: &DocumentFlowSnapshot,
8491        journal_entries: &[JournalEntry],
8492        stats: &mut EnhancedGenerationStatistics,
8493    ) -> SynthResult<TaxSnapshot> {
8494        if !self.phase_config.generate_tax {
8495            debug!("Phase 20: Skipped (tax generation disabled)");
8496            return Ok(TaxSnapshot::default());
8497        }
8498        info!("Phase 20: Generating Tax Data");
8499
8500        let seed = self.seed;
8501        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8502            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8503        let fiscal_year = start_date.year();
8504        let company_code = self
8505            .config
8506            .companies
8507            .first()
8508            .map(|c| c.code.as_str())
8509            .unwrap_or("1000");
8510
8511        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8512            seed + 370,
8513            self.config.tax.clone(),
8514        );
8515
8516        let pack = self.primary_pack().clone();
8517        let (jurisdictions, codes) =
8518            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8519
8520        // Generate tax provisions for each company
8521        let mut provisions = Vec::new();
8522        if self.config.tax.provisions.enabled {
8523            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8524            for company in &self.config.companies {
8525                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8526                let statutory_rate = rust_decimal::Decimal::new(
8527                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8528                    2,
8529                );
8530                let provision = provision_gen.generate(
8531                    &company.code,
8532                    start_date,
8533                    pre_tax_income,
8534                    statutory_rate,
8535                );
8536                provisions.push(provision);
8537            }
8538        }
8539
8540        // Generate tax lines from document invoices
8541        let mut tax_lines = Vec::new();
8542        if !codes.is_empty() {
8543            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8544                datasynth_generators::TaxLineGeneratorConfig::default(),
8545                codes.clone(),
8546                seed + 372,
8547            );
8548
8549            // Tax lines from vendor invoices (input tax)
8550            // Use the first company's country as buyer country
8551            let buyer_country = self
8552                .config
8553                .companies
8554                .first()
8555                .map(|c| c.country.as_str())
8556                .unwrap_or("US");
8557            for vi in &document_flows.vendor_invoices {
8558                let lines = tax_line_gen.generate_for_document(
8559                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
8560                    &vi.header.document_id,
8561                    buyer_country, // seller approx same country
8562                    buyer_country,
8563                    vi.payable_amount,
8564                    vi.header.document_date,
8565                    None,
8566                );
8567                tax_lines.extend(lines);
8568            }
8569
8570            // Tax lines from customer invoices (output tax)
8571            for ci in &document_flows.customer_invoices {
8572                let lines = tax_line_gen.generate_for_document(
8573                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8574                    &ci.header.document_id,
8575                    buyer_country, // seller is the company
8576                    buyer_country,
8577                    ci.total_gross_amount,
8578                    ci.header.document_date,
8579                    None,
8580                );
8581                tax_lines.extend(lines);
8582            }
8583        }
8584
8585        // Generate deferred tax data (IAS 12 / ASC 740) for each company
8586        let deferred_tax = {
8587            let companies: Vec<(&str, &str)> = self
8588                .config
8589                .companies
8590                .iter()
8591                .map(|c| (c.code.as_str(), c.country.as_str()))
8592                .collect();
8593            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8594            deferred_gen.generate(&companies, start_date, journal_entries)
8595        };
8596
8597        // Build a document_id → posting_date map so each tax JE uses its
8598        // source document's date rather than a blanket period-end date.
8599        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8600            std::collections::HashMap::new();
8601        for vi in &document_flows.vendor_invoices {
8602            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8603        }
8604        for ci in &document_flows.customer_invoices {
8605            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8606        }
8607
8608        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
8609        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8610        let tax_posting_journal_entries = if !tax_lines.is_empty() {
8611            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8612                &tax_lines,
8613                company_code,
8614                &doc_dates,
8615                end_date,
8616            );
8617            debug!("Generated {} tax posting JEs", jes.len());
8618            jes
8619        } else {
8620            Vec::new()
8621        };
8622
8623        let snapshot = TaxSnapshot {
8624            jurisdiction_count: jurisdictions.len(),
8625            code_count: codes.len(),
8626            jurisdictions,
8627            codes,
8628            tax_provisions: provisions,
8629            tax_lines,
8630            tax_returns: Vec::new(),
8631            withholding_records: Vec::new(),
8632            tax_anomaly_labels: Vec::new(),
8633            deferred_tax,
8634            tax_posting_journal_entries,
8635        };
8636
8637        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8638        stats.tax_code_count = snapshot.code_count;
8639        stats.tax_provision_count = snapshot.tax_provisions.len();
8640        stats.tax_line_count = snapshot.tax_lines.len();
8641
8642        info!(
8643            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8644            snapshot.jurisdiction_count,
8645            snapshot.code_count,
8646            snapshot.tax_provisions.len(),
8647            snapshot.deferred_tax.temporary_differences.len(),
8648            snapshot.deferred_tax.journal_entries.len(),
8649            snapshot.tax_posting_journal_entries.len(),
8650        );
8651        self.check_resources_with_log("post-tax")?;
8652
8653        Ok(snapshot)
8654    }
8655
8656    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
8657    fn phase_esg_generation(
8658        &mut self,
8659        document_flows: &DocumentFlowSnapshot,
8660        manufacturing: &ManufacturingSnapshot,
8661        stats: &mut EnhancedGenerationStatistics,
8662    ) -> SynthResult<EsgSnapshot> {
8663        if !self.phase_config.generate_esg {
8664            debug!("Phase 21: Skipped (ESG generation disabled)");
8665            return Ok(EsgSnapshot::default());
8666        }
8667        let degradation = self.check_resources()?;
8668        if degradation >= DegradationLevel::Reduced {
8669            debug!(
8670                "Phase skipped due to resource pressure (degradation: {:?})",
8671                degradation
8672            );
8673            return Ok(EsgSnapshot::default());
8674        }
8675        info!("Phase 21: Generating ESG Data");
8676
8677        let seed = self.seed;
8678        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8679            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8680        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8681        let entity_id = self
8682            .config
8683            .companies
8684            .first()
8685            .map(|c| c.code.as_str())
8686            .unwrap_or("1000");
8687
8688        let esg_cfg = &self.config.esg;
8689        let mut snapshot = EsgSnapshot::default();
8690
8691        // Energy consumption (feeds into scope 1 & 2 emissions)
8692        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8693            esg_cfg.environmental.energy.clone(),
8694            seed + 80,
8695        );
8696        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8697
8698        // Water usage
8699        let facility_count = esg_cfg.environmental.energy.facility_count;
8700        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8701        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8702
8703        // Waste
8704        let mut waste_gen = datasynth_generators::WasteGenerator::new(
8705            seed + 82,
8706            esg_cfg.environmental.waste.diversion_target,
8707            facility_count,
8708        );
8709        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8710
8711        // Emissions (scope 1, 2, 3)
8712        let mut emission_gen =
8713            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8714
8715        // Build EnergyInput from energy_records
8716        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8717            .iter()
8718            .map(|e| datasynth_generators::EnergyInput {
8719                facility_id: e.facility_id.clone(),
8720                energy_type: match e.energy_source {
8721                    EnergySourceType::NaturalGas => {
8722                        datasynth_generators::EnergyInputType::NaturalGas
8723                    }
8724                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8725                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8726                    _ => datasynth_generators::EnergyInputType::Electricity,
8727                },
8728                consumption_kwh: e.consumption_kwh,
8729                period: e.period,
8730            })
8731            .collect();
8732
8733        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
8734        if !manufacturing.production_orders.is_empty() {
8735            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8736                &manufacturing.production_orders,
8737                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
8738                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
8739            );
8740            if !mfg_energy.is_empty() {
8741                info!(
8742                    "ESG: {} energy inputs derived from {} production orders",
8743                    mfg_energy.len(),
8744                    manufacturing.production_orders.len(),
8745                );
8746                energy_inputs.extend(mfg_energy);
8747            }
8748        }
8749
8750        let mut emissions = Vec::new();
8751        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8752        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8753
8754        // Scope 3: use vendor spend data from actual payments
8755        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8756            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8757            for payment in &document_flows.payments {
8758                if payment.is_vendor {
8759                    *totals
8760                        .entry(payment.business_partner_id.clone())
8761                        .or_default() += payment.amount;
8762                }
8763            }
8764            totals
8765        };
8766        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8767            .master_data
8768            .vendors
8769            .iter()
8770            .map(|v| {
8771                let spend = vendor_payment_totals
8772                    .get(&v.vendor_id)
8773                    .copied()
8774                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8775                datasynth_generators::VendorSpendInput {
8776                    vendor_id: v.vendor_id.clone(),
8777                    category: format!("{:?}", v.vendor_type).to_lowercase(),
8778                    spend,
8779                    country: v.country.clone(),
8780                }
8781            })
8782            .collect();
8783        if !vendor_spend.is_empty() {
8784            emissions.extend(emission_gen.generate_scope3_purchased_goods(
8785                entity_id,
8786                &vendor_spend,
8787                start_date,
8788                end_date,
8789            ));
8790        }
8791
8792        // Business travel & commuting (scope 3)
8793        let headcount = self.master_data.employees.len() as u32;
8794        if headcount > 0 {
8795            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8796            emissions.extend(emission_gen.generate_scope3_business_travel(
8797                entity_id,
8798                travel_spend,
8799                start_date,
8800            ));
8801            emissions
8802                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8803        }
8804
8805        snapshot.emission_count = emissions.len();
8806        snapshot.emissions = emissions;
8807        snapshot.energy = energy_records;
8808
8809        // Social: Workforce diversity, pay equity, safety
8810        let mut workforce_gen =
8811            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8812        let total_headcount = headcount.max(100);
8813        snapshot.diversity =
8814            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8815        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8816
8817        // v2.4: Derive additional workforce diversity metrics from actual employee data
8818        if !self.master_data.employees.is_empty() {
8819            let hr_diversity = workforce_gen.generate_diversity_from_employees(
8820                entity_id,
8821                &self.master_data.employees,
8822                end_date,
8823            );
8824            if !hr_diversity.is_empty() {
8825                info!(
8826                    "ESG: {} diversity metrics derived from {} actual employees",
8827                    hr_diversity.len(),
8828                    self.master_data.employees.len(),
8829                );
8830                snapshot.diversity.extend(hr_diversity);
8831            }
8832        }
8833
8834        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8835            entity_id,
8836            facility_count,
8837            start_date,
8838            end_date,
8839        );
8840
8841        // Compute safety metrics
8842        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
8843        let safety_metric = workforce_gen.compute_safety_metrics(
8844            entity_id,
8845            &snapshot.safety_incidents,
8846            total_hours,
8847            start_date,
8848        );
8849        snapshot.safety_metrics = vec![safety_metric];
8850
8851        // Governance
8852        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
8853            seed + 85,
8854            esg_cfg.governance.board_size,
8855            esg_cfg.governance.independence_target,
8856        );
8857        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
8858
8859        // Supplier ESG assessments
8860        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
8861            esg_cfg.supply_chain_esg.clone(),
8862            seed + 86,
8863        );
8864        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
8865            .master_data
8866            .vendors
8867            .iter()
8868            .map(|v| datasynth_generators::VendorInput {
8869                vendor_id: v.vendor_id.clone(),
8870                country: v.country.clone(),
8871                industry: format!("{:?}", v.vendor_type).to_lowercase(),
8872                quality_score: None,
8873            })
8874            .collect();
8875        snapshot.supplier_assessments =
8876            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
8877
8878        // Disclosures
8879        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
8880            seed + 87,
8881            esg_cfg.reporting.clone(),
8882            esg_cfg.climate_scenarios.clone(),
8883        );
8884        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
8885        snapshot.disclosures = disclosure_gen.generate_disclosures(
8886            entity_id,
8887            &snapshot.materiality,
8888            start_date,
8889            end_date,
8890        );
8891        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
8892        snapshot.disclosure_count = snapshot.disclosures.len();
8893
8894        // Anomaly injection
8895        if esg_cfg.anomaly_rate > 0.0 {
8896            let mut anomaly_injector =
8897                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
8898            let mut labels = Vec::new();
8899            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
8900            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
8901            labels.extend(
8902                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
8903            );
8904            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
8905            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
8906            snapshot.anomaly_labels = labels;
8907        }
8908
8909        stats.esg_emission_count = snapshot.emission_count;
8910        stats.esg_disclosure_count = snapshot.disclosure_count;
8911
8912        info!(
8913            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8914            snapshot.emission_count,
8915            snapshot.disclosure_count,
8916            snapshot.supplier_assessments.len()
8917        );
8918        self.check_resources_with_log("post-esg")?;
8919
8920        Ok(snapshot)
8921    }
8922
8923    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
8924    fn phase_treasury_data(
8925        &mut self,
8926        document_flows: &DocumentFlowSnapshot,
8927        subledger: &SubledgerSnapshot,
8928        intercompany: &IntercompanySnapshot,
8929        stats: &mut EnhancedGenerationStatistics,
8930    ) -> SynthResult<TreasurySnapshot> {
8931        if !self.phase_config.generate_treasury {
8932            debug!("Phase 22: Skipped (treasury generation disabled)");
8933            return Ok(TreasurySnapshot::default());
8934        }
8935        let degradation = self.check_resources()?;
8936        if degradation >= DegradationLevel::Reduced {
8937            debug!(
8938                "Phase skipped due to resource pressure (degradation: {:?})",
8939                degradation
8940            );
8941            return Ok(TreasurySnapshot::default());
8942        }
8943        info!("Phase 22: Generating Treasury Data");
8944
8945        let seed = self.seed;
8946        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8947            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8948        let currency = self
8949            .config
8950            .companies
8951            .first()
8952            .map(|c| c.currency.as_str())
8953            .unwrap_or("USD");
8954        let entity_id = self
8955            .config
8956            .companies
8957            .first()
8958            .map(|c| c.code.as_str())
8959            .unwrap_or("1000");
8960
8961        let mut snapshot = TreasurySnapshot::default();
8962
8963        // Generate debt instruments
8964        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8965            self.config.treasury.debt.clone(),
8966            seed + 90,
8967        );
8968        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8969
8970        // Generate hedging instruments (IR swaps for floating-rate debt)
8971        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8972            self.config.treasury.hedging.clone(),
8973            seed + 91,
8974        );
8975        for debt in &snapshot.debt_instruments {
8976            if debt.rate_type == InterestRateType::Variable {
8977                let swap = hedge_gen.generate_ir_swap(
8978                    currency,
8979                    debt.principal,
8980                    debt.origination_date,
8981                    debt.maturity_date,
8982                );
8983                snapshot.hedging_instruments.push(swap);
8984            }
8985        }
8986
8987        // Build FX exposures from foreign-currency payments and generate
8988        // FX forwards + hedge relationship designations via generate() API.
8989        {
8990            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8991            for payment in &document_flows.payments {
8992                if payment.currency != currency {
8993                    let entry = fx_map
8994                        .entry(payment.currency.clone())
8995                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8996                    entry.0 += payment.amount;
8997                    // Use the latest settlement date among grouped payments
8998                    if payment.header.document_date > entry.1 {
8999                        entry.1 = payment.header.document_date;
9000                    }
9001                }
9002            }
9003            if !fx_map.is_empty() {
9004                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9005                    .into_iter()
9006                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
9007                        datasynth_generators::treasury::FxExposure {
9008                            currency_pair: format!("{foreign_ccy}/{currency}"),
9009                            foreign_currency: foreign_ccy,
9010                            net_amount,
9011                            settlement_date,
9012                            description: "AP payment FX exposure".to_string(),
9013                        }
9014                    })
9015                    .collect();
9016                let (fx_instruments, fx_relationships) =
9017                    hedge_gen.generate(start_date, &fx_exposures);
9018                snapshot.hedging_instruments.extend(fx_instruments);
9019                snapshot.hedge_relationships.extend(fx_relationships);
9020            }
9021        }
9022
9023        // Inject anomalies if configured
9024        if self.config.treasury.anomaly_rate > 0.0 {
9025            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9026                seed + 92,
9027                self.config.treasury.anomaly_rate,
9028            );
9029            let mut labels = Vec::new();
9030            labels.extend(
9031                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9032            );
9033            snapshot.treasury_anomaly_labels = labels;
9034        }
9035
9036        // Generate cash positions from payment flows
9037        if self.config.treasury.cash_positioning.enabled {
9038            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9039
9040            // AP payments as outflows
9041            for payment in &document_flows.payments {
9042                cash_flows.push(datasynth_generators::treasury::CashFlow {
9043                    date: payment.header.document_date,
9044                    account_id: format!("{entity_id}-MAIN"),
9045                    amount: payment.amount,
9046                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9047                });
9048            }
9049
9050            // Customer receipts (from O2C chains) as inflows
9051            for chain in &document_flows.o2c_chains {
9052                if let Some(ref receipt) = chain.customer_receipt {
9053                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9054                        date: receipt.header.document_date,
9055                        account_id: format!("{entity_id}-MAIN"),
9056                        amount: receipt.amount,
9057                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9058                    });
9059                }
9060                // Remainder receipts (follow-up to partial payments)
9061                for receipt in &chain.remainder_receipts {
9062                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9063                        date: receipt.header.document_date,
9064                        account_id: format!("{entity_id}-MAIN"),
9065                        amount: receipt.amount,
9066                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9067                    });
9068                }
9069            }
9070
9071            if !cash_flows.is_empty() {
9072                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9073                    self.config.treasury.cash_positioning.clone(),
9074                    seed + 93,
9075                );
9076                let account_id = format!("{entity_id}-MAIN");
9077                snapshot.cash_positions = cash_gen.generate(
9078                    entity_id,
9079                    &account_id,
9080                    currency,
9081                    &cash_flows,
9082                    start_date,
9083                    start_date + chrono::Months::new(self.config.global.period_months),
9084                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
9085                );
9086            }
9087        }
9088
9089        // Generate cash forecasts from AR/AP aging
9090        if self.config.treasury.cash_forecasting.enabled {
9091            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9092
9093            // Build AR aging items from subledger AR invoices
9094            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9095                .ar_invoices
9096                .iter()
9097                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9098                .map(|inv| {
9099                    let days_past_due = if inv.due_date < end_date {
9100                        (end_date - inv.due_date).num_days().max(0) as u32
9101                    } else {
9102                        0
9103                    };
9104                    datasynth_generators::treasury::ArAgingItem {
9105                        expected_date: inv.due_date,
9106                        amount: inv.amount_remaining,
9107                        days_past_due,
9108                        document_id: inv.invoice_number.clone(),
9109                    }
9110                })
9111                .collect();
9112
9113            // Build AP aging items from subledger AP invoices
9114            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9115                .ap_invoices
9116                .iter()
9117                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9118                .map(|inv| datasynth_generators::treasury::ApAgingItem {
9119                    payment_date: inv.due_date,
9120                    amount: inv.amount_remaining,
9121                    document_id: inv.invoice_number.clone(),
9122                })
9123                .collect();
9124
9125            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9126                self.config.treasury.cash_forecasting.clone(),
9127                seed + 94,
9128            );
9129            let forecast = forecast_gen.generate(
9130                entity_id,
9131                currency,
9132                end_date,
9133                &ar_items,
9134                &ap_items,
9135                &[], // scheduled disbursements - empty for now
9136            );
9137            snapshot.cash_forecasts.push(forecast);
9138        }
9139
9140        // Generate cash pools and sweeps
9141        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9142            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9143            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9144                self.config.treasury.cash_pooling.clone(),
9145                seed + 95,
9146            );
9147
9148            // Create a pool from available accounts
9149            let account_ids: Vec<String> = snapshot
9150                .cash_positions
9151                .iter()
9152                .map(|cp| cp.bank_account_id.clone())
9153                .collect::<std::collections::HashSet<_>>()
9154                .into_iter()
9155                .collect();
9156
9157            if let Some(pool) =
9158                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9159            {
9160                // Generate sweeps - build participant balances from last cash position per account
9161                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9162                for cp in &snapshot.cash_positions {
9163                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9164                }
9165
9166                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9167                    latest_balances
9168                        .into_iter()
9169                        .filter(|(id, _)| pool.participant_accounts.contains(id))
9170                        .map(
9171                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
9172                                account_id: id,
9173                                balance,
9174                            },
9175                        )
9176                        .collect();
9177
9178                let sweeps =
9179                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9180                snapshot.cash_pool_sweeps = sweeps;
9181                snapshot.cash_pools.push(pool);
9182            }
9183        }
9184
9185        // Generate bank guarantees
9186        if self.config.treasury.bank_guarantees.enabled {
9187            let vendor_names: Vec<String> = self
9188                .master_data
9189                .vendors
9190                .iter()
9191                .map(|v| v.name.clone())
9192                .collect();
9193            if !vendor_names.is_empty() {
9194                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9195                    self.config.treasury.bank_guarantees.clone(),
9196                    seed + 96,
9197                );
9198                snapshot.bank_guarantees =
9199                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9200            }
9201        }
9202
9203        // Generate netting runs from intercompany matched pairs
9204        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9205            let entity_ids: Vec<String> = self
9206                .config
9207                .companies
9208                .iter()
9209                .map(|c| c.code.clone())
9210                .collect();
9211            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9212                .matched_pairs
9213                .iter()
9214                .map(|mp| {
9215                    (
9216                        mp.seller_company.clone(),
9217                        mp.buyer_company.clone(),
9218                        mp.amount,
9219                    )
9220                })
9221                .collect();
9222            if entity_ids.len() >= 2 {
9223                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9224                    self.config.treasury.netting.clone(),
9225                    seed + 97,
9226                );
9227                snapshot.netting_runs = netting_gen.generate(
9228                    &entity_ids,
9229                    currency,
9230                    start_date,
9231                    self.config.global.period_months,
9232                    &ic_amounts,
9233                );
9234            }
9235        }
9236
9237        // Generate treasury journal entries from the instruments we just created.
9238        {
9239            use datasynth_generators::treasury::TreasuryAccounting;
9240
9241            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9242            let mut treasury_jes = Vec::new();
9243
9244            // Debt interest accrual JEs
9245            if !snapshot.debt_instruments.is_empty() {
9246                let debt_jes =
9247                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9248                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9249                treasury_jes.extend(debt_jes);
9250            }
9251
9252            // Hedge mark-to-market JEs
9253            if !snapshot.hedging_instruments.is_empty() {
9254                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9255                    &snapshot.hedging_instruments,
9256                    &snapshot.hedge_relationships,
9257                    end_date,
9258                    entity_id,
9259                );
9260                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9261                treasury_jes.extend(hedge_jes);
9262            }
9263
9264            // Cash pool sweep JEs
9265            if !snapshot.cash_pool_sweeps.is_empty() {
9266                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9267                    &snapshot.cash_pool_sweeps,
9268                    entity_id,
9269                );
9270                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9271                treasury_jes.extend(sweep_jes);
9272            }
9273
9274            if !treasury_jes.is_empty() {
9275                debug!("Total treasury journal entries: {}", treasury_jes.len());
9276            }
9277            snapshot.journal_entries = treasury_jes;
9278        }
9279
9280        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9281        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9282        stats.cash_position_count = snapshot.cash_positions.len();
9283        stats.cash_forecast_count = snapshot.cash_forecasts.len();
9284        stats.cash_pool_count = snapshot.cash_pools.len();
9285
9286        info!(
9287            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9288            snapshot.debt_instruments.len(),
9289            snapshot.hedging_instruments.len(),
9290            snapshot.cash_positions.len(),
9291            snapshot.cash_forecasts.len(),
9292            snapshot.cash_pools.len(),
9293            snapshot.bank_guarantees.len(),
9294            snapshot.netting_runs.len(),
9295            snapshot.journal_entries.len(),
9296        );
9297        self.check_resources_with_log("post-treasury")?;
9298
9299        Ok(snapshot)
9300    }
9301
9302    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
9303    fn phase_project_accounting(
9304        &mut self,
9305        document_flows: &DocumentFlowSnapshot,
9306        hr: &HrSnapshot,
9307        stats: &mut EnhancedGenerationStatistics,
9308    ) -> SynthResult<ProjectAccountingSnapshot> {
9309        if !self.phase_config.generate_project_accounting {
9310            debug!("Phase 23: Skipped (project accounting disabled)");
9311            return Ok(ProjectAccountingSnapshot::default());
9312        }
9313        let degradation = self.check_resources()?;
9314        if degradation >= DegradationLevel::Reduced {
9315            debug!(
9316                "Phase skipped due to resource pressure (degradation: {:?})",
9317                degradation
9318            );
9319            return Ok(ProjectAccountingSnapshot::default());
9320        }
9321        info!("Phase 23: Generating Project Accounting Data");
9322
9323        let seed = self.seed;
9324        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9325            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9326        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9327        let company_code = self
9328            .config
9329            .companies
9330            .first()
9331            .map(|c| c.code.as_str())
9332            .unwrap_or("1000");
9333
9334        let mut snapshot = ProjectAccountingSnapshot::default();
9335
9336        // Generate projects with WBS hierarchies
9337        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9338            self.config.project_accounting.clone(),
9339            seed + 95,
9340        );
9341        let pool = project_gen.generate(company_code, start_date, end_date);
9342        snapshot.projects = pool.projects.clone();
9343
9344        // Link source documents to projects for cost allocation
9345        {
9346            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9347                Vec::new();
9348
9349            // Time entries
9350            for te in &hr.time_entries {
9351                let total_hours = te.hours_regular + te.hours_overtime;
9352                if total_hours > 0.0 {
9353                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9354                        id: te.entry_id.clone(),
9355                        entity_id: company_code.to_string(),
9356                        date: te.date,
9357                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9358                            .unwrap_or(rust_decimal::Decimal::ZERO),
9359                        source_type: CostSourceType::TimeEntry,
9360                        hours: Some(
9361                            rust_decimal::Decimal::from_f64_retain(total_hours)
9362                                .unwrap_or(rust_decimal::Decimal::ZERO),
9363                        ),
9364                    });
9365                }
9366            }
9367
9368            // Expense reports
9369            for er in &hr.expense_reports {
9370                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9371                    id: er.report_id.clone(),
9372                    entity_id: company_code.to_string(),
9373                    date: er.submission_date,
9374                    amount: er.total_amount,
9375                    source_type: CostSourceType::ExpenseReport,
9376                    hours: None,
9377                });
9378            }
9379
9380            // Purchase orders
9381            for po in &document_flows.purchase_orders {
9382                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9383                    id: po.header.document_id.clone(),
9384                    entity_id: company_code.to_string(),
9385                    date: po.header.document_date,
9386                    amount: po.total_net_amount,
9387                    source_type: CostSourceType::PurchaseOrder,
9388                    hours: None,
9389                });
9390            }
9391
9392            // Vendor invoices
9393            for vi in &document_flows.vendor_invoices {
9394                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9395                    id: vi.header.document_id.clone(),
9396                    entity_id: company_code.to_string(),
9397                    date: vi.header.document_date,
9398                    amount: vi.payable_amount,
9399                    source_type: CostSourceType::VendorInvoice,
9400                    hours: None,
9401                });
9402            }
9403
9404            if !source_docs.is_empty() && !pool.projects.is_empty() {
9405                let mut cost_gen =
9406                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
9407                        self.config.project_accounting.cost_allocation.clone(),
9408                        seed + 99,
9409                    );
9410                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9411            }
9412        }
9413
9414        // Generate change orders
9415        if self.config.project_accounting.change_orders.enabled {
9416            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9417                self.config.project_accounting.change_orders.clone(),
9418                seed + 96,
9419            );
9420            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9421        }
9422
9423        // Generate milestones
9424        if self.config.project_accounting.milestones.enabled {
9425            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9426                self.config.project_accounting.milestones.clone(),
9427                seed + 97,
9428            );
9429            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9430        }
9431
9432        // Generate earned value metrics (needs cost lines, so only if we have projects)
9433        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9434            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9435                self.config.project_accounting.earned_value.clone(),
9436                seed + 98,
9437            );
9438            snapshot.earned_value_metrics =
9439                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9440        }
9441
9442        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
9443        if self.config.project_accounting.revenue_recognition.enabled
9444            && !snapshot.projects.is_empty()
9445            && !snapshot.cost_lines.is_empty()
9446        {
9447            use datasynth_generators::project_accounting::RevenueGenerator;
9448            let rev_config = self.config.project_accounting.revenue_recognition.clone();
9449            let avg_contract_value =
9450                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9451                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9452
9453            // Build contract value tuples: only customer-type projects get revenue recognition.
9454            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
9455            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9456                snapshot
9457                    .projects
9458                    .iter()
9459                    .filter(|p| {
9460                        matches!(
9461                            p.project_type,
9462                            datasynth_core::models::ProjectType::Customer
9463                        )
9464                    })
9465                    .map(|p| {
9466                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
9467                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9468                        // budget × 1.25 → contract value
9469                        } else {
9470                            avg_contract_value
9471                        };
9472                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
9473                        (p.project_id.clone(), cv, etc)
9474                    })
9475                    .collect();
9476
9477            if !contract_values.is_empty() {
9478                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9479                snapshot.revenue_records = rev_gen.generate(
9480                    &snapshot.projects,
9481                    &snapshot.cost_lines,
9482                    &contract_values,
9483                    start_date,
9484                    end_date,
9485                );
9486                debug!(
9487                    "Generated {} revenue recognition records for {} customer projects",
9488                    snapshot.revenue_records.len(),
9489                    contract_values.len()
9490                );
9491            }
9492        }
9493
9494        stats.project_count = snapshot.projects.len();
9495        stats.project_change_order_count = snapshot.change_orders.len();
9496        stats.project_cost_line_count = snapshot.cost_lines.len();
9497
9498        info!(
9499            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9500            snapshot.projects.len(),
9501            snapshot.change_orders.len(),
9502            snapshot.milestones.len(),
9503            snapshot.earned_value_metrics.len()
9504        );
9505        self.check_resources_with_log("post-project-accounting")?;
9506
9507        Ok(snapshot)
9508    }
9509
9510    /// Phase 24: Generate process evolution and organizational events.
9511    fn phase_evolution_events(
9512        &mut self,
9513        stats: &mut EnhancedGenerationStatistics,
9514    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9515        if !self.phase_config.generate_evolution_events {
9516            debug!("Phase 24: Skipped (evolution events disabled)");
9517            return Ok((Vec::new(), Vec::new()));
9518        }
9519        info!("Phase 24: Generating Process Evolution + Organizational Events");
9520
9521        let seed = self.seed;
9522        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9523            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9524        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9525
9526        // Process evolution events
9527        let mut proc_gen =
9528            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9529                seed + 100,
9530            );
9531        let process_events = proc_gen.generate_events(start_date, end_date);
9532
9533        // Organizational events
9534        let company_codes: Vec<String> = self
9535            .config
9536            .companies
9537            .iter()
9538            .map(|c| c.code.clone())
9539            .collect();
9540        let mut org_gen =
9541            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9542                seed + 101,
9543            );
9544        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9545
9546        stats.process_evolution_event_count = process_events.len();
9547        stats.organizational_event_count = org_events.len();
9548
9549        info!(
9550            "Evolution events generated: {} process evolution, {} organizational",
9551            process_events.len(),
9552            org_events.len()
9553        );
9554        self.check_resources_with_log("post-evolution-events")?;
9555
9556        Ok((process_events, org_events))
9557    }
9558
9559    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
9560    /// data recovery, and regulatory changes).
9561    fn phase_disruption_events(
9562        &self,
9563        stats: &mut EnhancedGenerationStatistics,
9564    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9565        if !self.config.organizational_events.enabled {
9566            debug!("Phase 24b: Skipped (organizational events disabled)");
9567            return Ok(Vec::new());
9568        }
9569        info!("Phase 24b: Generating Disruption Events");
9570
9571        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9572            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9573        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9574
9575        let company_codes: Vec<String> = self
9576            .config
9577            .companies
9578            .iter()
9579            .map(|c| c.code.clone())
9580            .collect();
9581
9582        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9583        let events = gen.generate(start_date, end_date, &company_codes);
9584
9585        stats.disruption_event_count = events.len();
9586        info!("Disruption events generated: {} events", events.len());
9587        self.check_resources_with_log("post-disruption-events")?;
9588
9589        Ok(events)
9590    }
9591
9592    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
9593    ///
9594    /// Produces paired examples where each pair contains the original clean JE
9595    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
9596    /// split transaction). Useful for training anomaly detection models with
9597    /// known ground truth.
9598    fn phase_counterfactuals(
9599        &self,
9600        journal_entries: &[JournalEntry],
9601        stats: &mut EnhancedGenerationStatistics,
9602    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9603        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9604            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9605            return Ok(Vec::new());
9606        }
9607        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9608
9609        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9610
9611        let mut gen = CounterfactualGenerator::new(self.seed + 110);
9612
9613        // Rotating set of specs to produce diverse mutation types
9614        let specs = [
9615            CounterfactualSpec::ScaleAmount { factor: 2.5 },
9616            CounterfactualSpec::ShiftDate { days: -14 },
9617            CounterfactualSpec::SelfApprove,
9618            CounterfactualSpec::SplitTransaction { split_count: 3 },
9619        ];
9620
9621        let pairs: Vec<_> = journal_entries
9622            .iter()
9623            .enumerate()
9624            .map(|(i, je)| {
9625                let spec = &specs[i % specs.len()];
9626                gen.generate(je, spec)
9627            })
9628            .collect();
9629
9630        stats.counterfactual_pair_count = pairs.len();
9631        info!(
9632            "Counterfactual pairs generated: {} pairs from {} journal entries",
9633            pairs.len(),
9634            journal_entries.len()
9635        );
9636        self.check_resources_with_log("post-counterfactuals")?;
9637
9638        Ok(pairs)
9639    }
9640
9641    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
9642    ///
9643    /// Uses the anomaly labels (from Phase 8) to determine which documents are
9644    /// fraudulent, then generates probabilistic red flags on all chain documents.
9645    /// Non-fraud documents also receive red flags at a lower rate (false positives)
9646    /// to produce realistic ML training data.
9647    fn phase_red_flags(
9648        &self,
9649        anomaly_labels: &AnomalyLabels,
9650        document_flows: &DocumentFlowSnapshot,
9651        stats: &mut EnhancedGenerationStatistics,
9652    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9653        if !self.config.fraud.enabled {
9654            debug!("Phase 26: Skipped (fraud generation disabled)");
9655            return Ok(Vec::new());
9656        }
9657        info!("Phase 26: Generating Fraud Red-Flag Indicators");
9658
9659        use datasynth_generators::fraud::RedFlagGenerator;
9660
9661        let generator = RedFlagGenerator::new();
9662        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9663
9664        // Build a set of document IDs that are known-fraudulent from anomaly labels.
9665        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9666            .labels
9667            .iter()
9668            .filter(|label| label.anomaly_type.is_intentional())
9669            .map(|label| label.document_id.as_str())
9670            .collect();
9671
9672        let mut flags = Vec::new();
9673
9674        // Iterate P2P chains: use the purchase order document ID as the chain key.
9675        for chain in &document_flows.p2p_chains {
9676            let doc_id = &chain.purchase_order.header.document_id;
9677            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9678            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9679        }
9680
9681        // Iterate O2C chains: use the sales order document ID as the chain key.
9682        for chain in &document_flows.o2c_chains {
9683            let doc_id = &chain.sales_order.header.document_id;
9684            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9685            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9686        }
9687
9688        stats.red_flag_count = flags.len();
9689        info!(
9690            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9691            flags.len(),
9692            document_flows.p2p_chains.len(),
9693            document_flows.o2c_chains.len(),
9694            fraud_doc_ids.len()
9695        );
9696        self.check_resources_with_log("post-red-flags")?;
9697
9698        Ok(flags)
9699    }
9700
9701    /// Phase 26b: Generate collusion rings from employee/vendor pools.
9702    ///
9703    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
9704    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
9705    /// advance them over the simulation period.
9706    fn phase_collusion_rings(
9707        &mut self,
9708        stats: &mut EnhancedGenerationStatistics,
9709    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9710        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9711            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9712            return Ok(Vec::new());
9713        }
9714        info!("Phase 26b: Generating Collusion Rings");
9715
9716        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9717            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9718        let months = self.config.global.period_months;
9719
9720        let employee_ids: Vec<String> = self
9721            .master_data
9722            .employees
9723            .iter()
9724            .map(|e| e.employee_id.clone())
9725            .collect();
9726        let vendor_ids: Vec<String> = self
9727            .master_data
9728            .vendors
9729            .iter()
9730            .map(|v| v.vendor_id.clone())
9731            .collect();
9732
9733        let mut generator =
9734            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9735        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9736
9737        stats.collusion_ring_count = rings.len();
9738        info!(
9739            "Collusion rings generated: {} rings, total members: {}",
9740            rings.len(),
9741            rings
9742                .iter()
9743                .map(datasynth_generators::fraud::CollusionRing::size)
9744                .sum::<usize>()
9745        );
9746        self.check_resources_with_log("post-collusion-rings")?;
9747
9748        Ok(rings)
9749    }
9750
9751    /// Phase 27: Generate bi-temporal version chains for vendor entities.
9752    ///
9753    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
9754    /// master data changes over time, supporting bi-temporal audit queries.
9755    fn phase_temporal_attributes(
9756        &mut self,
9757        stats: &mut EnhancedGenerationStatistics,
9758    ) -> SynthResult<
9759        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9760    > {
9761        if !self.config.temporal_attributes.enabled {
9762            debug!("Phase 27: Skipped (temporal attributes disabled)");
9763            return Ok(Vec::new());
9764        }
9765        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9766
9767        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9768            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9769
9770        // Build a TemporalAttributeConfig from the user's config.
9771        // Since Phase 27 is already gated on temporal_attributes.enabled,
9772        // default to enabling version chains so users get actual mutations.
9773        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9774            || self.config.temporal_attributes.enabled;
9775        let temporal_config = {
9776            let ta = &self.config.temporal_attributes;
9777            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9778                .enabled(ta.enabled)
9779                .closed_probability(ta.valid_time.closed_probability)
9780                .avg_validity_days(ta.valid_time.avg_validity_days)
9781                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9782                .with_version_chains(if generate_version_chains {
9783                    ta.avg_versions_per_entity
9784                } else {
9785                    1.0
9786                })
9787                .build()
9788        };
9789        // Apply backdating settings if configured
9790        let temporal_config = if self
9791            .config
9792            .temporal_attributes
9793            .transaction_time
9794            .allow_backdating
9795        {
9796            let mut c = temporal_config;
9797            c.transaction_time.allow_backdating = true;
9798            c.transaction_time.backdating_probability = self
9799                .config
9800                .temporal_attributes
9801                .transaction_time
9802                .backdating_probability;
9803            c.transaction_time.max_backdate_days = self
9804                .config
9805                .temporal_attributes
9806                .transaction_time
9807                .max_backdate_days;
9808            c
9809        } else {
9810            temporal_config
9811        };
9812        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9813            temporal_config,
9814            self.seed + 130,
9815            start_date,
9816        );
9817
9818        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9819            self.seed + 130,
9820            datasynth_core::GeneratorType::Vendor,
9821        );
9822
9823        let chains: Vec<_> = self
9824            .master_data
9825            .vendors
9826            .iter()
9827            .map(|vendor| {
9828                let id = uuid_factory.next();
9829                gen.generate_version_chain(vendor.clone(), id)
9830            })
9831            .collect();
9832
9833        stats.temporal_version_chain_count = chains.len();
9834        info!("Temporal version chains generated: {} chains", chains.len());
9835        self.check_resources_with_log("post-temporal-attributes")?;
9836
9837        Ok(chains)
9838    }
9839
9840    /// Phase 28: Build entity relationship graph and cross-process links.
9841    ///
9842    /// Part 1 (gated on `relationship_strength.enabled`): builds an
9843    /// `EntityGraph` from master-data vendor/customer entities and
9844    /// journal-entry-derived transaction summaries.
9845    ///
9846    /// Part 2 (gated on `cross_process_links.enabled`): extracts
9847    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
9848    /// generates inventory-movement cross-process links.
9849    fn phase_entity_relationships(
9850        &self,
9851        journal_entries: &[JournalEntry],
9852        document_flows: &DocumentFlowSnapshot,
9853        stats: &mut EnhancedGenerationStatistics,
9854    ) -> SynthResult<(
9855        Option<datasynth_core::models::EntityGraph>,
9856        Vec<datasynth_core::models::CrossProcessLink>,
9857    )> {
9858        use datasynth_generators::relationships::{
9859            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
9860            TransactionSummary,
9861        };
9862
9863        let rs_enabled = self.config.relationship_strength.enabled;
9864        let cpl_enabled = self.config.cross_process_links.enabled
9865            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
9866
9867        if !rs_enabled && !cpl_enabled {
9868            debug!(
9869                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
9870            );
9871            return Ok((None, Vec::new()));
9872        }
9873
9874        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
9875
9876        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9877            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9878
9879        let company_code = self
9880            .config
9881            .companies
9882            .first()
9883            .map(|c| c.code.as_str())
9884            .unwrap_or("1000");
9885
9886        // Build the generator with matching config flags
9887        let gen_config = EntityGraphConfig {
9888            enabled: rs_enabled,
9889            cross_process: datasynth_generators::relationships::CrossProcessConfig {
9890                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
9891                enable_return_flows: false,
9892                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
9893                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
9894                // Use higher link rate for small datasets to avoid probabilistic empty results
9895                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
9896                    1.0
9897                } else {
9898                    0.30
9899                },
9900                ..Default::default()
9901            },
9902            strength_config: datasynth_generators::relationships::StrengthConfig {
9903                transaction_volume_weight: self
9904                    .config
9905                    .relationship_strength
9906                    .calculation
9907                    .transaction_volume_weight,
9908                transaction_count_weight: self
9909                    .config
9910                    .relationship_strength
9911                    .calculation
9912                    .transaction_count_weight,
9913                duration_weight: self
9914                    .config
9915                    .relationship_strength
9916                    .calculation
9917                    .relationship_duration_weight,
9918                recency_weight: self.config.relationship_strength.calculation.recency_weight,
9919                mutual_connections_weight: self
9920                    .config
9921                    .relationship_strength
9922                    .calculation
9923                    .mutual_connections_weight,
9924                recency_half_life_days: self
9925                    .config
9926                    .relationship_strength
9927                    .calculation
9928                    .recency_half_life_days,
9929            },
9930            ..Default::default()
9931        };
9932
9933        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9934
9935        // --- Part 1: Entity Relationship Graph ---
9936        let entity_graph = if rs_enabled {
9937            // Build EntitySummary lists from master data
9938            let vendor_summaries: Vec<EntitySummary> = self
9939                .master_data
9940                .vendors
9941                .iter()
9942                .map(|v| {
9943                    EntitySummary::new(
9944                        &v.vendor_id,
9945                        &v.name,
9946                        datasynth_core::models::GraphEntityType::Vendor,
9947                        start_date,
9948                    )
9949                })
9950                .collect();
9951
9952            let customer_summaries: Vec<EntitySummary> = self
9953                .master_data
9954                .customers
9955                .iter()
9956                .map(|c| {
9957                    EntitySummary::new(
9958                        &c.customer_id,
9959                        &c.name,
9960                        datasynth_core::models::GraphEntityType::Customer,
9961                        start_date,
9962                    )
9963                })
9964                .collect();
9965
9966            // Build transaction summaries from journal entries.
9967            // Key = (company_code, trading_partner) for entries that have a
9968            // trading partner.  This captures intercompany flows and any JE
9969            // whose line items carry a trading_partner reference.
9970            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9971                std::collections::HashMap::new();
9972
9973            for je in journal_entries {
9974                let cc = je.header.company_code.clone();
9975                let posting_date = je.header.posting_date;
9976                for line in &je.lines {
9977                    if let Some(ref tp) = line.trading_partner {
9978                        let amount = if line.debit_amount > line.credit_amount {
9979                            line.debit_amount
9980                        } else {
9981                            line.credit_amount
9982                        };
9983                        let entry = txn_summaries
9984                            .entry((cc.clone(), tp.clone()))
9985                            .or_insert_with(|| TransactionSummary {
9986                                total_volume: rust_decimal::Decimal::ZERO,
9987                                transaction_count: 0,
9988                                first_transaction_date: posting_date,
9989                                last_transaction_date: posting_date,
9990                                related_entities: std::collections::HashSet::new(),
9991                            });
9992                        entry.total_volume += amount;
9993                        entry.transaction_count += 1;
9994                        if posting_date < entry.first_transaction_date {
9995                            entry.first_transaction_date = posting_date;
9996                        }
9997                        if posting_date > entry.last_transaction_date {
9998                            entry.last_transaction_date = posting_date;
9999                        }
10000                        entry.related_entities.insert(cc.clone());
10001                    }
10002                }
10003            }
10004
10005            // Also extract transaction relationships from document flow chains.
10006            // P2P chains: Company → Vendor relationships
10007            for chain in &document_flows.p2p_chains {
10008                let cc = chain.purchase_order.header.company_code.clone();
10009                let vendor_id = chain.purchase_order.vendor_id.clone();
10010                let po_date = chain.purchase_order.header.document_date;
10011                let amount = chain.purchase_order.total_net_amount;
10012
10013                let entry = txn_summaries
10014                    .entry((cc.clone(), vendor_id))
10015                    .or_insert_with(|| TransactionSummary {
10016                        total_volume: rust_decimal::Decimal::ZERO,
10017                        transaction_count: 0,
10018                        first_transaction_date: po_date,
10019                        last_transaction_date: po_date,
10020                        related_entities: std::collections::HashSet::new(),
10021                    });
10022                entry.total_volume += amount;
10023                entry.transaction_count += 1;
10024                if po_date < entry.first_transaction_date {
10025                    entry.first_transaction_date = po_date;
10026                }
10027                if po_date > entry.last_transaction_date {
10028                    entry.last_transaction_date = po_date;
10029                }
10030                entry.related_entities.insert(cc);
10031            }
10032
10033            // O2C chains: Company → Customer relationships
10034            for chain in &document_flows.o2c_chains {
10035                let cc = chain.sales_order.header.company_code.clone();
10036                let customer_id = chain.sales_order.customer_id.clone();
10037                let so_date = chain.sales_order.header.document_date;
10038                let amount = chain.sales_order.total_net_amount;
10039
10040                let entry = txn_summaries
10041                    .entry((cc.clone(), customer_id))
10042                    .or_insert_with(|| TransactionSummary {
10043                        total_volume: rust_decimal::Decimal::ZERO,
10044                        transaction_count: 0,
10045                        first_transaction_date: so_date,
10046                        last_transaction_date: so_date,
10047                        related_entities: std::collections::HashSet::new(),
10048                    });
10049                entry.total_volume += amount;
10050                entry.transaction_count += 1;
10051                if so_date < entry.first_transaction_date {
10052                    entry.first_transaction_date = so_date;
10053                }
10054                if so_date > entry.last_transaction_date {
10055                    entry.last_transaction_date = so_date;
10056                }
10057                entry.related_entities.insert(cc);
10058            }
10059
10060            let as_of_date = journal_entries
10061                .last()
10062                .map(|je| je.header.posting_date)
10063                .unwrap_or(start_date);
10064
10065            let graph = gen.generate_entity_graph(
10066                company_code,
10067                as_of_date,
10068                &vendor_summaries,
10069                &customer_summaries,
10070                &txn_summaries,
10071            );
10072
10073            info!(
10074                "Entity relationship graph: {} nodes, {} edges",
10075                graph.nodes.len(),
10076                graph.edges.len()
10077            );
10078            stats.entity_relationship_node_count = graph.nodes.len();
10079            stats.entity_relationship_edge_count = graph.edges.len();
10080            Some(graph)
10081        } else {
10082            None
10083        };
10084
10085        // --- Part 2: Cross-Process Links ---
10086        let cross_process_links = if cpl_enabled {
10087            // Build GoodsReceiptRef from P2P chains
10088            let gr_refs: Vec<GoodsReceiptRef> = document_flows
10089                .p2p_chains
10090                .iter()
10091                .flat_map(|chain| {
10092                    let vendor_id = chain.purchase_order.vendor_id.clone();
10093                    let cc = chain.purchase_order.header.company_code.clone();
10094                    chain.goods_receipts.iter().flat_map(move |gr| {
10095                        gr.items.iter().filter_map({
10096                            let doc_id = gr.header.document_id.clone();
10097                            let v_id = vendor_id.clone();
10098                            let company = cc.clone();
10099                            let receipt_date = gr.header.document_date;
10100                            move |item| {
10101                                item.base
10102                                    .material_id
10103                                    .as_ref()
10104                                    .map(|mat_id| GoodsReceiptRef {
10105                                        document_id: doc_id.clone(),
10106                                        material_id: mat_id.clone(),
10107                                        quantity: item.base.quantity,
10108                                        receipt_date,
10109                                        vendor_id: v_id.clone(),
10110                                        company_code: company.clone(),
10111                                    })
10112                            }
10113                        })
10114                    })
10115                })
10116                .collect();
10117
10118            // Build DeliveryRef from O2C chains
10119            let del_refs: Vec<DeliveryRef> = document_flows
10120                .o2c_chains
10121                .iter()
10122                .flat_map(|chain| {
10123                    let customer_id = chain.sales_order.customer_id.clone();
10124                    let cc = chain.sales_order.header.company_code.clone();
10125                    chain.deliveries.iter().flat_map(move |del| {
10126                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10127                        del.items.iter().filter_map({
10128                            let doc_id = del.header.document_id.clone();
10129                            let c_id = customer_id.clone();
10130                            let company = cc.clone();
10131                            move |item| {
10132                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10133                                    document_id: doc_id.clone(),
10134                                    material_id: mat_id.clone(),
10135                                    quantity: item.base.quantity,
10136                                    delivery_date,
10137                                    customer_id: c_id.clone(),
10138                                    company_code: company.clone(),
10139                                })
10140                            }
10141                        })
10142                    })
10143                })
10144                .collect();
10145
10146            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10147            info!("Cross-process links generated: {} links", links.len());
10148            stats.cross_process_link_count = links.len();
10149            links
10150        } else {
10151            Vec::new()
10152        };
10153
10154        self.check_resources_with_log("post-entity-relationships")?;
10155        Ok((entity_graph, cross_process_links))
10156    }
10157
10158    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
10159    fn phase_industry_data(
10160        &self,
10161        stats: &mut EnhancedGenerationStatistics,
10162    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10163        if !self.config.industry_specific.enabled {
10164            return None;
10165        }
10166        info!("Phase 29: Generating industry-specific data");
10167        let output = datasynth_generators::industry::factory::generate_industry_output(
10168            self.config.global.industry,
10169        );
10170        stats.industry_gl_account_count = output.gl_accounts.len();
10171        info!(
10172            "Industry data generated: {} GL accounts for {:?}",
10173            output.gl_accounts.len(),
10174            self.config.global.industry
10175        );
10176        Some(output)
10177    }
10178
10179    /// Phase 3b: Generate opening balances for each company.
10180    fn phase_opening_balances(
10181        &mut self,
10182        coa: &Arc<ChartOfAccounts>,
10183        stats: &mut EnhancedGenerationStatistics,
10184    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10185        if !self.config.balance.generate_opening_balances {
10186            debug!("Phase 3b: Skipped (opening balance generation disabled)");
10187            return Ok(Vec::new());
10188        }
10189        info!("Phase 3b: Generating Opening Balances");
10190
10191        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10192            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10193        let fiscal_year = start_date.year();
10194
10195        // **v5.3** — When the shard context supplies prior-period
10196        // opening-balance carryovers, use them directly instead of
10197        // calling `OpeningBalanceGenerator`.  This implements multi-
10198        // period continuity: period N+1 opens with period N's closing
10199        // BS positions exactly, rather than re-rolling the industry-
10200        // mix generator and losing the audit trail.
10201        //
10202        // Empty `opening_balances` (the v5.0–v5.2 default) falls
10203        // through to the generator path — byte-identical behaviour
10204        // for single-period engagements.
10205        if let Some(ctx) = &self.shard_context {
10206            if !ctx.opening_balances.is_empty() {
10207                debug!(
10208                    "Phase 3b: using v5.3 opening-balance carryover ({} accounts)",
10209                    ctx.opening_balances.len()
10210                );
10211                let mut results = Vec::new();
10212                for company in &self.config.companies {
10213                    let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10214                        .opening_balances
10215                        .iter()
10216                        .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10217                        .collect();
10218                    let total_assets = ctx
10219                        .opening_balances
10220                        .iter()
10221                        .filter(|ob| {
10222                            matches!(
10223                                ob.account_type,
10224                                AccountType::Asset | AccountType::ContraAsset
10225                            )
10226                        })
10227                        .map(|ob| ob.net_balance())
10228                        .sum::<rust_decimal::Decimal>();
10229                    let total_liabilities = ctx
10230                        .opening_balances
10231                        .iter()
10232                        .filter(|ob| {
10233                            matches!(
10234                                ob.account_type,
10235                                AccountType::Liability | AccountType::ContraLiability
10236                            )
10237                        })
10238                        .map(|ob| ob.net_balance())
10239                        .sum::<rust_decimal::Decimal>();
10240                    let total_equity = ctx
10241                        .opening_balances
10242                        .iter()
10243                        .filter(|ob| {
10244                            matches!(
10245                                ob.account_type,
10246                                AccountType::Equity | AccountType::ContraEquity
10247                            )
10248                        })
10249                        .map(|ob| ob.net_balance())
10250                        .sum::<rust_decimal::Decimal>();
10251                    let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10252                        < rust_decimal::Decimal::ONE;
10253                    results.push(GeneratedOpeningBalance {
10254                        company_code: company.code.clone(),
10255                        as_of_date: start_date,
10256                        balances,
10257                        total_assets,
10258                        total_liabilities,
10259                        total_equity,
10260                        is_balanced,
10261                        calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10262                            current_ratio: None,
10263                            quick_ratio: None,
10264                            debt_to_equity: None,
10265                            working_capital: rust_decimal::Decimal::ZERO,
10266                        },
10267                    });
10268                }
10269                stats.opening_balance_count = results.len();
10270                info!(
10271                    "Phase 3b: opening-balance carryover applied ({} companies)",
10272                    results.len()
10273                );
10274                self.check_resources_with_log("post-opening-balances")?;
10275                return Ok(results);
10276            }
10277        }
10278
10279        let industry = match self.config.global.industry {
10280            IndustrySector::Manufacturing => IndustryType::Manufacturing,
10281            IndustrySector::Retail => IndustryType::Retail,
10282            IndustrySector::FinancialServices => IndustryType::Financial,
10283            IndustrySector::Healthcare => IndustryType::Healthcare,
10284            IndustrySector::Technology => IndustryType::Technology,
10285            _ => IndustryType::Manufacturing,
10286        };
10287
10288        let config = datasynth_generators::OpeningBalanceConfig {
10289            industry,
10290            ..Default::default()
10291        };
10292        let mut gen =
10293            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10294
10295        let mut results = Vec::new();
10296        for company in &self.config.companies {
10297            let spec = OpeningBalanceSpec::new(
10298                company.code.clone(),
10299                start_date,
10300                fiscal_year,
10301                company.currency.clone(),
10302                rust_decimal::Decimal::new(10_000_000, 0),
10303                industry,
10304            );
10305            let ob = gen.generate(&spec, coa, start_date, &company.code);
10306            results.push(ob);
10307        }
10308
10309        stats.opening_balance_count = results.len();
10310        info!("Opening balances generated: {} companies", results.len());
10311        self.check_resources_with_log("post-opening-balances")?;
10312
10313        Ok(results)
10314    }
10315
10316    /// Phase 9b: Reconcile GL control accounts to subledger balances.
10317    fn phase_subledger_reconciliation(
10318        &mut self,
10319        subledger: &SubledgerSnapshot,
10320        entries: &[JournalEntry],
10321        stats: &mut EnhancedGenerationStatistics,
10322    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10323        if !self.config.balance.reconcile_subledgers {
10324            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10325            return Ok(Vec::new());
10326        }
10327        info!("Phase 9b: Reconciling GL to subledger balances");
10328
10329        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10330            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10331            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10332
10333        // Build GL balance map from journal entries using a balance tracker
10334        let tracker_config = BalanceTrackerConfig {
10335            validate_on_each_entry: false,
10336            track_history: false,
10337            fail_on_validation_error: false,
10338            ..Default::default()
10339        };
10340        let recon_currency = self
10341            .config
10342            .companies
10343            .first()
10344            .map(|c| c.currency.clone())
10345            .unwrap_or_else(|| "USD".to_string());
10346        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10347        let validation_errors = tracker.apply_entries(entries);
10348        if !validation_errors.is_empty() {
10349            warn!(
10350                error_count = validation_errors.len(),
10351                "Balance tracker encountered validation errors during subledger reconciliation"
10352            );
10353            for err in &validation_errors {
10354                debug!("Balance validation error: {:?}", err);
10355            }
10356        }
10357
10358        let mut engine = datasynth_generators::ReconciliationEngine::new(
10359            datasynth_generators::ReconciliationConfig::default(),
10360        );
10361
10362        let mut results = Vec::new();
10363        let company_code = self
10364            .config
10365            .companies
10366            .first()
10367            .map(|c| c.code.as_str())
10368            .unwrap_or("1000");
10369
10370        // Reconcile AR
10371        if !subledger.ar_invoices.is_empty() {
10372            let gl_balance = tracker
10373                .get_account_balance(
10374                    company_code,
10375                    datasynth_core::accounts::control_accounts::AR_CONTROL,
10376                )
10377                .map(|b| b.closing_balance)
10378                .unwrap_or_default();
10379            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10380            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10381        }
10382
10383        // Reconcile AP
10384        if !subledger.ap_invoices.is_empty() {
10385            let gl_balance = tracker
10386                .get_account_balance(
10387                    company_code,
10388                    datasynth_core::accounts::control_accounts::AP_CONTROL,
10389                )
10390                .map(|b| b.closing_balance)
10391                .unwrap_or_default();
10392            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10393            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10394        }
10395
10396        // Reconcile FA
10397        if !subledger.fa_records.is_empty() {
10398            let gl_asset_balance = tracker
10399                .get_account_balance(
10400                    company_code,
10401                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10402                )
10403                .map(|b| b.closing_balance)
10404                .unwrap_or_default();
10405            let gl_accum_depr_balance = tracker
10406                .get_account_balance(
10407                    company_code,
10408                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10409                )
10410                .map(|b| b.closing_balance)
10411                .unwrap_or_default();
10412            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10413                subledger.fa_records.iter().collect();
10414            let (asset_recon, depr_recon) = engine.reconcile_fa(
10415                company_code,
10416                end_date,
10417                gl_asset_balance,
10418                gl_accum_depr_balance,
10419                &fa_refs,
10420            );
10421            results.push(asset_recon);
10422            results.push(depr_recon);
10423        }
10424
10425        // Reconcile Inventory
10426        if !subledger.inventory_positions.is_empty() {
10427            let gl_balance = tracker
10428                .get_account_balance(
10429                    company_code,
10430                    datasynth_core::accounts::control_accounts::INVENTORY,
10431                )
10432                .map(|b| b.closing_balance)
10433                .unwrap_or_default();
10434            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10435                subledger.inventory_positions.iter().collect();
10436            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10437        }
10438
10439        stats.subledger_reconciliation_count = results.len();
10440        let passed = results.iter().filter(|r| r.is_balanced()).count();
10441        let failed = results.len() - passed;
10442        info!(
10443            "Subledger reconciliation: {} checks, {} passed, {} failed",
10444            results.len(),
10445            passed,
10446            failed
10447        );
10448        self.check_resources_with_log("post-subledger-reconciliation")?;
10449
10450        Ok(results)
10451    }
10452
10453    /// Generate the chart of accounts.
10454    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10455        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10456
10457        let coa_framework = self.resolve_coa_framework();
10458
10459        let mut gen = ChartOfAccountsGenerator::new(
10460            self.config.chart_of_accounts.complexity,
10461            self.config.global.industry,
10462            self.seed,
10463        )
10464        .with_coa_framework(coa_framework)
10465        // v5.7.0 — honour the opt-in industry-pack expansion flag.
10466        .with_expand_industry_subaccounts(
10467            self.config.chart_of_accounts.expand_industry_subaccounts,
10468        );
10469
10470        let mut built = gen.generate();
10471        // v4.4.1: propagate the accounting framework label from config
10472        // onto the CoA struct so SDK consumers can read it without
10473        // cross-referencing the config (they previously saw null).
10474        if self.config.accounting_standards.enabled {
10475            use datasynth_config::schema::AccountingFrameworkConfig;
10476            built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10477                match f {
10478                    AccountingFrameworkConfig::UsGaap => "us_gaap",
10479                    AccountingFrameworkConfig::Ifrs => "ifrs",
10480                    AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10481                    AccountingFrameworkConfig::GermanGaap => "german_gaap",
10482                    AccountingFrameworkConfig::DualReporting => "dual_reporting",
10483                }
10484                .to_string()
10485            });
10486        }
10487        let coa = Arc::new(built);
10488        self.coa = Some(Arc::clone(&coa));
10489
10490        if let Some(pb) = pb {
10491            pb.finish_with_message("Chart of Accounts complete");
10492        }
10493
10494        Ok(coa)
10495    }
10496
10497    /// Generate master data entities.
10498    fn generate_master_data(&mut self) -> SynthResult<()> {
10499        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10500            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10501        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10502
10503        let total = self.config.companies.len() as u64 * 5; // 5 entity types
10504        let pb = self.create_progress_bar(total, "Generating Master Data");
10505
10506        // Resolve country pack once for all companies (uses primary company's country)
10507        let pack = self.primary_pack().clone();
10508
10509        // Capture config values needed inside the parallel closure
10510        let vendors_per_company = self.phase_config.vendors_per_company;
10511        let customers_per_company = self.phase_config.customers_per_company;
10512        let materials_per_company = self.phase_config.materials_per_company;
10513        let assets_per_company = self.phase_config.assets_per_company;
10514        let coa_framework = self.resolve_coa_framework();
10515
10516        // Generate all master data in parallel across companies.
10517        // Each company's data is independent, making this embarrassingly parallel.
10518        let per_company_results: Vec<_> = self
10519            .config
10520            .companies
10521            .par_iter()
10522            .enumerate()
10523            .map(|(i, company)| {
10524                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10525                let pack = pack.clone();
10526
10527                // Generate vendors (offset counter so IDs are globally unique across companies)
10528                let mut vendor_gen = VendorGenerator::new(company_seed);
10529                vendor_gen.set_country_pack(pack.clone());
10530                vendor_gen.set_coa_framework(coa_framework);
10531                vendor_gen.set_counter_offset(i * vendors_per_company);
10532                // v3.2.0+: user-supplied bank names (and future template
10533                // strings) flow through the shared provider.
10534                vendor_gen.set_template_provider(self.template_provider.clone());
10535                // Wire vendor network config when enabled
10536                if self.config.vendor_network.enabled {
10537                    let vn = &self.config.vendor_network;
10538                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10539                        enabled: true,
10540                        depth: vn.depth,
10541                        tier1_count: datasynth_generators::TierCountConfig::new(
10542                            vn.tier1.min,
10543                            vn.tier1.max,
10544                        ),
10545                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
10546                            vn.tier2_per_parent.min,
10547                            vn.tier2_per_parent.max,
10548                        ),
10549                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
10550                            vn.tier3_per_parent.min,
10551                            vn.tier3_per_parent.max,
10552                        ),
10553                        cluster_distribution: datasynth_generators::ClusterDistribution {
10554                            reliable_strategic: vn.clusters.reliable_strategic,
10555                            standard_operational: vn.clusters.standard_operational,
10556                            transactional: vn.clusters.transactional,
10557                            problematic: vn.clusters.problematic,
10558                        },
10559                        concentration_limits: datasynth_generators::ConcentrationLimits {
10560                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10561                            max_top5: vn.dependencies.top_5_concentration,
10562                        },
10563                        ..datasynth_generators::VendorNetworkConfig::default()
10564                    });
10565                }
10566                let vendor_pool =
10567                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10568
10569                // Generate customers (offset counter so IDs are globally unique across companies)
10570                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10571                customer_gen.set_country_pack(pack.clone());
10572                customer_gen.set_coa_framework(coa_framework);
10573                customer_gen.set_counter_offset(i * customers_per_company);
10574                // v3.2.0+: user-supplied customer names flow through the shared provider.
10575                customer_gen.set_template_provider(self.template_provider.clone());
10576                // Wire customer segmentation config when enabled
10577                if self.config.customer_segmentation.enabled {
10578                    let cs = &self.config.customer_segmentation;
10579                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10580                        enabled: true,
10581                        segment_distribution: datasynth_generators::SegmentDistribution {
10582                            enterprise: cs.value_segments.enterprise.customer_share,
10583                            mid_market: cs.value_segments.mid_market.customer_share,
10584                            smb: cs.value_segments.smb.customer_share,
10585                            consumer: cs.value_segments.consumer.customer_share,
10586                        },
10587                        referral_config: datasynth_generators::ReferralConfig {
10588                            enabled: cs.networks.referrals.enabled,
10589                            referral_rate: cs.networks.referrals.referral_rate,
10590                            ..Default::default()
10591                        },
10592                        hierarchy_config: datasynth_generators::HierarchyConfig {
10593                            enabled: cs.networks.corporate_hierarchies.enabled,
10594                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10595                            ..Default::default()
10596                        },
10597                        ..Default::default()
10598                    };
10599                    customer_gen.set_segmentation_config(seg_cfg);
10600                }
10601                let customer_pool = customer_gen.generate_customer_pool(
10602                    customers_per_company,
10603                    &company.code,
10604                    start_date,
10605                );
10606
10607                // Generate materials (offset counter so IDs are globally unique across companies)
10608                let mut material_gen = MaterialGenerator::new(company_seed + 200);
10609                material_gen.set_country_pack(pack.clone());
10610                material_gen.set_counter_offset(i * materials_per_company);
10611                // v3.2.1+: user-supplied material descriptions flow through shared provider
10612                material_gen.set_template_provider(self.template_provider.clone());
10613                let material_pool = material_gen.generate_material_pool(
10614                    materials_per_company,
10615                    &company.code,
10616                    start_date,
10617                );
10618
10619                // Generate fixed assets
10620                let mut asset_gen = AssetGenerator::new(company_seed + 300);
10621                // v3.2.1+: user-supplied asset descriptions flow through shared provider
10622                asset_gen.set_template_provider(self.template_provider.clone());
10623                let asset_pool = asset_gen.generate_asset_pool(
10624                    assets_per_company,
10625                    &company.code,
10626                    (start_date, end_date),
10627                );
10628
10629                // Generate employees
10630                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
10631                employee_gen.set_country_pack(pack);
10632                // v3.2.1+: user-supplied department names flow through shared provider
10633                employee_gen.set_template_provider(self.template_provider.clone());
10634                let employee_pool =
10635                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
10636
10637                // Generate employee change history (2-5 events per employee)
10638                let employee_change_history =
10639                    employee_gen.generate_all_change_history(&employee_pool, end_date);
10640
10641                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
10642                let employee_ids: Vec<String> = employee_pool
10643                    .employees
10644                    .iter()
10645                    .map(|e| e.employee_id.clone())
10646                    .collect();
10647                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
10648                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
10649
10650                // v5.1: profit centre hierarchy (two-level: top-level
10651                // segment / region / product-group nodes + sub-units).
10652                let mut pc_gen =
10653                    datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
10654                let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
10655
10656                (
10657                    vendor_pool.vendors,
10658                    customer_pool.customers,
10659                    material_pool.materials,
10660                    asset_pool.assets,
10661                    employee_pool.employees,
10662                    employee_change_history,
10663                    cost_centers,
10664                    profit_centers,
10665                )
10666            })
10667            .collect();
10668
10669        // Aggregate results from all companies
10670        for (
10671            vendors,
10672            customers,
10673            materials,
10674            assets,
10675            employees,
10676            change_history,
10677            cost_centers,
10678            profit_centers,
10679        ) in per_company_results
10680        {
10681            self.master_data.vendors.extend(vendors);
10682            self.master_data.customers.extend(customers);
10683            self.master_data.materials.extend(materials);
10684            self.master_data.assets.extend(assets);
10685            self.master_data.employees.extend(employees);
10686            self.master_data.cost_centers.extend(cost_centers);
10687            self.master_data.profit_centers.extend(profit_centers);
10688            self.master_data
10689                .employee_change_history
10690                .extend(change_history);
10691        }
10692
10693        // v3.3.0: one OrganizationalProfile per company. Cheap to
10694        // generate (derived from industry + company_code) so we
10695        // always emit when master data runs; no separate config flag.
10696        {
10697            use datasynth_core::models::IndustrySector;
10698            use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10699            let industry = match self.config.global.industry {
10700                IndustrySector::Manufacturing => "manufacturing",
10701                IndustrySector::Retail => "retail",
10702                IndustrySector::FinancialServices => "financial_services",
10703                IndustrySector::Technology => "technology",
10704                IndustrySector::Healthcare => "healthcare",
10705                _ => "other",
10706            };
10707            for (i, company) in self.config.companies.iter().enumerate() {
10708                let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10709                let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10710                let profile = profile_gen.generate(&company.code, industry);
10711                self.master_data.organizational_profiles.push(profile);
10712            }
10713        }
10714
10715        if let Some(pb) = &pb {
10716            pb.inc(total);
10717        }
10718        if let Some(pb) = pb {
10719            pb.finish_with_message("Master data generation complete");
10720        }
10721
10722        Ok(())
10723    }
10724
10725    /// Generate document flows (P2P and O2C).
10726    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10727        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10728            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10729
10730        // Generate P2P chains
10731        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
10732        let months = (self.config.global.period_months as usize).max(1);
10733        let p2p_count = self
10734            .phase_config
10735            .p2p_chains
10736            .min(self.master_data.vendors.len() * 2 * months);
10737        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10738
10739        // Convert P2P config from schema to generator config
10740        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10741        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10742        p2p_gen.set_country_pack(self.primary_pack().clone());
10743        // v3.4.1: wire temporal context so PO/GR/invoice/payment dates snap
10744        // to business days. No-op when `temporal_patterns.business_days.
10745        // enabled = false`.
10746        if let Some(ctx) = &self.temporal_context {
10747            p2p_gen.set_temporal_context(Arc::clone(ctx));
10748        }
10749
10750        for i in 0..p2p_count {
10751            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10752            let materials: Vec<&Material> = self
10753                .master_data
10754                .materials
10755                .iter()
10756                .skip(i % self.master_data.materials.len().max(1))
10757                .take(2.min(self.master_data.materials.len()))
10758                .collect();
10759
10760            if materials.is_empty() {
10761                continue;
10762            }
10763
10764            let company = &self.config.companies[i % self.config.companies.len()];
10765            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10766            let fiscal_period = po_date.month() as u8;
10767            let created_by = if self.master_data.employees.is_empty() {
10768                "SYSTEM"
10769            } else {
10770                self.master_data.employees[i % self.master_data.employees.len()]
10771                    .user_id
10772                    .as_str()
10773            };
10774
10775            let chain = p2p_gen.generate_chain(
10776                &company.code,
10777                vendor,
10778                &materials,
10779                po_date,
10780                start_date.year() as u16,
10781                fiscal_period,
10782                created_by,
10783            );
10784
10785            // Flatten documents
10786            flows.purchase_orders.push(chain.purchase_order.clone());
10787            flows.goods_receipts.extend(chain.goods_receipts.clone());
10788            if let Some(vi) = &chain.vendor_invoice {
10789                flows.vendor_invoices.push(vi.clone());
10790            }
10791            if let Some(payment) = &chain.payment {
10792                flows.payments.push(payment.clone());
10793            }
10794            for remainder in &chain.remainder_payments {
10795                flows.payments.push(remainder.clone());
10796            }
10797            flows.p2p_chains.push(chain);
10798
10799            if let Some(pb) = &pb {
10800                pb.inc(1);
10801            }
10802        }
10803
10804        if let Some(pb) = pb {
10805            pb.finish_with_message("P2P document flows complete");
10806        }
10807
10808        // Generate O2C chains
10809        // Cap at ~2 SOs per customer per month to keep order volume realistic
10810        let o2c_count = self
10811            .phase_config
10812            .o2c_chains
10813            .min(self.master_data.customers.len() * 2 * months);
10814        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
10815
10816        // Convert O2C config from schema to generator config
10817        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
10818        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
10819        o2c_gen.set_country_pack(self.primary_pack().clone());
10820        // v3.4.1: wire temporal context (no-op when business_days disabled).
10821        if let Some(ctx) = &self.temporal_context {
10822            o2c_gen.set_temporal_context(Arc::clone(ctx));
10823        }
10824
10825        for i in 0..o2c_count {
10826            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
10827            let materials: Vec<&Material> = self
10828                .master_data
10829                .materials
10830                .iter()
10831                .skip(i % self.master_data.materials.len().max(1))
10832                .take(2.min(self.master_data.materials.len()))
10833                .collect();
10834
10835            if materials.is_empty() {
10836                continue;
10837            }
10838
10839            let company = &self.config.companies[i % self.config.companies.len()];
10840            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
10841            let fiscal_period = so_date.month() as u8;
10842            let created_by = if self.master_data.employees.is_empty() {
10843                "SYSTEM"
10844            } else {
10845                self.master_data.employees[i % self.master_data.employees.len()]
10846                    .user_id
10847                    .as_str()
10848            };
10849
10850            let chain = o2c_gen.generate_chain(
10851                &company.code,
10852                customer,
10853                &materials,
10854                so_date,
10855                start_date.year() as u16,
10856                fiscal_period,
10857                created_by,
10858            );
10859
10860            // Flatten documents
10861            flows.sales_orders.push(chain.sales_order.clone());
10862            flows.deliveries.extend(chain.deliveries.clone());
10863            if let Some(ci) = &chain.customer_invoice {
10864                flows.customer_invoices.push(ci.clone());
10865            }
10866            if let Some(receipt) = &chain.customer_receipt {
10867                flows.payments.push(receipt.clone());
10868            }
10869            // Extract remainder receipts (follow-up to partial payments)
10870            for receipt in &chain.remainder_receipts {
10871                flows.payments.push(receipt.clone());
10872            }
10873            flows.o2c_chains.push(chain);
10874
10875            if let Some(pb) = &pb {
10876                pb.inc(1);
10877            }
10878        }
10879
10880        if let Some(pb) = pb {
10881            pb.finish_with_message("O2C document flows complete");
10882        }
10883
10884        // Collect all document cross-references from document headers.
10885        // Each document embeds references to its predecessor(s) via add_reference(); here we
10886        // denormalise them into a flat list for the document_references.json output file.
10887        {
10888            let mut refs = Vec::new();
10889            for doc in &flows.purchase_orders {
10890                refs.extend(doc.header.document_references.iter().cloned());
10891            }
10892            for doc in &flows.goods_receipts {
10893                refs.extend(doc.header.document_references.iter().cloned());
10894            }
10895            for doc in &flows.vendor_invoices {
10896                refs.extend(doc.header.document_references.iter().cloned());
10897            }
10898            for doc in &flows.sales_orders {
10899                refs.extend(doc.header.document_references.iter().cloned());
10900            }
10901            for doc in &flows.deliveries {
10902                refs.extend(doc.header.document_references.iter().cloned());
10903            }
10904            for doc in &flows.customer_invoices {
10905                refs.extend(doc.header.document_references.iter().cloned());
10906            }
10907            for doc in &flows.payments {
10908                refs.extend(doc.header.document_references.iter().cloned());
10909            }
10910            debug!(
10911                "Collected {} document cross-references from document headers",
10912                refs.len()
10913            );
10914            flows.document_references = refs;
10915        }
10916
10917        Ok(())
10918    }
10919
10920    /// Generate journal entries using parallel generation across multiple cores.
10921    fn generate_journal_entries(
10922        &mut self,
10923        coa: &Arc<ChartOfAccounts>,
10924    ) -> SynthResult<Vec<JournalEntry>> {
10925        use datasynth_core::traits::ParallelGenerator;
10926
10927        let total = self.calculate_total_transactions();
10928        let pb = self.create_progress_bar(total, "Generating Journal Entries");
10929
10930        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10931            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10932        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10933
10934        let company_codes: Vec<String> = self
10935            .config
10936            .companies
10937            .iter()
10938            .map(|c| c.code.clone())
10939            .collect();
10940
10941        let mut generator = JournalEntryGenerator::new_with_params(
10942            self.config.transactions.clone(),
10943            Arc::clone(coa),
10944            company_codes,
10945            start_date,
10946            end_date,
10947            self.seed,
10948        );
10949        // Wire the `business_processes.*_weight` config through (phantom knob
10950        // until now — the JE generator hard-coded 0.35/0.30/0.20/0.10/0.05).
10951        let bp = &self.config.business_processes;
10952        generator.set_business_process_weights(
10953            bp.o2c_weight,
10954            bp.p2p_weight,
10955            bp.r2r_weight,
10956            bp.h2r_weight,
10957            bp.a2r_weight,
10958        );
10959        // v3.4.0: wire advanced distributions (mixture models + industry
10960        // profiles). No-op when `distributions.enabled = false` or
10961        // `distributions.amounts.enabled = false`, preserving v3.3.2
10962        // byte-identical output on default configs.
10963        generator
10964            .set_advanced_distributions(&self.config.distributions, self.seed + 400)
10965            .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
10966        let generator = generator;
10967
10968        // Connect generated master data to ensure JEs reference real entities
10969        // Enable persona-based error injection for realistic human behavior
10970        // Pass fraud configuration for fraud injection
10971        let je_pack = self.primary_pack();
10972
10973        let mut generator = generator
10974            .with_master_data(
10975                &self.master_data.vendors,
10976                &self.master_data.customers,
10977                &self.master_data.materials,
10978            )
10979            .with_country_pack_names(je_pack)
10980            .with_country_pack_temporal(
10981                self.config.temporal_patterns.clone(),
10982                self.seed + 200,
10983                je_pack,
10984            )
10985            .with_persona_errors(true)
10986            .with_fraud_config(self.config.fraud.clone());
10987
10988        // Apply temporal drift if configured. v3.5.2+: also merge
10989        // `distributions.regime_changes` (regime events, economic
10990        // cycles, parameter drifts) into the same DriftConfig so both
10991        // knobs flow through the shared DriftController.
10992        let temporal_enabled = self.config.temporal.enabled;
10993        let regimes_enabled = self.config.distributions.regime_changes.enabled;
10994        if temporal_enabled || regimes_enabled {
10995            let mut drift_config = if temporal_enabled {
10996                self.config.temporal.to_core_config()
10997            } else {
10998                // regime-changes only: start from default (drift OFF),
10999                // apply_to flips `enabled = true`.
11000                datasynth_core::distributions::DriftConfig::default()
11001            };
11002            if regimes_enabled {
11003                self.config
11004                    .distributions
11005                    .regime_changes
11006                    .apply_to(&mut drift_config, start_date);
11007            }
11008            generator = generator.with_drift_config(drift_config, self.seed + 100);
11009        }
11010
11011        // Check memory limit at start
11012        self.check_memory_limit()?;
11013
11014        // Determine parallelism: use available cores, but cap at total entries
11015        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11016
11017        // Use parallel generation for datasets with 10K+ entries.
11018        // Below this threshold, the statistical properties of a single-seeded
11019        // generator (e.g. Benford compliance) are better preserved.
11020        let entries = if total >= 10_000 && num_threads > 1 {
11021            // Parallel path: split the generator across cores and generate in parallel.
11022            // Each sub-generator gets a unique seed for deterministic, independent generation.
11023            let sub_generators = generator.split(num_threads);
11024            let entries_per_thread = total as usize / num_threads;
11025            let remainder = total as usize % num_threads;
11026
11027            let batches: Vec<Vec<JournalEntry>> = sub_generators
11028                .into_par_iter()
11029                .enumerate()
11030                .map(|(i, mut gen)| {
11031                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11032                    gen.generate_batch(count)
11033                })
11034                .collect();
11035
11036            // Merge all batches into a single Vec
11037            let entries = JournalEntryGenerator::merge_results(batches);
11038
11039            if let Some(pb) = &pb {
11040                pb.inc(total);
11041            }
11042            entries
11043        } else {
11044            // Sequential path for small datasets (< 1000 entries)
11045            let mut entries = Vec::with_capacity(total as usize);
11046            for _ in 0..total {
11047                let entry = generator.generate();
11048                entries.push(entry);
11049                if let Some(pb) = &pb {
11050                    pb.inc(1);
11051                }
11052            }
11053            entries
11054        };
11055
11056        if let Some(pb) = pb {
11057            pb.finish_with_message("Journal entries complete");
11058        }
11059
11060        Ok(entries)
11061    }
11062
11063    /// Generate journal entries from document flows.
11064    ///
11065    /// This creates proper GL entries for each document in the P2P and O2C flows,
11066    /// ensuring that document activity is reflected in the general ledger.
11067    fn generate_jes_from_document_flows(
11068        &mut self,
11069        flows: &DocumentFlowSnapshot,
11070    ) -> SynthResult<Vec<JournalEntry>> {
11071        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11072        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11073
11074        let je_config = match self.resolve_coa_framework() {
11075            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11076            CoAFramework::GermanSkr04 => {
11077                let fa = datasynth_core::FrameworkAccounts::german_gaap();
11078                DocumentFlowJeConfig::from(&fa)
11079            }
11080            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11081        };
11082
11083        let populate_fec = je_config.populate_fec_fields;
11084        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11085
11086        // Build auxiliary account lookup from vendor/customer master data so that
11087        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
11088        // PCG "4010001") instead of raw partner IDs.
11089        if populate_fec {
11090            let mut aux_lookup = std::collections::HashMap::new();
11091            for vendor in &self.master_data.vendors {
11092                if let Some(ref aux) = vendor.auxiliary_gl_account {
11093                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11094                }
11095            }
11096            for customer in &self.master_data.customers {
11097                if let Some(ref aux) = customer.auxiliary_gl_account {
11098                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11099                }
11100            }
11101            if !aux_lookup.is_empty() {
11102                generator.set_auxiliary_account_lookup(aux_lookup);
11103            }
11104        }
11105
11106        let mut entries = Vec::new();
11107
11108        // Generate JEs from P2P chains
11109        for chain in &flows.p2p_chains {
11110            let chain_entries = generator.generate_from_p2p_chain(chain);
11111            entries.extend(chain_entries);
11112            if let Some(pb) = &pb {
11113                pb.inc(1);
11114            }
11115        }
11116
11117        // Generate JEs from O2C chains
11118        for chain in &flows.o2c_chains {
11119            let chain_entries = generator.generate_from_o2c_chain(chain);
11120            entries.extend(chain_entries);
11121            if let Some(pb) = &pb {
11122                pb.inc(1);
11123            }
11124        }
11125
11126        if let Some(pb) = pb {
11127            pb.finish_with_message(format!(
11128                "Generated {} JEs from document flows",
11129                entries.len()
11130            ));
11131        }
11132
11133        Ok(entries)
11134    }
11135
11136    /// Generate journal entries from payroll runs.
11137    ///
11138    /// Creates one JE per payroll run:
11139    /// - DR Salaries & Wages (6100) for gross pay
11140    /// - CR Payroll Clearing (9100) for gross pay
11141    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11142        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11143
11144        let mut jes = Vec::with_capacity(payroll_runs.len());
11145
11146        for run in payroll_runs {
11147            let mut je = JournalEntry::new_simple(
11148                format!("JE-PAYROLL-{}", run.payroll_id),
11149                run.company_code.clone(),
11150                run.run_date,
11151                format!("Payroll {}", run.payroll_id),
11152            );
11153
11154            // Debit Salaries & Wages for gross pay
11155            je.add_line(JournalEntryLine {
11156                line_number: 1,
11157                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11158                debit_amount: run.total_gross,
11159                reference: Some(run.payroll_id.clone()),
11160                text: Some(format!(
11161                    "Payroll {} ({} employees)",
11162                    run.payroll_id, run.employee_count
11163                )),
11164                ..Default::default()
11165            });
11166
11167            // Credit Payroll Clearing for gross pay
11168            je.add_line(JournalEntryLine {
11169                line_number: 2,
11170                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11171                credit_amount: run.total_gross,
11172                reference: Some(run.payroll_id.clone()),
11173                ..Default::default()
11174            });
11175
11176            jes.push(je);
11177        }
11178
11179        jes
11180    }
11181
11182    /// Link document flows to subledger records.
11183    ///
11184    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
11185    /// ensuring subledger data is coherent with document flow data.
11186    fn link_document_flows_to_subledgers(
11187        &mut self,
11188        flows: &DocumentFlowSnapshot,
11189    ) -> SynthResult<SubledgerSnapshot> {
11190        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11191        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11192
11193        // Build vendor/customer name maps from master data for realistic subledger names
11194        let vendor_names: std::collections::HashMap<String, String> = self
11195            .master_data
11196            .vendors
11197            .iter()
11198            .map(|v| (v.vendor_id.clone(), v.name.clone()))
11199            .collect();
11200        let customer_names: std::collections::HashMap<String, String> = self
11201            .master_data
11202            .customers
11203            .iter()
11204            .map(|c| (c.customer_id.clone(), c.name.clone()))
11205            .collect();
11206
11207        let mut linker = DocumentFlowLinker::new()
11208            .with_vendor_names(vendor_names)
11209            .with_customer_names(customer_names);
11210
11211        // Convert vendor invoices to AP invoices
11212        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11213        if let Some(pb) = &pb {
11214            pb.inc(flows.vendor_invoices.len() as u64);
11215        }
11216
11217        // Convert customer invoices to AR invoices
11218        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11219        if let Some(pb) = &pb {
11220            pb.inc(flows.customer_invoices.len() as u64);
11221        }
11222
11223        if let Some(pb) = pb {
11224            pb.finish_with_message(format!(
11225                "Linked {} AP and {} AR invoices",
11226                ap_invoices.len(),
11227                ar_invoices.len()
11228            ));
11229        }
11230
11231        Ok(SubledgerSnapshot {
11232            ap_invoices,
11233            ar_invoices,
11234            fa_records: Vec::new(),
11235            inventory_positions: Vec::new(),
11236            inventory_movements: Vec::new(),
11237            // Aging reports are computed after payment settlement in phase_document_flows.
11238            ar_aging_reports: Vec::new(),
11239            ap_aging_reports: Vec::new(),
11240            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
11241            depreciation_runs: Vec::new(),
11242            inventory_valuations: Vec::new(),
11243            // Dunning runs and letters are populated in phase_document_flows after AR aging.
11244            dunning_runs: Vec::new(),
11245            dunning_letters: Vec::new(),
11246        })
11247    }
11248
11249    /// Generate OCPM events from document flows.
11250    ///
11251    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
11252    /// capturing the object-centric process perspective.
11253    #[allow(clippy::too_many_arguments)]
11254    fn generate_ocpm_events(
11255        &mut self,
11256        flows: &DocumentFlowSnapshot,
11257        sourcing: &SourcingSnapshot,
11258        hr: &HrSnapshot,
11259        manufacturing: &ManufacturingSnapshot,
11260        banking: &BankingSnapshot,
11261        audit: &AuditSnapshot,
11262        financial_reporting: &FinancialReportingSnapshot,
11263    ) -> SynthResult<OcpmSnapshot> {
11264        let total_chains = flows.p2p_chains.len()
11265            + flows.o2c_chains.len()
11266            + sourcing.sourcing_projects.len()
11267            + hr.payroll_runs.len()
11268            + manufacturing.production_orders.len()
11269            + banking.customers.len()
11270            + audit.engagements.len()
11271            + financial_reporting.bank_reconciliations.len();
11272        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11273
11274        // Create OCPM event log with standard types
11275        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11276        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11277
11278        // Configure the OCPM generator
11279        let ocpm_config = OcpmGeneratorConfig {
11280            generate_p2p: true,
11281            generate_o2c: true,
11282            generate_s2c: !sourcing.sourcing_projects.is_empty(),
11283            generate_h2r: !hr.payroll_runs.is_empty(),
11284            generate_mfg: !manufacturing.production_orders.is_empty(),
11285            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11286            generate_bank: !banking.customers.is_empty(),
11287            generate_audit: !audit.engagements.is_empty(),
11288            happy_path_rate: 0.75,
11289            exception_path_rate: 0.20,
11290            error_path_rate: 0.05,
11291            add_duration_variability: true,
11292            duration_std_dev_factor: 0.3,
11293        };
11294        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11295        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11296
11297        // Get available users for resource assignment
11298        let available_users: Vec<String> = self
11299            .master_data
11300            .employees
11301            .iter()
11302            .take(20)
11303            .map(|e| e.user_id.clone())
11304            .collect();
11305
11306        // Deterministic base date from config (avoids Utc::now() non-determinism)
11307        let fallback_date =
11308            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11309        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11310            .unwrap_or(fallback_date);
11311        let base_midnight = base_date
11312            .and_hms_opt(0, 0, 0)
11313            .expect("midnight is always valid");
11314        let base_datetime =
11315            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11316
11317        // Helper closure to add case results to event log
11318        let add_result = |event_log: &mut OcpmEventLog,
11319                          result: datasynth_ocpm::CaseGenerationResult| {
11320            for event in result.events {
11321                event_log.add_event(event);
11322            }
11323            for object in result.objects {
11324                event_log.add_object(object);
11325            }
11326            for relationship in result.relationships {
11327                event_log.add_relationship(relationship);
11328            }
11329            for corr in result.correlation_events {
11330                event_log.add_correlation_event(corr);
11331            }
11332            event_log.add_case(result.case_trace);
11333        };
11334
11335        // Generate events from P2P chains
11336        for chain in &flows.p2p_chains {
11337            let po = &chain.purchase_order;
11338            let documents = P2pDocuments::new(
11339                &po.header.document_id,
11340                &po.vendor_id,
11341                &po.header.company_code,
11342                po.total_net_amount,
11343                &po.header.currency,
11344                &ocpm_uuid_factory,
11345            )
11346            .with_goods_receipt(
11347                chain
11348                    .goods_receipts
11349                    .first()
11350                    .map(|gr| gr.header.document_id.as_str())
11351                    .unwrap_or(""),
11352                &ocpm_uuid_factory,
11353            )
11354            .with_invoice(
11355                chain
11356                    .vendor_invoice
11357                    .as_ref()
11358                    .map(|vi| vi.header.document_id.as_str())
11359                    .unwrap_or(""),
11360                &ocpm_uuid_factory,
11361            )
11362            .with_payment(
11363                chain
11364                    .payment
11365                    .as_ref()
11366                    .map(|p| p.header.document_id.as_str())
11367                    .unwrap_or(""),
11368                &ocpm_uuid_factory,
11369            );
11370
11371            let start_time =
11372                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
11373            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
11374            add_result(&mut event_log, result);
11375
11376            if let Some(pb) = &pb {
11377                pb.inc(1);
11378            }
11379        }
11380
11381        // Generate events from O2C chains
11382        for chain in &flows.o2c_chains {
11383            let so = &chain.sales_order;
11384            let documents = O2cDocuments::new(
11385                &so.header.document_id,
11386                &so.customer_id,
11387                &so.header.company_code,
11388                so.total_net_amount,
11389                &so.header.currency,
11390                &ocpm_uuid_factory,
11391            )
11392            .with_delivery(
11393                chain
11394                    .deliveries
11395                    .first()
11396                    .map(|d| d.header.document_id.as_str())
11397                    .unwrap_or(""),
11398                &ocpm_uuid_factory,
11399            )
11400            .with_invoice(
11401                chain
11402                    .customer_invoice
11403                    .as_ref()
11404                    .map(|ci| ci.header.document_id.as_str())
11405                    .unwrap_or(""),
11406                &ocpm_uuid_factory,
11407            )
11408            .with_receipt(
11409                chain
11410                    .customer_receipt
11411                    .as_ref()
11412                    .map(|r| r.header.document_id.as_str())
11413                    .unwrap_or(""),
11414                &ocpm_uuid_factory,
11415            );
11416
11417            let start_time =
11418                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
11419            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
11420            add_result(&mut event_log, result);
11421
11422            if let Some(pb) = &pb {
11423                pb.inc(1);
11424            }
11425        }
11426
11427        // Generate events from S2C sourcing projects
11428        for project in &sourcing.sourcing_projects {
11429            // Find vendor from contracts or qualifications
11430            let vendor_id = sourcing
11431                .contracts
11432                .iter()
11433                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11434                .map(|c| c.vendor_id.clone())
11435                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
11436                .or_else(|| {
11437                    self.master_data
11438                        .vendors
11439                        .first()
11440                        .map(|v| v.vendor_id.clone())
11441                })
11442                .unwrap_or_else(|| "V000".to_string());
11443            let mut docs = S2cDocuments::new(
11444                &project.project_id,
11445                &vendor_id,
11446                &project.company_code,
11447                project.estimated_annual_spend,
11448                &ocpm_uuid_factory,
11449            );
11450            // Link RFx if available
11451            if let Some(rfx) = sourcing
11452                .rfx_events
11453                .iter()
11454                .find(|r| r.sourcing_project_id == project.project_id)
11455            {
11456                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11457                // Link winning bid (status == Accepted)
11458                if let Some(bid) = sourcing.bids.iter().find(|b| {
11459                    b.rfx_id == rfx.rfx_id
11460                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11461                }) {
11462                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11463                }
11464            }
11465            // Link contract
11466            if let Some(contract) = sourcing
11467                .contracts
11468                .iter()
11469                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11470            {
11471                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
11472            }
11473            let start_time = base_datetime - chrono::Duration::days(90);
11474            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
11475            add_result(&mut event_log, result);
11476
11477            if let Some(pb) = &pb {
11478                pb.inc(1);
11479            }
11480        }
11481
11482        // Generate events from H2R payroll runs
11483        for run in &hr.payroll_runs {
11484            // Use first matching payroll line item's employee, or fallback
11485            let employee_id = hr
11486                .payroll_line_items
11487                .iter()
11488                .find(|li| li.payroll_id == run.payroll_id)
11489                .map(|li| li.employee_id.as_str())
11490                .unwrap_or("EMP000");
11491            let docs = H2rDocuments::new(
11492                &run.payroll_id,
11493                employee_id,
11494                &run.company_code,
11495                run.total_gross,
11496                &ocpm_uuid_factory,
11497            )
11498            .with_time_entries(
11499                hr.time_entries
11500                    .iter()
11501                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
11502                    .take(5)
11503                    .map(|t| t.entry_id.as_str())
11504                    .collect(),
11505            );
11506            let start_time = base_datetime - chrono::Duration::days(30);
11507            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
11508            add_result(&mut event_log, result);
11509
11510            if let Some(pb) = &pb {
11511                pb.inc(1);
11512            }
11513        }
11514
11515        // Generate events from MFG production orders
11516        for order in &manufacturing.production_orders {
11517            let mut docs = MfgDocuments::new(
11518                &order.order_id,
11519                &order.material_id,
11520                &order.company_code,
11521                order.planned_quantity,
11522                &ocpm_uuid_factory,
11523            )
11524            .with_operations(
11525                order
11526                    .operations
11527                    .iter()
11528                    .map(|o| format!("OP-{:04}", o.operation_number))
11529                    .collect::<Vec<_>>()
11530                    .iter()
11531                    .map(std::string::String::as_str)
11532                    .collect(),
11533            );
11534            // Link quality inspection if available (via reference_id matching order_id)
11535            if let Some(insp) = manufacturing
11536                .quality_inspections
11537                .iter()
11538                .find(|i| i.reference_id == order.order_id)
11539            {
11540                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
11541            }
11542            // Link cycle count if available (match by material_id in items)
11543            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
11544                cc.items
11545                    .iter()
11546                    .any(|item| item.material_id == order.material_id)
11547            }) {
11548                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
11549            }
11550            let start_time = base_datetime - chrono::Duration::days(60);
11551            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
11552            add_result(&mut event_log, result);
11553
11554            if let Some(pb) = &pb {
11555                pb.inc(1);
11556            }
11557        }
11558
11559        // Generate events from Banking customers
11560        for customer in &banking.customers {
11561            let customer_id_str = customer.customer_id.to_string();
11562            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
11563            // Link accounts (primary_owner_id matches customer_id)
11564            if let Some(account) = banking
11565                .accounts
11566                .iter()
11567                .find(|a| a.primary_owner_id == customer.customer_id)
11568            {
11569                let account_id_str = account.account_id.to_string();
11570                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
11571                // Link transactions for this account
11572                let txn_strs: Vec<String> = banking
11573                    .transactions
11574                    .iter()
11575                    .filter(|t| t.account_id == account.account_id)
11576                    .take(10)
11577                    .map(|t| t.transaction_id.to_string())
11578                    .collect();
11579                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
11580                let txn_amounts: Vec<rust_decimal::Decimal> = banking
11581                    .transactions
11582                    .iter()
11583                    .filter(|t| t.account_id == account.account_id)
11584                    .take(10)
11585                    .map(|t| t.amount)
11586                    .collect();
11587                if !txn_ids.is_empty() {
11588                    docs = docs.with_transactions(txn_ids, txn_amounts);
11589                }
11590            }
11591            let start_time = base_datetime - chrono::Duration::days(180);
11592            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
11593            add_result(&mut event_log, result);
11594
11595            if let Some(pb) = &pb {
11596                pb.inc(1);
11597            }
11598        }
11599
11600        // Generate events from Audit engagements
11601        for engagement in &audit.engagements {
11602            let engagement_id_str = engagement.engagement_id.to_string();
11603            let docs = AuditDocuments::new(
11604                &engagement_id_str,
11605                &engagement.client_entity_id,
11606                &ocpm_uuid_factory,
11607            )
11608            .with_workpapers(
11609                audit
11610                    .workpapers
11611                    .iter()
11612                    .filter(|w| w.engagement_id == engagement.engagement_id)
11613                    .take(10)
11614                    .map(|w| w.workpaper_id.to_string())
11615                    .collect::<Vec<_>>()
11616                    .iter()
11617                    .map(std::string::String::as_str)
11618                    .collect(),
11619            )
11620            .with_evidence(
11621                audit
11622                    .evidence
11623                    .iter()
11624                    .filter(|e| e.engagement_id == engagement.engagement_id)
11625                    .take(10)
11626                    .map(|e| e.evidence_id.to_string())
11627                    .collect::<Vec<_>>()
11628                    .iter()
11629                    .map(std::string::String::as_str)
11630                    .collect(),
11631            )
11632            .with_risks(
11633                audit
11634                    .risk_assessments
11635                    .iter()
11636                    .filter(|r| r.engagement_id == engagement.engagement_id)
11637                    .take(5)
11638                    .map(|r| r.risk_id.to_string())
11639                    .collect::<Vec<_>>()
11640                    .iter()
11641                    .map(std::string::String::as_str)
11642                    .collect(),
11643            )
11644            .with_findings(
11645                audit
11646                    .findings
11647                    .iter()
11648                    .filter(|f| f.engagement_id == engagement.engagement_id)
11649                    .take(5)
11650                    .map(|f| f.finding_id.to_string())
11651                    .collect::<Vec<_>>()
11652                    .iter()
11653                    .map(std::string::String::as_str)
11654                    .collect(),
11655            )
11656            .with_judgments(
11657                audit
11658                    .judgments
11659                    .iter()
11660                    .filter(|j| j.engagement_id == engagement.engagement_id)
11661                    .take(5)
11662                    .map(|j| j.judgment_id.to_string())
11663                    .collect::<Vec<_>>()
11664                    .iter()
11665                    .map(std::string::String::as_str)
11666                    .collect(),
11667            );
11668            let start_time = base_datetime - chrono::Duration::days(120);
11669            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
11670            add_result(&mut event_log, result);
11671
11672            if let Some(pb) = &pb {
11673                pb.inc(1);
11674            }
11675        }
11676
11677        // Generate events from Bank Reconciliations
11678        for recon in &financial_reporting.bank_reconciliations {
11679            let docs = BankReconDocuments::new(
11680                &recon.reconciliation_id,
11681                &recon.bank_account_id,
11682                &recon.company_code,
11683                recon.bank_ending_balance,
11684                &ocpm_uuid_factory,
11685            )
11686            .with_statement_lines(
11687                recon
11688                    .statement_lines
11689                    .iter()
11690                    .take(20)
11691                    .map(|l| l.line_id.as_str())
11692                    .collect(),
11693            )
11694            .with_reconciling_items(
11695                recon
11696                    .reconciling_items
11697                    .iter()
11698                    .take(10)
11699                    .map(|i| i.item_id.as_str())
11700                    .collect(),
11701            );
11702            let start_time = base_datetime - chrono::Duration::days(30);
11703            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
11704            add_result(&mut event_log, result);
11705
11706            if let Some(pb) = &pb {
11707                pb.inc(1);
11708            }
11709        }
11710
11711        // Compute process variants
11712        event_log.compute_variants();
11713
11714        let summary = event_log.summary();
11715
11716        if let Some(pb) = pb {
11717            pb.finish_with_message(format!(
11718                "Generated {} OCPM events, {} objects",
11719                summary.event_count, summary.object_count
11720            ));
11721        }
11722
11723        Ok(OcpmSnapshot {
11724            event_count: summary.event_count,
11725            object_count: summary.object_count,
11726            case_count: summary.case_count,
11727            event_log: Some(event_log),
11728        })
11729    }
11730
11731    /// Inject anomalies into journal entries.
11732    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
11733        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
11734
11735        // Read anomaly rates from config instead of using hardcoded values.
11736        // Priority: anomaly_injection config > fraud config > default 0.02
11737        let total_rate = if self.config.anomaly_injection.enabled {
11738            self.config.anomaly_injection.rates.total_rate
11739        } else if self.config.fraud.enabled {
11740            self.config.fraud.fraud_rate
11741        } else {
11742            0.02
11743        };
11744
11745        let fraud_rate = if self.config.anomaly_injection.enabled {
11746            self.config.anomaly_injection.rates.fraud_rate
11747        } else {
11748            AnomalyRateConfig::default().fraud_rate
11749        };
11750
11751        let error_rate = if self.config.anomaly_injection.enabled {
11752            self.config.anomaly_injection.rates.error_rate
11753        } else {
11754            AnomalyRateConfig::default().error_rate
11755        };
11756
11757        let process_issue_rate = if self.config.anomaly_injection.enabled {
11758            self.config.anomaly_injection.rates.process_rate
11759        } else {
11760            AnomalyRateConfig::default().process_issue_rate
11761        };
11762
11763        let anomaly_config = AnomalyInjectorConfig {
11764            rates: AnomalyRateConfig {
11765                total_rate,
11766                fraud_rate,
11767                error_rate,
11768                process_issue_rate,
11769                ..Default::default()
11770            },
11771            seed: self.seed + 5000,
11772            ..Default::default()
11773        };
11774
11775        let mut injector = AnomalyInjector::new(anomaly_config);
11776        let result = injector.process_entries(entries);
11777
11778        if let Some(pb) = &pb {
11779            pb.inc(entries.len() as u64);
11780            pb.finish_with_message("Anomaly injection complete");
11781        }
11782
11783        let mut by_type = HashMap::new();
11784        for label in &result.labels {
11785            *by_type
11786                .entry(format!("{:?}", label.anomaly_type))
11787                .or_insert(0) += 1;
11788        }
11789
11790        Ok(AnomalyLabels {
11791            labels: result.labels,
11792            summary: Some(result.summary),
11793            by_type,
11794        })
11795    }
11796
11797    /// Validate journal entries using running balance tracker.
11798    ///
11799    /// Applies all entries to the balance tracker and validates:
11800    /// - Each entry is internally balanced (debits = credits)
11801    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
11802    ///
11803    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
11804    /// excluded from balance validation as they may be intentionally unbalanced.
11805    fn validate_journal_entries(
11806        &mut self,
11807        entries: &[JournalEntry],
11808    ) -> SynthResult<BalanceValidationResult> {
11809        // Filter out entries with human errors as they may be intentionally unbalanced
11810        let clean_entries: Vec<&JournalEntry> = entries
11811            .iter()
11812            .filter(|e| {
11813                e.header
11814                    .header_text
11815                    .as_ref()
11816                    .map(|t| !t.contains("[HUMAN_ERROR:"))
11817                    .unwrap_or(true)
11818            })
11819            .collect();
11820
11821        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
11822
11823        // Configure tracker to not fail on errors (collect them instead)
11824        let config = BalanceTrackerConfig {
11825            validate_on_each_entry: false,   // We'll validate at the end
11826            track_history: false,            // Skip history for performance
11827            fail_on_validation_error: false, // Collect errors, don't fail
11828            ..Default::default()
11829        };
11830        let validation_currency = self
11831            .config
11832            .companies
11833            .first()
11834            .map(|c| c.currency.clone())
11835            .unwrap_or_else(|| "USD".to_string());
11836
11837        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
11838
11839        // Apply clean entries (without human errors)
11840        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
11841        let errors = tracker.apply_entries(&clean_refs);
11842
11843        if let Some(pb) = &pb {
11844            pb.inc(entries.len() as u64);
11845        }
11846
11847        // Check if any entries were unbalanced
11848        // Note: When fail_on_validation_error is false, errors are stored in tracker
11849        let has_unbalanced = tracker
11850            .get_validation_errors()
11851            .iter()
11852            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
11853
11854        // Validate balance sheet for each company
11855        // Include both returned errors and collected validation errors
11856        let mut all_errors = errors;
11857        all_errors.extend(tracker.get_validation_errors().iter().cloned());
11858        let company_codes: Vec<String> = self
11859            .config
11860            .companies
11861            .iter()
11862            .map(|c| c.code.clone())
11863            .collect();
11864
11865        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11866            .map(|d| d + chrono::Months::new(self.config.global.period_months))
11867            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11868
11869        for company_code in &company_codes {
11870            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
11871                all_errors.push(e);
11872            }
11873        }
11874
11875        // Get statistics after all mutable operations are done
11876        let stats = tracker.get_statistics();
11877
11878        // Determine if balanced overall
11879        let is_balanced = all_errors.is_empty();
11880
11881        if let Some(pb) = pb {
11882            let msg = if is_balanced {
11883                "Balance validation passed"
11884            } else {
11885                "Balance validation completed with errors"
11886            };
11887            pb.finish_with_message(msg);
11888        }
11889
11890        Ok(BalanceValidationResult {
11891            validated: true,
11892            is_balanced,
11893            entries_processed: stats.entries_processed,
11894            total_debits: stats.total_debits,
11895            total_credits: stats.total_credits,
11896            accounts_tracked: stats.accounts_tracked,
11897            companies_tracked: stats.companies_tracked,
11898            validation_errors: all_errors,
11899            has_unbalanced_entries: has_unbalanced,
11900        })
11901    }
11902
11903    /// Inject data quality variations into journal entries.
11904    ///
11905    /// Applies typos, missing values, and format variations to make
11906    /// the synthetic data more realistic for testing data cleaning pipelines.
11907    fn inject_data_quality(
11908        &mut self,
11909        entries: &mut [JournalEntry],
11910    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
11911        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
11912
11913        // Build config from user-specified schema settings when data_quality is enabled;
11914        // otherwise fall back to the low-rate minimal() preset.
11915        let config = if self.config.data_quality.enabled {
11916            let dq = &self.config.data_quality;
11917            // Propagate per-field rates and protected fields from the schema
11918            // so users can dial in real-production NULL profiles per field
11919            // (e.g. CostCenter 96.5% NULL, Invoice_Reference 100% NULL).
11920            let field_rates = dq.missing_values.field_rates.clone();
11921            let mut required_fields: std::collections::HashSet<String> =
11922                dq.missing_values.protected_fields.iter().cloned().collect();
11923            // Always preserve audit-critical identifiers regardless of
11924            // user config — losing these breaks downstream joins.
11925            for f in [
11926                "document_id",
11927                "company_code",
11928                "posting_date",
11929                "fiscal_year",
11930                "fiscal_period",
11931                "gl_account",
11932                "line_number",
11933                "transaction_id",
11934            ] {
11935                required_fields.insert(f.to_string());
11936            }
11937            DataQualityConfig {
11938                enable_missing_values: dq.missing_values.enabled,
11939                missing_values: datasynth_generators::MissingValueConfig {
11940                    global_rate: dq.effective_missing_rate(),
11941                    field_rates,
11942                    required_fields,
11943                    ..Default::default()
11944                },
11945                enable_format_variations: dq.format_variations.enabled,
11946                format_variations: datasynth_generators::FormatVariationConfig {
11947                    date_variation_rate: dq.format_variations.dates.rate,
11948                    amount_variation_rate: dq.format_variations.amounts.rate,
11949                    identifier_variation_rate: dq.format_variations.identifiers.rate,
11950                    ..Default::default()
11951                },
11952                enable_duplicates: dq.duplicates.enabled,
11953                duplicates: datasynth_generators::DuplicateConfig {
11954                    duplicate_rate: dq.effective_duplicate_rate(),
11955                    ..Default::default()
11956                },
11957                enable_typos: dq.typos.enabled,
11958                typos: datasynth_generators::TypoConfig {
11959                    char_error_rate: dq.effective_typo_rate(),
11960                    ..Default::default()
11961                },
11962                enable_encoding_issues: dq.encoding_issues.enabled,
11963                encoding_issue_rate: dq.encoding_issues.rate,
11964                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
11965                track_statistics: true,
11966            }
11967        } else {
11968            DataQualityConfig::minimal()
11969        };
11970        let mut injector = DataQualityInjector::new(config);
11971
11972        // Wire country pack for locale-aware format baselines
11973        injector.set_country_pack(self.primary_pack().clone());
11974
11975        // Build context for missing value decisions
11976        let context = HashMap::new();
11977
11978        for entry in entries.iter_mut() {
11979            // Process header_text field (common target for typos)
11980            if let Some(text) = &entry.header.header_text {
11981                let processed = injector.process_text_field(
11982                    "header_text",
11983                    text,
11984                    &entry.header.document_id.to_string(),
11985                    &context,
11986                );
11987                match processed {
11988                    Some(new_text) if new_text != *text => {
11989                        entry.header.header_text = Some(new_text);
11990                    }
11991                    None => {
11992                        entry.header.header_text = None; // Missing value
11993                    }
11994                    _ => {}
11995                }
11996            }
11997
11998            // Process reference field
11999            if let Some(ref_text) = &entry.header.reference {
12000                let processed = injector.process_text_field(
12001                    "reference",
12002                    ref_text,
12003                    &entry.header.document_id.to_string(),
12004                    &context,
12005                );
12006                match processed {
12007                    Some(new_text) if new_text != *ref_text => {
12008                        entry.header.reference = Some(new_text);
12009                    }
12010                    None => {
12011                        entry.header.reference = None;
12012                    }
12013                    _ => {}
12014                }
12015            }
12016
12017            // Process user_persona field (potential for typos in user IDs)
12018            let user_persona = entry.header.user_persona.clone();
12019            if let Some(processed) = injector.process_text_field(
12020                "user_persona",
12021                &user_persona,
12022                &entry.header.document_id.to_string(),
12023                &context,
12024            ) {
12025                if processed != user_persona {
12026                    entry.header.user_persona = processed;
12027                }
12028            }
12029
12030            // Process line items
12031            for line in &mut entry.lines {
12032                // Process line description if present
12033                if let Some(ref text) = line.line_text {
12034                    let processed = injector.process_text_field(
12035                        "line_text",
12036                        text,
12037                        &entry.header.document_id.to_string(),
12038                        &context,
12039                    );
12040                    match processed {
12041                        Some(new_text) if new_text != *text => {
12042                            line.line_text = Some(new_text);
12043                        }
12044                        None => {
12045                            line.line_text = None;
12046                        }
12047                        _ => {}
12048                    }
12049                }
12050
12051                // Process cost_center if present
12052                if let Some(cc) = &line.cost_center {
12053                    let processed = injector.process_text_field(
12054                        "cost_center",
12055                        cc,
12056                        &entry.header.document_id.to_string(),
12057                        &context,
12058                    );
12059                    match processed {
12060                        Some(new_cc) if new_cc != *cc => {
12061                            line.cost_center = Some(new_cc);
12062                        }
12063                        None => {
12064                            line.cost_center = None;
12065                        }
12066                        _ => {}
12067                    }
12068                }
12069
12070                // Extended field coverage (v5.6+): apply NULL injection to
12071                // every Option<String> on the line so users can match
12072                // arbitrary real-production NULL profiles via
12073                // `data_quality.missing_values.field_rates`.
12074                //
12075                // Macro-free helper: process_field returns the new value
12076                // ({Some, None, unchanged}) and we apply it back.
12077                macro_rules! process_opt_field {
12078                    ($field_name:expr, $opt:expr) => {
12079                        if let Some(val) = $opt.as_ref() {
12080                            match injector.process_text_field(
12081                                $field_name,
12082                                val,
12083                                &entry.header.document_id.to_string(),
12084                                &context,
12085                            ) {
12086                                Some(new_val) if new_val != *val => {
12087                                    *$opt = Some(new_val);
12088                                }
12089                                None => {
12090                                    *$opt = None;
12091                                }
12092                                _ => {}
12093                            }
12094                        }
12095                    };
12096                }
12097
12098                process_opt_field!("profit_center", &mut line.profit_center);
12099                process_opt_field!("assignment", &mut line.assignment);
12100                process_opt_field!("tax_code", &mut line.tax_code);
12101                process_opt_field!("account_description", &mut line.account_description);
12102                process_opt_field!(
12103                    "auxiliary_account_number",
12104                    &mut line.auxiliary_account_number
12105                );
12106                process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12107                process_opt_field!("lettrage", &mut line.lettrage);
12108            }
12109
12110            if let Some(pb) = &pb {
12111                pb.inc(1);
12112            }
12113        }
12114
12115        if let Some(pb) = pb {
12116            pb.finish_with_message("Data quality injection complete");
12117        }
12118
12119        let quality_issues = injector.issues().to_vec();
12120        Ok((injector.stats().clone(), quality_issues))
12121    }
12122
12123    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
12124    ///
12125    /// Creates complete audit documentation for each company in the configuration,
12126    /// following ISA standards:
12127    /// - ISA 210/220: Engagement acceptance and terms
12128    /// - ISA 230: Audit documentation (workpapers)
12129    /// - ISA 265: Control deficiencies (findings)
12130    /// - ISA 315/330: Risk assessment and response
12131    /// - ISA 500: Audit evidence
12132    /// - ISA 200: Professional judgment
12133    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12134        // Check if FSM-driven audit generation is enabled
12135        let use_fsm = self
12136            .config
12137            .audit
12138            .fsm
12139            .as_ref()
12140            .map(|f| f.enabled)
12141            .unwrap_or(false);
12142
12143        if use_fsm {
12144            return self.generate_audit_data_with_fsm(entries);
12145        }
12146
12147        // --- Legacy (non-FSM) audit generation follows ---
12148        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12149            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12150        let fiscal_year = start_date.year() as u16;
12151        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12152
12153        // Calculate rough total revenue from entries for materiality
12154        let total_revenue: rust_decimal::Decimal = entries
12155            .iter()
12156            .flat_map(|e| e.lines.iter())
12157            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12158            .map(|l| l.credit_amount)
12159            .sum();
12160
12161        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
12162        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12163
12164        let mut snapshot = AuditSnapshot::default();
12165
12166        // Initialize generators
12167        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12168        // v3.3.2: thread the user-facing audit schema config into the
12169        // engagement generator (team size range).
12170        engagement_gen.set_team_config(&self.config.audit.team);
12171
12172        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12173        // v3.3.2: thread workpaper + review workflow schema config into
12174        // the workpaper generator (per-section count range + review
12175        // delay ranges).
12176        workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12177        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12178        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12179        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12180        // v3.2.1+: user-supplied finding titles + narratives flow through shared provider
12181        finding_gen.set_template_provider(self.template_provider.clone());
12182        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12183        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12184        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12185        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12186        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12187        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12188        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12189
12190        // Get list of accounts from CoA for risk assessment
12191        let accounts: Vec<String> = self
12192            .coa
12193            .as_ref()
12194            .map(|coa| {
12195                coa.get_postable_accounts()
12196                    .iter()
12197                    .map(|acc| acc.account_code().to_string())
12198                    .collect()
12199            })
12200            .unwrap_or_default();
12201
12202        // Generate engagements for each company
12203        for (i, company) in self.config.companies.iter().enumerate() {
12204            // Calculate company-specific revenue (proportional to volume weight)
12205            let company_revenue = total_revenue
12206                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12207
12208            // Generate engagements for this company
12209            let engagements_for_company =
12210                self.phase_config.audit_engagements / self.config.companies.len().max(1);
12211            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12212                1
12213            } else {
12214                0
12215            };
12216
12217            for _eng_idx in 0..(engagements_for_company + extra) {
12218                // v3.3.2: draw engagement type from the user-configured
12219                // distribution instead of always using the default
12220                // (AnnualAudit). Falls back to the default when all
12221                // probabilities are zero.
12222                let eng_type =
12223                    engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12224
12225                // Generate the engagement
12226                let mut engagement = engagement_gen.generate_engagement(
12227                    &company.code,
12228                    &company.name,
12229                    fiscal_year,
12230                    period_end,
12231                    company_revenue,
12232                    Some(eng_type),
12233                );
12234
12235                // Replace synthetic team IDs with real employee IDs from master data
12236                if !self.master_data.employees.is_empty() {
12237                    let emp_count = self.master_data.employees.len();
12238                    // Use employee IDs deterministically based on engagement index
12239                    let base = (i * 10 + _eng_idx) % emp_count;
12240                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
12241                        .employee_id
12242                        .clone();
12243                    engagement.engagement_manager_id = self.master_data.employees
12244                        [(base + 1) % emp_count]
12245                        .employee_id
12246                        .clone();
12247                    let real_team: Vec<String> = engagement
12248                        .team_member_ids
12249                        .iter()
12250                        .enumerate()
12251                        .map(|(j, _)| {
12252                            self.master_data.employees[(base + 2 + j) % emp_count]
12253                                .employee_id
12254                                .clone()
12255                        })
12256                        .collect();
12257                    engagement.team_member_ids = real_team;
12258                }
12259
12260                if let Some(pb) = &pb {
12261                    pb.inc(1);
12262                }
12263
12264                // Get team members from the engagement
12265                let team_members: Vec<String> = engagement.team_member_ids.clone();
12266
12267                // Generate workpapers for the engagement.
12268                // v3.3.2: honor `audit.generate_workpapers` — when false,
12269                // workpapers (and dependent evidence) are skipped while
12270                // the engagement itself, risk assessments, findings, etc.
12271                // still generate normally.
12272                let workpapers = if self.config.audit.generate_workpapers {
12273                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
12274                } else {
12275                    Vec::new()
12276                };
12277
12278                for wp in &workpapers {
12279                    if let Some(pb) = &pb {
12280                        pb.inc(1);
12281                    }
12282
12283                    // Generate evidence for each workpaper
12284                    let evidence = evidence_gen.generate_evidence_for_workpaper(
12285                        wp,
12286                        &team_members,
12287                        wp.preparer_date,
12288                    );
12289
12290                    for _ in &evidence {
12291                        if let Some(pb) = &pb {
12292                            pb.inc(1);
12293                        }
12294                    }
12295
12296                    snapshot.evidence.extend(evidence);
12297                }
12298
12299                // Generate risk assessments for the engagement
12300                let risks =
12301                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
12302
12303                for _ in &risks {
12304                    if let Some(pb) = &pb {
12305                        pb.inc(1);
12306                    }
12307                }
12308                snapshot.risk_assessments.extend(risks);
12309
12310                // Generate findings for the engagement
12311                let findings = finding_gen.generate_findings_for_engagement(
12312                    &engagement,
12313                    &workpapers,
12314                    &team_members,
12315                );
12316
12317                for _ in &findings {
12318                    if let Some(pb) = &pb {
12319                        pb.inc(1);
12320                    }
12321                }
12322                snapshot.findings.extend(findings);
12323
12324                // Generate professional judgments for the engagement
12325                let judgments =
12326                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
12327
12328                for _ in &judgments {
12329                    if let Some(pb) = &pb {
12330                        pb.inc(1);
12331                    }
12332                }
12333                snapshot.judgments.extend(judgments);
12334
12335                // ISA 505: External confirmations and responses
12336                let (confs, resps) =
12337                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12338                snapshot.confirmations.extend(confs);
12339                snapshot.confirmation_responses.extend(resps);
12340
12341                // ISA 330: Procedure steps per workpaper
12342                let team_pairs: Vec<(String, String)> = team_members
12343                    .iter()
12344                    .map(|id| {
12345                        let name = self
12346                            .master_data
12347                            .employees
12348                            .iter()
12349                            .find(|e| e.employee_id == *id)
12350                            .map(|e| e.display_name.clone())
12351                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12352                        (id.clone(), name)
12353                    })
12354                    .collect();
12355                for wp in &workpapers {
12356                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12357                    snapshot.procedure_steps.extend(steps);
12358                }
12359
12360                // ISA 530: Samples per workpaper
12361                for wp in &workpapers {
12362                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12363                        snapshot.samples.push(sample);
12364                    }
12365                }
12366
12367                // ISA 520: Analytical procedures
12368                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12369                snapshot.analytical_results.extend(analytical);
12370
12371                // ISA 610: Internal audit function and reports
12372                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
12373                snapshot.ia_functions.push(ia_func);
12374                snapshot.ia_reports.extend(ia_reports);
12375
12376                // ISA 550: Related parties and transactions
12377                let vendor_names: Vec<String> = self
12378                    .master_data
12379                    .vendors
12380                    .iter()
12381                    .map(|v| v.name.clone())
12382                    .collect();
12383                let customer_names: Vec<String> = self
12384                    .master_data
12385                    .customers
12386                    .iter()
12387                    .map(|c| c.name.clone())
12388                    .collect();
12389                let (parties, rp_txns) =
12390                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
12391                snapshot.related_parties.extend(parties);
12392                snapshot.related_party_transactions.extend(rp_txns);
12393
12394                // Add workpapers after findings since findings need them
12395                snapshot.workpapers.extend(workpapers);
12396
12397                // Generate audit scope record for this engagement (one per engagement)
12398                {
12399                    let scope_id = format!(
12400                        "SCOPE-{}-{}",
12401                        engagement.engagement_id.simple(),
12402                        &engagement.client_entity_id
12403                    );
12404                    let scope = datasynth_core::models::audit::AuditScope::new(
12405                        scope_id.clone(),
12406                        engagement.engagement_id.to_string(),
12407                        engagement.client_entity_id.clone(),
12408                        engagement.materiality,
12409                    );
12410                    // Wire scope_id back to engagement
12411                    let mut eng = engagement;
12412                    eng.scope_id = Some(scope_id);
12413                    snapshot.audit_scopes.push(scope);
12414                    snapshot.engagements.push(eng);
12415                }
12416            }
12417        }
12418
12419        // ----------------------------------------------------------------
12420        // ISA 600: Group audit — component auditors, plan, instructions, reports
12421        // ----------------------------------------------------------------
12422        if self.config.companies.len() > 1 {
12423            // Use materiality from the first engagement if available, otherwise
12424            // derive a reasonable figure from total revenue.
12425            let group_materiality = snapshot
12426                .engagements
12427                .first()
12428                .map(|e| e.materiality)
12429                .unwrap_or_else(|| {
12430                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
12431                    total_revenue * pct
12432                });
12433
12434            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
12435            let group_engagement_id = snapshot
12436                .engagements
12437                .first()
12438                .map(|e| e.engagement_id.to_string())
12439                .unwrap_or_else(|| "GROUP-ENG".to_string());
12440
12441            let component_snapshot = component_gen.generate(
12442                &self.config.companies,
12443                group_materiality,
12444                &group_engagement_id,
12445                period_end,
12446            );
12447
12448            snapshot.component_auditors = component_snapshot.component_auditors;
12449            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
12450            snapshot.component_instructions = component_snapshot.component_instructions;
12451            snapshot.component_reports = component_snapshot.component_reports;
12452
12453            info!(
12454                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
12455                snapshot.component_auditors.len(),
12456                snapshot.component_instructions.len(),
12457                snapshot.component_reports.len(),
12458            );
12459        }
12460
12461        // ----------------------------------------------------------------
12462        // ISA 210: Engagement letters — one per engagement
12463        // ----------------------------------------------------------------
12464        {
12465            let applicable_framework = self
12466                .config
12467                .accounting_standards
12468                .framework
12469                .as_ref()
12470                .map(|f| format!("{f:?}"))
12471                .unwrap_or_else(|| "IFRS".to_string());
12472
12473            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
12474            let entity_count = self.config.companies.len();
12475
12476            for engagement in &snapshot.engagements {
12477                let company = self
12478                    .config
12479                    .companies
12480                    .iter()
12481                    .find(|c| c.code == engagement.client_entity_id);
12482                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
12483                let letter_date = engagement.planning_start;
12484                let letter = letter_gen.generate(
12485                    &engagement.engagement_id.to_string(),
12486                    &engagement.client_name,
12487                    entity_count,
12488                    engagement.period_end_date,
12489                    currency,
12490                    &applicable_framework,
12491                    letter_date,
12492                );
12493                snapshot.engagement_letters.push(letter);
12494            }
12495
12496            info!(
12497                "ISA 210 engagement letters: {} generated",
12498                snapshot.engagement_letters.len()
12499            );
12500        }
12501
12502        // ----------------------------------------------------------------
12503        // v3.3.0: Legal documents per engagement (WI: LegalDocumentGenerator)
12504        // ----------------------------------------------------------------
12505        if self.phase_config.generate_legal_documents {
12506            use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
12507            let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
12508            for engagement in &snapshot.engagements {
12509                // Build an employee name list for signatory drawing —
12510                // prefer employees from the engaged entity, fall back to
12511                // the first 10 employees across all entities.
12512                let employee_names: Vec<String> = self
12513                    .master_data
12514                    .employees
12515                    .iter()
12516                    .filter(|e| e.company_code == engagement.client_entity_id)
12517                    .map(|e| e.display_name.clone())
12518                    .collect();
12519                let names_to_use = if !employee_names.is_empty() {
12520                    employee_names
12521                } else {
12522                    self.master_data
12523                        .employees
12524                        .iter()
12525                        .take(10)
12526                        .map(|e| e.display_name.clone())
12527                        .collect()
12528                };
12529                let docs = legal_gen.generate(
12530                    &engagement.client_entity_id,
12531                    engagement.fiscal_year as i32,
12532                    &names_to_use,
12533                );
12534                snapshot.legal_documents.extend(docs);
12535            }
12536            info!(
12537                "v3.3.0 legal documents: {} emitted across {} engagements",
12538                snapshot.legal_documents.len(),
12539                snapshot.engagements.len()
12540            );
12541        }
12542
12543        // ----------------------------------------------------------------
12544        // v3.3.0: IT general controls — access logs + change records
12545        //
12546        // `ItControlsGenerator` runs one pass per company (not per
12547        // engagement) so employee sets and system catalogs stay
12548        // coherent. We derive the period from the earliest engagement's
12549        // planning_start through the latest engagement's period_end_date
12550        // for each company.
12551        // ----------------------------------------------------------------
12552        if self.phase_config.generate_it_controls {
12553            use datasynth_generators::it_controls_generator::ItControlsGenerator;
12554            use std::collections::HashMap;
12555            let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
12556
12557            // Group engagements by company to produce one IT-controls
12558            // window per entity.
12559            let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
12560                HashMap::new();
12561            for engagement in &snapshot.engagements {
12562                let entry = by_company
12563                    .entry(engagement.client_entity_id.clone())
12564                    .or_insert((engagement.planning_start, engagement.period_end_date));
12565                if engagement.planning_start < entry.0 {
12566                    entry.0 = engagement.planning_start;
12567                }
12568                if engagement.period_end_date > entry.1 {
12569                    entry.1 = engagement.period_end_date;
12570                }
12571            }
12572
12573            // Standard system catalog — populated from known ERP / app
12574            // names. Keeps the generator's data shape stable when the
12575            // user hasn't configured IT-system naming separately.
12576            let systems: Vec<String> = vec![
12577                "SAP ECC",
12578                "SAP S/4 HANA",
12579                "Oracle EBS",
12580                "Workday",
12581                "NetSuite",
12582                "Active Directory",
12583                "SharePoint",
12584                "Salesforce",
12585                "ServiceNow",
12586                "Jira",
12587                "GitHub Enterprise",
12588                "AWS Console",
12589                "Okta",
12590            ]
12591            .into_iter()
12592            .map(String::from)
12593            .collect();
12594
12595            for (company_code, (start, end)) in by_company {
12596                let emps: Vec<(String, String)> = self
12597                    .master_data
12598                    .employees
12599                    .iter()
12600                    .filter(|e| e.company_code == company_code)
12601                    .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12602                    .collect();
12603                if emps.is_empty() {
12604                    continue;
12605                }
12606                // Approximate the period in months as (days / 30) + 1 using
12607                // integer division, clamped to a minimum of 1.
12608                let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
12609                let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
12610                let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
12611                snapshot.it_controls_access_logs.extend(access_logs);
12612                snapshot.it_controls_change_records.extend(change_records);
12613            }
12614
12615            info!(
12616                "v3.3.0 IT controls: {} access logs, {} change records",
12617                snapshot.it_controls_access_logs.len(),
12618                snapshot.it_controls_change_records.len()
12619            );
12620        }
12621
12622        // ----------------------------------------------------------------
12623        // ISA 560 / IAS 10: Subsequent events
12624        // ----------------------------------------------------------------
12625        {
12626            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
12627            let entity_codes: Vec<String> = self
12628                .config
12629                .companies
12630                .iter()
12631                .map(|c| c.code.clone())
12632                .collect();
12633            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
12634            info!(
12635                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
12636                subsequent.len(),
12637                subsequent
12638                    .iter()
12639                    .filter(|e| matches!(
12640                        e.classification,
12641                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
12642                    ))
12643                    .count(),
12644                subsequent
12645                    .iter()
12646                    .filter(|e| matches!(
12647                        e.classification,
12648                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
12649                    ))
12650                    .count(),
12651            );
12652            snapshot.subsequent_events = subsequent;
12653        }
12654
12655        // ----------------------------------------------------------------
12656        // ISA 402: Service organization controls
12657        // ----------------------------------------------------------------
12658        {
12659            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
12660            let entity_codes: Vec<String> = self
12661                .config
12662                .companies
12663                .iter()
12664                .map(|c| c.code.clone())
12665                .collect();
12666            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
12667            info!(
12668                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
12669                soc_snapshot.service_organizations.len(),
12670                soc_snapshot.soc_reports.len(),
12671                soc_snapshot.user_entity_controls.len(),
12672            );
12673            snapshot.service_organizations = soc_snapshot.service_organizations;
12674            snapshot.soc_reports = soc_snapshot.soc_reports;
12675            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
12676        }
12677
12678        // ----------------------------------------------------------------
12679        // ISA 570: Going concern assessments
12680        // ----------------------------------------------------------------
12681        {
12682            use datasynth_generators::audit::going_concern_generator::{
12683                GoingConcernGenerator, GoingConcernInput,
12684            };
12685            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
12686            let entity_codes: Vec<String> = self
12687                .config
12688                .companies
12689                .iter()
12690                .map(|c| c.code.clone())
12691                .collect();
12692            // Assessment date = period end + 75 days (typical sign-off window).
12693            let assessment_date = period_end + chrono::Duration::days(75);
12694            let period_label = format!("FY{}", period_end.year());
12695
12696            // Build financial inputs from actual journal entries.
12697            //
12698            // We derive approximate P&L, working capital, and operating cash flow
12699            // by aggregating GL account balances from the journal entry population.
12700            // Account ranges used (standard chart):
12701            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
12702            //   Expenses:        6xxx (debit-normal)
12703            //   Current assets:  1000–1499 (AR=1100, cash=1000, inventory=1300)
12704            //   Current liabs:   2000–2499 (AP=2000, accruals=2100); 2500–2999 = long-term debt
12705            //   Operating CF:    net income, unadjusted (rough proxy)
12706            let gc_inputs: Vec<GoingConcernInput> = self
12707                .config
12708                .companies
12709                .iter()
12710                .map(|company| {
12711                    let code = &company.code;
12712                    let mut revenue = rust_decimal::Decimal::ZERO;
12713                    let mut expenses = rust_decimal::Decimal::ZERO;
12714                    let mut current_assets = rust_decimal::Decimal::ZERO;
12715                    let mut current_liabs = rust_decimal::Decimal::ZERO;
12716                    let mut total_debt = rust_decimal::Decimal::ZERO;
12717
12718                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
12719                        for line in &je.lines {
12720                            let acct = line.gl_account.as_str();
12721                            let net = line.debit_amount - line.credit_amount;
12722                            if acct.starts_with('4') {
12723                                // Revenue accounts: credit-normal, so negative net = revenue earned
12724                                revenue -= net;
12725                            } else if acct.starts_with('6') {
12726                                // Expense accounts: debit-normal
12727                                expenses += net;
12728                            }
12729                            // Balance sheet accounts for working capital
12730                            if acct.starts_with('1') {
12731                                // Current asset accounts (1000–1499)
12732                                if let Ok(n) = acct.parse::<u32>() {
12733                                    if (1000..=1499).contains(&n) {
12734                                        current_assets += net;
12735                                    }
12736                                }
12737                            } else if acct.starts_with('2') {
12738                                if let Ok(n) = acct.parse::<u32>() {
12739                                    if (2000..=2499).contains(&n) {
12740                                        // Current liabilities
12741                                        current_liabs -= net; // credit-normal
12742                                    } else if (2500..=2999).contains(&n) {
12743                                        // Long-term debt
12744                                        total_debt -= net;
12745                                    }
12746                                }
12747                            }
12748                        }
12749                    }
12750
12751                    let net_income = revenue - expenses;
12752                    let working_capital = current_assets - current_liabs;
12753                    // Rough operating CF proxy: net income (full accrual CF calculation
12754                    // is done separately in the cash flow statement generator)
12755                    let operating_cash_flow = net_income;
12756
12757                    GoingConcernInput {
12758                        entity_code: code.clone(),
12759                        net_income,
12760                        working_capital,
12761                        operating_cash_flow,
12762                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
12763                        assessment_date,
12764                    }
12765                })
12766                .collect();
12767
12768            let assessments = if gc_inputs.is_empty() {
12769                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
12770            } else {
12771                gc_gen.generate_for_entities_with_inputs(
12772                    &entity_codes,
12773                    &gc_inputs,
12774                    assessment_date,
12775                    &period_label,
12776                )
12777            };
12778            info!(
12779                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
12780                assessments.len(),
12781                assessments.iter().filter(|a| matches!(
12782                    a.auditor_conclusion,
12783                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
12784                )).count(),
12785                assessments.iter().filter(|a| matches!(
12786                    a.auditor_conclusion,
12787                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
12788                )).count(),
12789                assessments.iter().filter(|a| matches!(
12790                    a.auditor_conclusion,
12791                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
12792                )).count(),
12793            );
12794            snapshot.going_concern_assessments = assessments;
12795        }
12796
12797        // ----------------------------------------------------------------
12798        // ISA 540: Accounting estimates
12799        // ----------------------------------------------------------------
12800        {
12801            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
12802            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
12803            let entity_codes: Vec<String> = self
12804                .config
12805                .companies
12806                .iter()
12807                .map(|c| c.code.clone())
12808                .collect();
12809            let estimates = est_gen.generate_for_entities(&entity_codes);
12810            info!(
12811                "ISA 540 accounting estimates: {} estimates across {} entities \
12812                 ({} with retrospective reviews, {} with auditor point estimates)",
12813                estimates.len(),
12814                entity_codes.len(),
12815                estimates
12816                    .iter()
12817                    .filter(|e| e.retrospective_review.is_some())
12818                    .count(),
12819                estimates
12820                    .iter()
12821                    .filter(|e| e.auditor_point_estimate.is_some())
12822                    .count(),
12823            );
12824            snapshot.accounting_estimates = estimates;
12825        }
12826
12827        // ----------------------------------------------------------------
12828        // ISA 700/701/705/706: Audit opinions (one per engagement)
12829        // ----------------------------------------------------------------
12830        {
12831            use datasynth_generators::audit::audit_opinion_generator::{
12832                AuditOpinionGenerator, AuditOpinionInput,
12833            };
12834
12835            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
12836
12837            // Build inputs — one per engagement, linking findings and going concern.
12838            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
12839                .engagements
12840                .iter()
12841                .map(|eng| {
12842                    // Collect findings for this engagement.
12843                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12844                        .findings
12845                        .iter()
12846                        .filter(|f| f.engagement_id == eng.engagement_id)
12847                        .cloned()
12848                        .collect();
12849
12850                    // Going concern for this entity.
12851                    let gc = snapshot
12852                        .going_concern_assessments
12853                        .iter()
12854                        .find(|g| g.entity_code == eng.client_entity_id)
12855                        .cloned();
12856
12857                    // Component reports relevant to this engagement.
12858                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
12859                        snapshot.component_reports.clone();
12860
12861                    let auditor = self
12862                        .master_data
12863                        .employees
12864                        .first()
12865                        .map(|e| e.display_name.clone())
12866                        .unwrap_or_else(|| "Global Audit LLP".into());
12867
12868                    let partner = self
12869                        .master_data
12870                        .employees
12871                        .get(1)
12872                        .map(|e| e.display_name.clone())
12873                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
12874
12875                    AuditOpinionInput {
12876                        entity_code: eng.client_entity_id.clone(),
12877                        entity_name: eng.client_name.clone(),
12878                        engagement_id: eng.engagement_id,
12879                        period_end: eng.period_end_date,
12880                        findings: eng_findings,
12881                        going_concern: gc,
12882                        component_reports: comp_reports,
12883                        // Mark as US-listed when audit standards include PCAOB.
12884                        is_us_listed: {
12885                            let fw = &self.config.audit_standards.isa_compliance.framework;
12886                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
12887                        },
12888                        auditor_name: auditor,
12889                        engagement_partner: partner,
12890                    }
12891                })
12892                .collect();
12893
12894            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
12895
12896            for go in &generated_opinions {
12897                snapshot
12898                    .key_audit_matters
12899                    .extend(go.key_audit_matters.clone());
12900            }
12901            snapshot.audit_opinions = generated_opinions
12902                .into_iter()
12903                .map(|go| go.opinion)
12904                .collect();
12905
12906            info!(
12907                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
12908                snapshot.audit_opinions.len(),
12909                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
12910                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
12911                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
12912                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
12913            );
12914        }
12915
12916        // ----------------------------------------------------------------
12917        // SOX 302 / 404 assessments
12918        // ----------------------------------------------------------------
12919        {
12920            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
12921
12922            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
12923
12924            for (i, company) in self.config.companies.iter().enumerate() {
12925                // Collect findings for this company's engagements.
12926                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
12927                    .engagements
12928                    .iter()
12929                    .filter(|e| e.client_entity_id == company.code)
12930                    .map(|e| e.engagement_id)
12931                    .collect();
12932
12933                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12934                    .findings
12935                    .iter()
12936                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
12937                    .cloned()
12938                    .collect();
12939
12940                // Derive executive names from employee list.
12941                let emp_count = self.master_data.employees.len();
12942                let ceo_name = if emp_count > 0 {
12943                    self.master_data.employees[i % emp_count]
12944                        .display_name
12945                        .clone()
12946                } else {
12947                    format!("CEO of {}", company.name)
12948                };
12949                let cfo_name = if emp_count > 1 {
12950                    self.master_data.employees[(i + 1) % emp_count]
12951                        .display_name
12952                        .clone()
12953                } else {
12954                    format!("CFO of {}", company.name)
12955                };
12956
12957                // Use engagement materiality if available.
12958                let materiality = snapshot
12959                    .engagements
12960                    .iter()
12961                    .find(|e| e.client_entity_id == company.code)
12962                    .map(|e| e.materiality)
12963                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
12964
12965                let input = SoxGeneratorInput {
12966                    company_code: company.code.clone(),
12967                    company_name: company.name.clone(),
12968                    fiscal_year,
12969                    period_end,
12970                    findings: company_findings,
12971                    ceo_name,
12972                    cfo_name,
12973                    materiality_threshold: materiality,
12974                    revenue_percent: rust_decimal::Decimal::from(100),
12975                    assets_percent: rust_decimal::Decimal::from(100),
12976                    significant_accounts: vec![
12977                        "Revenue".into(),
12978                        "Accounts Receivable".into(),
12979                        "Inventory".into(),
12980                        "Fixed Assets".into(),
12981                        "Accounts Payable".into(),
12982                    ],
12983                };
12984
12985                let (certs, assessment) = sox_gen.generate(&input);
12986                snapshot.sox_302_certifications.extend(certs);
12987                snapshot.sox_404_assessments.push(assessment);
12988            }
12989
12990            info!(
12991                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
12992                snapshot.sox_302_certifications.len(),
12993                snapshot.sox_404_assessments.len(),
12994                snapshot
12995                    .sox_404_assessments
12996                    .iter()
12997                    .filter(|a| a.icfr_effective)
12998                    .count(),
12999                snapshot
13000                    .sox_404_assessments
13001                    .iter()
13002                    .filter(|a| !a.icfr_effective)
13003                    .count(),
13004            );
13005        }
13006
13007        // ----------------------------------------------------------------
13008        // ISA 320: Materiality calculations (one per entity)
13009        // ----------------------------------------------------------------
13010        {
13011            use datasynth_generators::audit::materiality_generator::{
13012                MaterialityGenerator, MaterialityInput,
13013            };
13014
13015            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13016
13017            // Compute per-company financials from JEs.
13018            // Asset accounts start with '1', revenue with '4',
13019            // expense accounts with '5' or '6'.
13020            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13021
13022            for company in &self.config.companies {
13023                let company_code = company.code.clone();
13024
13025                // Revenue: credit-side entries on 4xxx accounts
13026                let company_revenue: rust_decimal::Decimal = entries
13027                    .iter()
13028                    .filter(|e| e.company_code() == company_code)
13029                    .flat_map(|e| e.lines.iter())
13030                    .filter(|l| l.account_code.starts_with('4'))
13031                    .map(|l| l.credit_amount)
13032                    .sum();
13033
13034                // Total assets: debit balances on 1xxx accounts
13035                let total_assets: rust_decimal::Decimal = entries
13036                    .iter()
13037                    .filter(|e| e.company_code() == company_code)
13038                    .flat_map(|e| e.lines.iter())
13039                    .filter(|l| l.account_code.starts_with('1'))
13040                    .map(|l| l.debit_amount)
13041                    .sum();
13042
13043                // Expenses: debit-side entries on 5xxx/6xxx accounts
13044                let total_expenses: rust_decimal::Decimal = entries
13045                    .iter()
13046                    .filter(|e| e.company_code() == company_code)
13047                    .flat_map(|e| e.lines.iter())
13048                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13049                    .map(|l| l.debit_amount)
13050                    .sum();
13051
13052                // Equity: credit balances on 3xxx accounts
13053                let equity: rust_decimal::Decimal = entries
13054                    .iter()
13055                    .filter(|e| e.company_code() == company_code)
13056                    .flat_map(|e| e.lines.iter())
13057                    .filter(|l| l.account_code.starts_with('3'))
13058                    .map(|l| l.credit_amount)
13059                    .sum();
13060
13061                let pretax_income = company_revenue - total_expenses;
13062
13063                // If no company-specific data, fall back to proportional share
13064                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13065                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
13066                        .unwrap_or(rust_decimal::Decimal::ONE);
13067                    (
13068                        total_revenue * w,
13069                        total_revenue * w * rust_decimal::Decimal::from(3),
13070                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
13071                        total_revenue * w * rust_decimal::Decimal::from(2),
13072                    )
13073                } else {
13074                    (company_revenue, total_assets, pretax_income, equity)
13075                };
13076
13077                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
13078
13079                materiality_inputs.push(MaterialityInput {
13080                    entity_code: company_code,
13081                    period: format!("FY{}", fiscal_year),
13082                    revenue: rev,
13083                    pretax_income: pti,
13084                    total_assets: assets,
13085                    equity: eq,
13086                    gross_profit,
13087                });
13088            }
13089
13090            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13091
13092            info!(
13093                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13094                 {} total assets, {} equity benchmarks)",
13095                snapshot.materiality_calculations.len(),
13096                snapshot
13097                    .materiality_calculations
13098                    .iter()
13099                    .filter(|m| matches!(
13100                        m.benchmark,
13101                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13102                    ))
13103                    .count(),
13104                snapshot
13105                    .materiality_calculations
13106                    .iter()
13107                    .filter(|m| matches!(
13108                        m.benchmark,
13109                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13110                    ))
13111                    .count(),
13112                snapshot
13113                    .materiality_calculations
13114                    .iter()
13115                    .filter(|m| matches!(
13116                        m.benchmark,
13117                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13118                    ))
13119                    .count(),
13120                snapshot
13121                    .materiality_calculations
13122                    .iter()
13123                    .filter(|m| matches!(
13124                        m.benchmark,
13125                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13126                    ))
13127                    .count(),
13128            );
13129        }
13130
13131        // ----------------------------------------------------------------
13132        // ISA 315: Combined Risk Assessments (per entity, per account area)
13133        // ----------------------------------------------------------------
13134        {
13135            use datasynth_generators::audit::cra_generator::CraGenerator;
13136
13137            let mut cra_gen = CraGenerator::new(self.seed + 8315);
13138
13139            // Build entity → scope_id map from already-generated scopes
13140            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13141                .audit_scopes
13142                .iter()
13143                .map(|s| (s.entity_code.clone(), s.id.clone()))
13144                .collect();
13145
13146            for company in &self.config.companies {
13147                let cras = cra_gen.generate_for_entity(&company.code, None);
13148                let scope_id = entity_scope_map.get(&company.code).cloned();
13149                let cras_with_scope: Vec<_> = cras
13150                    .into_iter()
13151                    .map(|mut cra| {
13152                        cra.scope_id = scope_id.clone();
13153                        cra
13154                    })
13155                    .collect();
13156                snapshot.combined_risk_assessments.extend(cras_with_scope);
13157            }
13158
13159            let significant_count = snapshot
13160                .combined_risk_assessments
13161                .iter()
13162                .filter(|c| c.significant_risk)
13163                .count();
13164            let high_cra_count = snapshot
13165                .combined_risk_assessments
13166                .iter()
13167                .filter(|c| {
13168                    matches!(
13169                        c.combined_risk,
13170                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13171                    )
13172                })
13173                .count();
13174
13175            info!(
13176                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13177                snapshot.combined_risk_assessments.len(),
13178                significant_count,
13179                high_cra_count,
13180            );
13181        }
13182
13183        // ----------------------------------------------------------------
13184        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
13185        // ----------------------------------------------------------------
13186        {
13187            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13188
13189            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13190
13191            // Group CRAs by entity and use per-entity tolerable error from materiality
13192            for company in &self.config.companies {
13193                let entity_code = company.code.clone();
13194
13195                // Find tolerable error for this entity (= performance materiality)
13196                let tolerable_error = snapshot
13197                    .materiality_calculations
13198                    .iter()
13199                    .find(|m| m.entity_code == entity_code)
13200                    .map(|m| m.tolerable_error);
13201
13202                // Collect CRAs for this entity
13203                let entity_cras: Vec<_> = snapshot
13204                    .combined_risk_assessments
13205                    .iter()
13206                    .filter(|c| c.entity_code == entity_code)
13207                    .cloned()
13208                    .collect();
13209
13210                if !entity_cras.is_empty() {
13211                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13212                    snapshot.sampling_plans.extend(plans);
13213                    snapshot.sampled_items.extend(items);
13214                }
13215            }
13216
13217            let misstatement_count = snapshot
13218                .sampled_items
13219                .iter()
13220                .filter(|i| i.misstatement_found)
13221                .count();
13222
13223            info!(
13224                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13225                snapshot.sampling_plans.len(),
13226                snapshot.sampled_items.len(),
13227                misstatement_count,
13228            );
13229        }
13230
13231        // ----------------------------------------------------------------
13232        // ISA 315: Significant Classes of Transactions (SCOTS)
13233        // ----------------------------------------------------------------
13234        {
13235            use datasynth_generators::audit::scots_generator::{
13236                ScotsGenerator, ScotsGeneratorConfig,
13237            };
13238
13239            let ic_enabled = self.config.intercompany.enabled;
13240
13241            let config = ScotsGeneratorConfig {
13242                intercompany_enabled: ic_enabled,
13243                ..ScotsGeneratorConfig::default()
13244            };
13245            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
13246
13247            for company in &self.config.companies {
13248                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
13249                snapshot
13250                    .significant_transaction_classes
13251                    .extend(entity_scots);
13252            }
13253
13254            let estimation_count = snapshot
13255                .significant_transaction_classes
13256                .iter()
13257                .filter(|s| {
13258                    matches!(
13259                        s.transaction_type,
13260                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
13261                    )
13262                })
13263                .count();
13264
13265            info!(
13266                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
13267                snapshot.significant_transaction_classes.len(),
13268                estimation_count,
13269            );
13270        }
13271
13272        // ----------------------------------------------------------------
13273        // ISA 520: Unusual Item Markers
13274        // ----------------------------------------------------------------
13275        {
13276            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
13277
13278            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
13279            let entity_codes: Vec<String> = self
13280                .config
13281                .companies
13282                .iter()
13283                .map(|c| c.code.clone())
13284                .collect();
13285            let unusual_flags =
13286                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
13287            info!(
13288                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
13289                unusual_flags.len(),
13290                unusual_flags
13291                    .iter()
13292                    .filter(|f| matches!(
13293                        f.severity,
13294                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
13295                    ))
13296                    .count(),
13297                unusual_flags
13298                    .iter()
13299                    .filter(|f| matches!(
13300                        f.severity,
13301                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
13302                    ))
13303                    .count(),
13304                unusual_flags
13305                    .iter()
13306                    .filter(|f| matches!(
13307                        f.severity,
13308                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
13309                    ))
13310                    .count(),
13311            );
13312            snapshot.unusual_items = unusual_flags;
13313        }
13314
13315        // ----------------------------------------------------------------
13316        // ISA 520: Analytical Relationships
13317        // ----------------------------------------------------------------
13318        {
13319            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
13320
13321            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
13322            let entity_codes: Vec<String> = self
13323                .config
13324                .companies
13325                .iter()
13326                .map(|c| c.code.clone())
13327                .collect();
13328            let current_period_label = format!("FY{fiscal_year}");
13329            let prior_period_label = format!("FY{}", fiscal_year - 1);
13330            let analytical_rels = ar_gen.generate_for_entities(
13331                &entity_codes,
13332                entries,
13333                &current_period_label,
13334                &prior_period_label,
13335            );
13336            let out_of_range = analytical_rels
13337                .iter()
13338                .filter(|r| !r.within_expected_range)
13339                .count();
13340            info!(
13341                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13342                analytical_rels.len(),
13343                out_of_range,
13344            );
13345            snapshot.analytical_relationships = analytical_rels;
13346        }
13347
13348        if let Some(pb) = pb {
13349            pb.finish_with_message(format!(
13350                "Audit data: {} engagements, {} workpapers, {} evidence, \
13351                 {} confirmations, {} procedure steps, {} samples, \
13352                 {} analytical, {} IA funcs, {} related parties, \
13353                 {} component auditors, {} letters, {} subsequent events, \
13354                 {} service orgs, {} going concern, {} accounting estimates, \
13355                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13356                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13357                 {} unusual items, {} analytical relationships",
13358                snapshot.engagements.len(),
13359                snapshot.workpapers.len(),
13360                snapshot.evidence.len(),
13361                snapshot.confirmations.len(),
13362                snapshot.procedure_steps.len(),
13363                snapshot.samples.len(),
13364                snapshot.analytical_results.len(),
13365                snapshot.ia_functions.len(),
13366                snapshot.related_parties.len(),
13367                snapshot.component_auditors.len(),
13368                snapshot.engagement_letters.len(),
13369                snapshot.subsequent_events.len(),
13370                snapshot.service_organizations.len(),
13371                snapshot.going_concern_assessments.len(),
13372                snapshot.accounting_estimates.len(),
13373                snapshot.audit_opinions.len(),
13374                snapshot.key_audit_matters.len(),
13375                snapshot.sox_302_certifications.len(),
13376                snapshot.sox_404_assessments.len(),
13377                snapshot.materiality_calculations.len(),
13378                snapshot.combined_risk_assessments.len(),
13379                snapshot.sampling_plans.len(),
13380                snapshot.significant_transaction_classes.len(),
13381                snapshot.unusual_items.len(),
13382                snapshot.analytical_relationships.len(),
13383            ));
13384        }
13385
13386        // ----------------------------------------------------------------
13387        // PCAOB-ISA cross-reference mappings
13388        // ----------------------------------------------------------------
13389        // Always include the standard PCAOB-ISA mappings when audit generation is
13390        // enabled. These are static reference data (no randomness required) so we
13391        // call standard_mappings() directly.
13392        {
13393            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13394            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13395            debug!(
13396                "PCAOB-ISA mappings generated: {} mappings",
13397                snapshot.isa_pcaob_mappings.len()
13398            );
13399        }
13400
13401        // ----------------------------------------------------------------
13402        // ISA standard reference entries
13403        // ----------------------------------------------------------------
13404        // Emit flat ISA standard reference data (number, title, series) so
13405        // consumers get a machine-readable listing of all 34 ISA standards in
13406        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
13407        {
13408            use datasynth_standards::audit::isa_reference::IsaStandard;
13409            snapshot.isa_mappings = IsaStandard::standard_entries();
13410            debug!(
13411                "ISA standard entries generated: {} standards",
13412                snapshot.isa_mappings.len()
13413            );
13414        }
13415
13416        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
13417        // For each RPT, find the chronologically closest JE for the engagement's entity.
13418        {
13419            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
13420                .engagements
13421                .iter()
13422                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
13423                .collect();
13424
13425            for rpt in &mut snapshot.related_party_transactions {
13426                if rpt.journal_entry_id.is_some() {
13427                    continue; // already set
13428                }
13429                let entity = engagement_by_id
13430                    .get(&rpt.engagement_id.to_string())
13431                    .copied()
13432                    .unwrap_or("");
13433
13434                // Find closest JE by date in the entity's company
13435                let best_je = entries
13436                    .iter()
13437                    .filter(|je| je.header.company_code == entity)
13438                    .min_by_key(|je| {
13439                        (je.header.posting_date - rpt.transaction_date)
13440                            .num_days()
13441                            .abs()
13442                    });
13443
13444                if let Some(je) = best_je {
13445                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
13446                }
13447            }
13448
13449            let linked = snapshot
13450                .related_party_transactions
13451                .iter()
13452                .filter(|t| t.journal_entry_id.is_some())
13453                .count();
13454            debug!(
13455                "Linked {}/{} related party transactions to journal entries",
13456                linked,
13457                snapshot.related_party_transactions.len()
13458            );
13459        }
13460
13461        // --- ISA 700 / 701 / 705 / 706: audit opinion + key audit matters.
13462        // One opinion per engagement, derived from that engagement's findings,
13463        // going-concern assessment, and any component-auditor reports. Fills
13464        // `audit_opinions` + a flattened `key_audit_matters` for downstream
13465        // export.
13466        if !snapshot.engagements.is_empty() {
13467            use datasynth_generators::audit_opinion_generator::{
13468                AuditOpinionGenerator, AuditOpinionInput,
13469            };
13470
13471            let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
13472            let inputs: Vec<AuditOpinionInput> = snapshot
13473                .engagements
13474                .iter()
13475                .map(|eng| {
13476                    let findings = snapshot
13477                        .findings
13478                        .iter()
13479                        .filter(|f| f.engagement_id == eng.engagement_id)
13480                        .cloned()
13481                        .collect();
13482                    let going_concern = snapshot
13483                        .going_concern_assessments
13484                        .iter()
13485                        .find(|gc| gc.entity_code == eng.client_entity_id)
13486                        .cloned();
13487                    // ComponentAuditorReport doesn't carry an engagement id, but
13488                    // component scope is keyed by `entity_code`, so filter on that.
13489                    let component_reports = snapshot
13490                        .component_reports
13491                        .iter()
13492                        .filter(|r| r.entity_code == eng.client_entity_id)
13493                        .cloned()
13494                        .collect();
13495
13496                    AuditOpinionInput {
13497                        entity_code: eng.client_entity_id.clone(),
13498                        entity_name: eng.client_name.clone(),
13499                        engagement_id: eng.engagement_id,
13500                        period_end: eng.period_end_date,
13501                        findings,
13502                        going_concern,
13503                        component_reports,
13504                        is_us_listed: matches!(
13505                            eng.engagement_type,
13506                            datasynth_core::audit::EngagementType::IntegratedAudit
13507                                | datasynth_core::audit::EngagementType::Sox404
13508                        ),
13509                        auditor_name: "DataSynth Audit LLP".to_string(),
13510                        engagement_partner: "Engagement Partner".to_string(),
13511                    }
13512                })
13513                .collect();
13514
13515            let generated = opinion_gen.generate_batch(&inputs);
13516            for g in generated {
13517                snapshot.key_audit_matters.extend(g.key_audit_matters);
13518                snapshot.audit_opinions.push(g.opinion);
13519            }
13520            debug!(
13521                "Generated {} audit opinions with {} key audit matters",
13522                snapshot.audit_opinions.len(),
13523                snapshot.key_audit_matters.len()
13524            );
13525        }
13526
13527        Ok(snapshot)
13528    }
13529
13530    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
13531    ///
13532    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
13533    /// from the current orchestrator state, runs the FSM engine, and maps the
13534    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
13535    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
13536    fn generate_audit_data_with_fsm(
13537        &mut self,
13538        entries: &[JournalEntry],
13539    ) -> SynthResult<AuditSnapshot> {
13540        use datasynth_audit_fsm::{
13541            context::EngagementContext,
13542            engine::AuditFsmEngine,
13543            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
13544        };
13545        use rand::SeedableRng;
13546        use rand_chacha::ChaCha8Rng;
13547
13548        info!("Audit FSM: generating audit data via FSM engine");
13549
13550        let fsm_config = self
13551            .config
13552            .audit
13553            .fsm
13554            .as_ref()
13555            .expect("FSM config must be present when FSM is enabled");
13556
13557        // 1. Load blueprint from config string.
13558        let bwp = match fsm_config.blueprint.as_str() {
13559            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
13560            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
13561            _ => {
13562                warn!(
13563                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
13564                    fsm_config.blueprint
13565                );
13566                BlueprintWithPreconditions::load_builtin_fsa()
13567            }
13568        }
13569        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
13570
13571        // 2. Load overlay from config string.
13572        let overlay = match fsm_config.overlay.as_str() {
13573            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
13574            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
13575            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
13576            _ => {
13577                warn!(
13578                    "Unknown FSM overlay '{}', falling back to builtin:default",
13579                    fsm_config.overlay
13580                );
13581                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
13582            }
13583        }
13584        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
13585
13586        // 3. Build EngagementContext from orchestrator state.
13587        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13588            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
13589        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
13590
13591        // Determine the engagement entity early so we can filter JEs.
13592        let company = self.config.companies.first();
13593        let company_code = company
13594            .map(|c| c.code.clone())
13595            .unwrap_or_else(|| "UNKNOWN".to_string());
13596        let company_name = company
13597            .map(|c| c.name.clone())
13598            .unwrap_or_else(|| "Unknown Company".to_string());
13599        let currency = company
13600            .map(|c| c.currency.clone())
13601            .unwrap_or_else(|| "USD".to_string());
13602
13603        // Filter JEs to the engagement entity for single-company coherence.
13604        let entity_entries: Vec<_> = entries
13605            .iter()
13606            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
13607            .cloned()
13608            .collect();
13609        let entries = &entity_entries; // Shadow the parameter for remaining usage
13610
13611        // Financial aggregates from journal entries.
13612        let total_revenue: rust_decimal::Decimal = entries
13613            .iter()
13614            .flat_map(|e| e.lines.iter())
13615            .filter(|l| l.account_code.starts_with('4'))
13616            .map(|l| l.credit_amount - l.debit_amount)
13617            .sum();
13618
13619        let total_assets: rust_decimal::Decimal = entries
13620            .iter()
13621            .flat_map(|e| e.lines.iter())
13622            .filter(|l| l.account_code.starts_with('1'))
13623            .map(|l| l.debit_amount - l.credit_amount)
13624            .sum();
13625
13626        let total_expenses: rust_decimal::Decimal = entries
13627            .iter()
13628            .flat_map(|e| e.lines.iter())
13629            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13630            .map(|l| l.debit_amount)
13631            .sum();
13632
13633        let equity: rust_decimal::Decimal = entries
13634            .iter()
13635            .flat_map(|e| e.lines.iter())
13636            .filter(|l| l.account_code.starts_with('3'))
13637            .map(|l| l.credit_amount - l.debit_amount)
13638            .sum();
13639
13640        let total_debt: rust_decimal::Decimal = entries
13641            .iter()
13642            .flat_map(|e| e.lines.iter())
13643            .filter(|l| l.account_code.starts_with('2'))
13644            .map(|l| l.credit_amount - l.debit_amount)
13645            .sum();
13646
13647        let pretax_income = total_revenue - total_expenses;
13648
13649        let cogs: rust_decimal::Decimal = entries
13650            .iter()
13651            .flat_map(|e| e.lines.iter())
13652            .filter(|l| l.account_code.starts_with('5'))
13653            .map(|l| l.debit_amount)
13654            .sum();
13655        let gross_profit = total_revenue - cogs;
13656
13657        let current_assets: rust_decimal::Decimal = entries
13658            .iter()
13659            .flat_map(|e| e.lines.iter())
13660            .filter(|l| {
13661                l.account_code.starts_with("10")
13662                    || l.account_code.starts_with("11")
13663                    || l.account_code.starts_with("12")
13664                    || l.account_code.starts_with("13")
13665            })
13666            .map(|l| l.debit_amount - l.credit_amount)
13667            .sum();
13668        let current_liabilities: rust_decimal::Decimal = entries
13669            .iter()
13670            .flat_map(|e| e.lines.iter())
13671            .filter(|l| {
13672                l.account_code.starts_with("20")
13673                    || l.account_code.starts_with("21")
13674                    || l.account_code.starts_with("22")
13675            })
13676            .map(|l| l.credit_amount - l.debit_amount)
13677            .sum();
13678        let working_capital = current_assets - current_liabilities;
13679
13680        let depreciation: rust_decimal::Decimal = entries
13681            .iter()
13682            .flat_map(|e| e.lines.iter())
13683            .filter(|l| l.account_code.starts_with("60"))
13684            .map(|l| l.debit_amount)
13685            .sum();
13686        let operating_cash_flow = pretax_income + depreciation;
13687
13688        // GL accounts for reference data.
13689        let accounts: Vec<String> = self
13690            .coa
13691            .as_ref()
13692            .map(|coa| {
13693                coa.get_postable_accounts()
13694                    .iter()
13695                    .map(|acc| acc.account_code().to_string())
13696                    .collect()
13697            })
13698            .unwrap_or_default();
13699
13700        // Team member IDs and display names from master data.
13701        let team_member_ids: Vec<String> = self
13702            .master_data
13703            .employees
13704            .iter()
13705            .take(8) // Cap team size
13706            .map(|e| e.employee_id.clone())
13707            .collect();
13708        let team_member_pairs: Vec<(String, String)> = self
13709            .master_data
13710            .employees
13711            .iter()
13712            .take(8)
13713            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13714            .collect();
13715
13716        let vendor_names: Vec<String> = self
13717            .master_data
13718            .vendors
13719            .iter()
13720            .map(|v| v.name.clone())
13721            .collect();
13722        let customer_names: Vec<String> = self
13723            .master_data
13724            .customers
13725            .iter()
13726            .map(|c| c.name.clone())
13727            .collect();
13728
13729        let entity_codes: Vec<String> = self
13730            .config
13731            .companies
13732            .iter()
13733            .map(|c| c.code.clone())
13734            .collect();
13735
13736        // Journal entry IDs for evidence tracing (sample up to 50).
13737        let journal_entry_ids: Vec<String> = entries
13738            .iter()
13739            .take(50)
13740            .map(|e| e.header.document_id.to_string())
13741            .collect();
13742
13743        // Account balances for risk weighting (aggregate debit - credit per account).
13744        let mut account_balances = std::collections::HashMap::<String, f64>::new();
13745        for entry in entries {
13746            for line in &entry.lines {
13747                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
13748                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
13749                *account_balances
13750                    .entry(line.account_code.clone())
13751                    .or_insert(0.0) += debit_f64 - credit_f64;
13752            }
13753        }
13754
13755        // Internal control IDs and anomaly refs are populated by the
13756        // caller when available; here we default to empty because the
13757        // orchestrator state may not have generated controls/anomalies
13758        // yet at this point in the pipeline.
13759        let control_ids: Vec<String> = Vec::new();
13760        let anomaly_refs: Vec<String> = Vec::new();
13761
13762        let mut context = EngagementContext {
13763            company_code,
13764            company_name,
13765            fiscal_year: start_date.year(),
13766            currency,
13767            total_revenue,
13768            total_assets,
13769            engagement_start: start_date,
13770            report_date: period_end,
13771            pretax_income,
13772            equity,
13773            gross_profit,
13774            working_capital,
13775            operating_cash_flow,
13776            total_debt,
13777            team_member_ids,
13778            team_member_pairs,
13779            accounts,
13780            vendor_names,
13781            customer_names,
13782            journal_entry_ids,
13783            account_balances,
13784            control_ids,
13785            anomaly_refs,
13786            journal_entries: entries.to_vec(),
13787            is_us_listed: false,
13788            entity_codes,
13789            auditor_firm_name: "DataSynth Audit LLP".into(),
13790            accounting_framework: self
13791                .config
13792                .accounting_standards
13793                .framework
13794                .map(|f| match f {
13795                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
13796                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
13797                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
13798                        "French GAAP"
13799                    }
13800                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
13801                        "German GAAP"
13802                    }
13803                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
13804                        "Dual Reporting"
13805                    }
13806                })
13807                .unwrap_or("IFRS")
13808                .into(),
13809        };
13810
13811        // 4. Create and run the FSM engine.
13812        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
13813        let rng = ChaCha8Rng::seed_from_u64(seed);
13814        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
13815
13816        let mut result = engine
13817            .run_engagement(&context)
13818            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
13819
13820        info!(
13821            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
13822             {} phases completed, duration {:.1}h",
13823            result.event_log.len(),
13824            result.artifacts.total_artifacts(),
13825            result.anomalies.len(),
13826            result.phases_completed.len(),
13827            result.total_duration_hours,
13828        );
13829
13830        // 4b. Populate financial data in the artifact bag for downstream consumers.
13831        let tb_entity = context.company_code.clone();
13832        let tb_fy = context.fiscal_year;
13833        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
13834        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
13835            entries,
13836            &tb_entity,
13837            tb_fy,
13838            self.coa.as_ref().map(|c| c.as_ref()),
13839        );
13840
13841        // 5. Map ArtifactBag fields to AuditSnapshot.
13842        let bag = result.artifacts;
13843        let mut snapshot = AuditSnapshot {
13844            engagements: bag.engagements,
13845            engagement_letters: bag.engagement_letters,
13846            materiality_calculations: bag.materiality_calculations,
13847            risk_assessments: bag.risk_assessments,
13848            combined_risk_assessments: bag.combined_risk_assessments,
13849            workpapers: bag.workpapers,
13850            evidence: bag.evidence,
13851            findings: bag.findings,
13852            judgments: bag.judgments,
13853            sampling_plans: bag.sampling_plans,
13854            sampled_items: bag.sampled_items,
13855            analytical_results: bag.analytical_results,
13856            going_concern_assessments: bag.going_concern_assessments,
13857            subsequent_events: bag.subsequent_events,
13858            audit_opinions: bag.audit_opinions,
13859            key_audit_matters: bag.key_audit_matters,
13860            procedure_steps: bag.procedure_steps,
13861            samples: bag.samples,
13862            confirmations: bag.confirmations,
13863            confirmation_responses: bag.confirmation_responses,
13864            // Store the event trail for downstream export.
13865            fsm_event_trail: Some(result.event_log),
13866            // Fields not produced by the FSM engine remain at their defaults.
13867            ..Default::default()
13868        };
13869
13870        // 6. Add static reference data (same as legacy path).
13871        {
13872            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13873            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13874        }
13875        {
13876            use datasynth_standards::audit::isa_reference::IsaStandard;
13877            snapshot.isa_mappings = IsaStandard::standard_entries();
13878        }
13879
13880        info!(
13881            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
13882             {} risk assessments, {} findings, {} materiality calcs",
13883            snapshot.engagements.len(),
13884            snapshot.workpapers.len(),
13885            snapshot.evidence.len(),
13886            snapshot.risk_assessments.len(),
13887            snapshot.findings.len(),
13888            snapshot.materiality_calculations.len(),
13889        );
13890
13891        Ok(snapshot)
13892    }
13893
13894    /// Export journal entries as graph data for ML training and network reconstruction.
13895    ///
13896    /// Builds a transaction graph where:
13897    /// - Nodes are GL accounts
13898    /// - Edges are money flows from credit to debit accounts
13899    /// - Edge attributes include amount, date, business process, anomaly flags
13900    fn export_graphs(
13901        &mut self,
13902        entries: &[JournalEntry],
13903        _coa: &Arc<ChartOfAccounts>,
13904        stats: &mut EnhancedGenerationStatistics,
13905    ) -> SynthResult<GraphExportSnapshot> {
13906        let pb = self.create_progress_bar(100, "Exporting Graphs");
13907
13908        let mut snapshot = GraphExportSnapshot::default();
13909
13910        // Get output directory
13911        let output_dir = self
13912            .output_path
13913            .clone()
13914            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13915        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13916
13917        // Process each graph type configuration
13918        for graph_type in &self.config.graph_export.graph_types {
13919            if let Some(pb) = &pb {
13920                pb.inc(10);
13921            }
13922
13923            // Build transaction graph
13924            let graph_config = TransactionGraphConfig {
13925                include_vendors: false,
13926                include_customers: false,
13927                create_debit_credit_edges: true,
13928                include_document_nodes: graph_type.include_document_nodes,
13929                min_edge_weight: graph_type.min_edge_weight,
13930                aggregate_parallel_edges: graph_type.aggregate_edges,
13931                framework: None,
13932            };
13933
13934            let mut builder = TransactionGraphBuilder::new(graph_config);
13935            builder.add_journal_entries(entries);
13936            let graph = builder.build();
13937
13938            // Update stats
13939            stats.graph_node_count += graph.node_count();
13940            stats.graph_edge_count += graph.edge_count();
13941
13942            if let Some(pb) = &pb {
13943                pb.inc(40);
13944            }
13945
13946            // Export to each configured format
13947            for format in &self.config.graph_export.formats {
13948                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
13949
13950                // Create output directory
13951                if let Err(e) = std::fs::create_dir_all(&format_dir) {
13952                    warn!("Failed to create graph output directory: {}", e);
13953                    continue;
13954                }
13955
13956                match format {
13957                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
13958                        let pyg_config = PyGExportConfig {
13959                            common: datasynth_graph::CommonExportConfig {
13960                                export_node_features: true,
13961                                export_edge_features: true,
13962                                export_node_labels: true,
13963                                export_edge_labels: true,
13964                                export_masks: true,
13965                                train_ratio: self.config.graph_export.train_ratio,
13966                                val_ratio: self.config.graph_export.validation_ratio,
13967                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13968                            },
13969                            one_hot_categoricals: false,
13970                        };
13971
13972                        let exporter = PyGExporter::new(pyg_config);
13973                        match exporter.export(&graph, &format_dir) {
13974                            Ok(metadata) => {
13975                                snapshot.exports.insert(
13976                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
13977                                    GraphExportInfo {
13978                                        name: graph_type.name.clone(),
13979                                        format: "pytorch_geometric".to_string(),
13980                                        output_path: format_dir.clone(),
13981                                        node_count: metadata.num_nodes,
13982                                        edge_count: metadata.num_edges,
13983                                    },
13984                                );
13985                                snapshot.graph_count += 1;
13986                            }
13987                            Err(e) => {
13988                                warn!("Failed to export PyTorch Geometric graph: {}", e);
13989                            }
13990                        }
13991                    }
13992                    datasynth_config::schema::GraphExportFormat::Neo4j => {
13993                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
13994
13995                        let neo4j_config = Neo4jExportConfig {
13996                            export_node_properties: true,
13997                            export_edge_properties: true,
13998                            export_features: true,
13999                            generate_cypher: true,
14000                            generate_admin_import: true,
14001                            database_name: "synth".to_string(),
14002                            cypher_batch_size: 1000,
14003                        };
14004
14005                        let exporter = Neo4jExporter::new(neo4j_config);
14006                        match exporter.export(&graph, &format_dir) {
14007                            Ok(metadata) => {
14008                                snapshot.exports.insert(
14009                                    format!("{}_{}", graph_type.name, "neo4j"),
14010                                    GraphExportInfo {
14011                                        name: graph_type.name.clone(),
14012                                        format: "neo4j".to_string(),
14013                                        output_path: format_dir.clone(),
14014                                        node_count: metadata.num_nodes,
14015                                        edge_count: metadata.num_edges,
14016                                    },
14017                                );
14018                                snapshot.graph_count += 1;
14019                            }
14020                            Err(e) => {
14021                                warn!("Failed to export Neo4j graph: {}", e);
14022                            }
14023                        }
14024                    }
14025                    datasynth_config::schema::GraphExportFormat::Dgl => {
14026                        use datasynth_graph::{DGLExportConfig, DGLExporter};
14027
14028                        let dgl_config = DGLExportConfig {
14029                            common: datasynth_graph::CommonExportConfig {
14030                                export_node_features: true,
14031                                export_edge_features: true,
14032                                export_node_labels: true,
14033                                export_edge_labels: true,
14034                                export_masks: true,
14035                                train_ratio: self.config.graph_export.train_ratio,
14036                                val_ratio: self.config.graph_export.validation_ratio,
14037                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14038                            },
14039                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
14040                            include_pickle_script: true, // DGL ecosystem standard helper
14041                        };
14042
14043                        let exporter = DGLExporter::new(dgl_config);
14044                        match exporter.export(&graph, &format_dir) {
14045                            Ok(metadata) => {
14046                                snapshot.exports.insert(
14047                                    format!("{}_{}", graph_type.name, "dgl"),
14048                                    GraphExportInfo {
14049                                        name: graph_type.name.clone(),
14050                                        format: "dgl".to_string(),
14051                                        output_path: format_dir.clone(),
14052                                        node_count: metadata.common.num_nodes,
14053                                        edge_count: metadata.common.num_edges,
14054                                    },
14055                                );
14056                                snapshot.graph_count += 1;
14057                            }
14058                            Err(e) => {
14059                                warn!("Failed to export DGL graph: {}", e);
14060                            }
14061                        }
14062                    }
14063                    datasynth_config::schema::GraphExportFormat::RustGraph => {
14064                        use datasynth_graph::{
14065                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14066                        };
14067
14068                        let rustgraph_config = RustGraphExportConfig {
14069                            include_features: true,
14070                            include_temporal: true,
14071                            include_labels: true,
14072                            source_name: "datasynth".to_string(),
14073                            batch_id: None,
14074                            output_format: RustGraphOutputFormat::JsonLines,
14075                            export_node_properties: true,
14076                            export_edge_properties: true,
14077                            pretty_print: false,
14078                        };
14079
14080                        let exporter = RustGraphExporter::new(rustgraph_config);
14081                        match exporter.export(&graph, &format_dir) {
14082                            Ok(metadata) => {
14083                                snapshot.exports.insert(
14084                                    format!("{}_{}", graph_type.name, "rustgraph"),
14085                                    GraphExportInfo {
14086                                        name: graph_type.name.clone(),
14087                                        format: "rustgraph".to_string(),
14088                                        output_path: format_dir.clone(),
14089                                        node_count: metadata.num_nodes,
14090                                        edge_count: metadata.num_edges,
14091                                    },
14092                                );
14093                                snapshot.graph_count += 1;
14094                            }
14095                            Err(e) => {
14096                                warn!("Failed to export RustGraph: {}", e);
14097                            }
14098                        }
14099                    }
14100                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14101                        // Hypergraph export is handled separately in Phase 10b
14102                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14103                    }
14104                }
14105            }
14106
14107            if let Some(pb) = &pb {
14108                pb.inc(40);
14109            }
14110        }
14111
14112        stats.graph_export_count = snapshot.graph_count;
14113        snapshot.exported = snapshot.graph_count > 0;
14114
14115        if let Some(pb) = pb {
14116            pb.finish_with_message(format!(
14117                "Graphs exported: {} graphs ({} nodes, {} edges)",
14118                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14119            ));
14120        }
14121
14122        Ok(snapshot)
14123    }
14124
14125    /// Build additional graph types (banking, approval, entity) when relevant data
14126    /// is available. These run as a late phase because the data they need (banking
14127    /// snapshot, intercompany snapshot) is only generated after the main graph
14128    /// export phase.
14129    fn build_additional_graphs(
14130        &self,
14131        banking: &BankingSnapshot,
14132        intercompany: &IntercompanySnapshot,
14133        entries: &[JournalEntry],
14134        stats: &mut EnhancedGenerationStatistics,
14135    ) {
14136        let output_dir = self
14137            .output_path
14138            .clone()
14139            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14140        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14141
14142        // Banking graph: build when banking customers and transactions exist
14143        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14144            info!("Phase 10c: Building banking network graph");
14145            let config = BankingGraphConfig::default();
14146            let mut builder = BankingGraphBuilder::new(config);
14147            builder.add_customers(&banking.customers);
14148            builder.add_accounts(&banking.accounts, &banking.customers);
14149            builder.add_transactions(&banking.transactions);
14150            let graph = builder.build();
14151
14152            let node_count = graph.node_count();
14153            let edge_count = graph.edge_count();
14154            stats.graph_node_count += node_count;
14155            stats.graph_edge_count += edge_count;
14156
14157            // Export as PyG if configured
14158            for format in &self.config.graph_export.formats {
14159                if matches!(
14160                    format,
14161                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14162                ) {
14163                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14164                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14165                        warn!("Failed to create banking graph output dir: {}", e);
14166                        continue;
14167                    }
14168                    let pyg_config = PyGExportConfig::default();
14169                    let exporter = PyGExporter::new(pyg_config);
14170                    if let Err(e) = exporter.export(&graph, &format_dir) {
14171                        warn!("Failed to export banking graph as PyG: {}", e);
14172                    } else {
14173                        info!(
14174                            "Banking network graph exported: {} nodes, {} edges",
14175                            node_count, edge_count
14176                        );
14177                    }
14178                }
14179            }
14180        }
14181
14182        // Approval graph: build from journal entry approval workflows
14183        let approval_entries: Vec<_> = entries
14184            .iter()
14185            .filter(|je| je.header.approval_workflow.is_some())
14186            .collect();
14187
14188        if !approval_entries.is_empty() {
14189            info!(
14190                "Phase 10c: Building approval network graph ({} entries with approvals)",
14191                approval_entries.len()
14192            );
14193            let config = ApprovalGraphConfig::default();
14194            let mut builder = ApprovalGraphBuilder::new(config);
14195
14196            for je in &approval_entries {
14197                if let Some(ref wf) = je.header.approval_workflow {
14198                    for action in &wf.actions {
14199                        let record = datasynth_core::models::ApprovalRecord {
14200                            approval_id: format!(
14201                                "APR-{}-{}",
14202                                je.header.document_id, action.approval_level
14203                            ),
14204                            document_number: je.header.document_id.to_string(),
14205                            document_type: "JE".to_string(),
14206                            company_code: je.company_code().to_string(),
14207                            requester_id: wf.preparer_id.clone(),
14208                            requester_name: Some(wf.preparer_name.clone()),
14209                            approver_id: action.actor_id.clone(),
14210                            approver_name: action.actor_name.clone(),
14211                            approval_date: je.posting_date(),
14212                            action: format!("{:?}", action.action),
14213                            amount: wf.amount,
14214                            approval_limit: None,
14215                            comments: action.comments.clone(),
14216                            delegation_from: None,
14217                            is_auto_approved: false,
14218                        };
14219                        builder.add_approval(&record);
14220                    }
14221                }
14222            }
14223
14224            let graph = builder.build();
14225            let node_count = graph.node_count();
14226            let edge_count = graph.edge_count();
14227            stats.graph_node_count += node_count;
14228            stats.graph_edge_count += edge_count;
14229
14230            // Export as PyG if configured
14231            for format in &self.config.graph_export.formats {
14232                if matches!(
14233                    format,
14234                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14235                ) {
14236                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14237                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14238                        warn!("Failed to create approval graph output dir: {}", e);
14239                        continue;
14240                    }
14241                    let pyg_config = PyGExportConfig::default();
14242                    let exporter = PyGExporter::new(pyg_config);
14243                    if let Err(e) = exporter.export(&graph, &format_dir) {
14244                        warn!("Failed to export approval graph as PyG: {}", e);
14245                    } else {
14246                        info!(
14247                            "Approval network graph exported: {} nodes, {} edges",
14248                            node_count, edge_count
14249                        );
14250                    }
14251                }
14252            }
14253        }
14254
14255        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
14256        if self.config.companies.len() >= 2 {
14257            info!(
14258                "Phase 10c: Building entity relationship graph ({} companies)",
14259                self.config.companies.len()
14260            );
14261
14262            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14263                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
14264
14265            // Map CompanyConfig → Company objects
14266            let parent_code = &self.config.companies[0].code;
14267            let mut companies: Vec<datasynth_core::models::Company> =
14268                Vec::with_capacity(self.config.companies.len());
14269
14270            // First company is the parent
14271            let first = &self.config.companies[0];
14272            companies.push(datasynth_core::models::Company::parent(
14273                &first.code,
14274                &first.name,
14275                &first.country,
14276                &first.currency,
14277            ));
14278
14279            // Remaining companies are subsidiaries (100% owned by parent)
14280            for cc in self.config.companies.iter().skip(1) {
14281                companies.push(datasynth_core::models::Company::subsidiary(
14282                    &cc.code,
14283                    &cc.name,
14284                    &cc.country,
14285                    &cc.currency,
14286                    parent_code,
14287                    rust_decimal::Decimal::from(100),
14288                ));
14289            }
14290
14291            // Build IntercompanyRelationship records (same logic as phase_intercompany)
14292            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
14293                self.config
14294                    .companies
14295                    .iter()
14296                    .skip(1)
14297                    .enumerate()
14298                    .map(|(i, cc)| {
14299                        let mut rel =
14300                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
14301                                format!("REL{:03}", i + 1),
14302                                parent_code.clone(),
14303                                cc.code.clone(),
14304                                rust_decimal::Decimal::from(100),
14305                                start_date,
14306                            );
14307                        rel.functional_currency = cc.currency.clone();
14308                        rel
14309                    })
14310                    .collect();
14311
14312            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
14313            builder.add_companies(&companies);
14314            builder.add_ownership_relationships(&relationships);
14315
14316            // Thread IC matched-pair transaction edges into the entity graph
14317            for pair in &intercompany.matched_pairs {
14318                builder.add_intercompany_edge(
14319                    &pair.seller_company,
14320                    &pair.buyer_company,
14321                    pair.amount,
14322                    &format!("{:?}", pair.transaction_type),
14323                );
14324            }
14325
14326            let graph = builder.build();
14327            let node_count = graph.node_count();
14328            let edge_count = graph.edge_count();
14329            stats.graph_node_count += node_count;
14330            stats.graph_edge_count += edge_count;
14331
14332            // Export as PyG if configured
14333            for format in &self.config.graph_export.formats {
14334                if matches!(
14335                    format,
14336                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14337                ) {
14338                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14339                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14340                        warn!("Failed to create entity graph output dir: {}", e);
14341                        continue;
14342                    }
14343                    let pyg_config = PyGExportConfig::default();
14344                    let exporter = PyGExporter::new(pyg_config);
14345                    if let Err(e) = exporter.export(&graph, &format_dir) {
14346                        warn!("Failed to export entity graph as PyG: {}", e);
14347                    } else {
14348                        info!(
14349                            "Entity relationship graph exported: {} nodes, {} edges",
14350                            node_count, edge_count
14351                        );
14352                    }
14353                }
14354            }
14355        } else {
14356            debug!(
14357                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14358                self.config.companies.len()
14359            );
14360        }
14361    }
14362
14363    /// Export a multi-layer hypergraph for RustGraph integration.
14364    ///
14365    /// Builds a 3-layer hypergraph:
14366    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
14367    /// - Layer 2: Process Events (all process family document flows + OCPM events)
14368    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
14369    #[allow(clippy::too_many_arguments)]
14370    fn export_hypergraph(
14371        &self,
14372        coa: &Arc<ChartOfAccounts>,
14373        entries: &[JournalEntry],
14374        document_flows: &DocumentFlowSnapshot,
14375        sourcing: &SourcingSnapshot,
14376        hr: &HrSnapshot,
14377        manufacturing: &ManufacturingSnapshot,
14378        banking: &BankingSnapshot,
14379        audit: &AuditSnapshot,
14380        financial_reporting: &FinancialReportingSnapshot,
14381        ocpm: &OcpmSnapshot,
14382        compliance: &ComplianceRegulationsSnapshot,
14383        stats: &mut EnhancedGenerationStatistics,
14384    ) -> SynthResult<HypergraphExportInfo> {
14385        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
14386        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
14387        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
14388        use datasynth_graph::models::hypergraph::AggregationStrategy;
14389
14390        let hg_settings = &self.config.graph_export.hypergraph;
14391
14392        // Parse aggregation strategy from config string
14393        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
14394            "truncate" => AggregationStrategy::Truncate,
14395            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
14396            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
14397            "importance_sample" => AggregationStrategy::ImportanceSample,
14398            _ => AggregationStrategy::PoolByCounterparty,
14399        };
14400
14401        let builder_config = HypergraphConfig {
14402            max_nodes: hg_settings.max_nodes,
14403            aggregation_strategy,
14404            include_coso: hg_settings.governance_layer.include_coso,
14405            include_controls: hg_settings.governance_layer.include_controls,
14406            include_sox: hg_settings.governance_layer.include_sox,
14407            include_vendors: hg_settings.governance_layer.include_vendors,
14408            include_customers: hg_settings.governance_layer.include_customers,
14409            include_employees: hg_settings.governance_layer.include_employees,
14410            include_p2p: hg_settings.process_layer.include_p2p,
14411            include_o2c: hg_settings.process_layer.include_o2c,
14412            include_s2c: hg_settings.process_layer.include_s2c,
14413            include_h2r: hg_settings.process_layer.include_h2r,
14414            include_mfg: hg_settings.process_layer.include_mfg,
14415            include_bank: hg_settings.process_layer.include_bank,
14416            include_audit: hg_settings.process_layer.include_audit,
14417            include_r2r: hg_settings.process_layer.include_r2r,
14418            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
14419            docs_per_counterparty_threshold: hg_settings
14420                .process_layer
14421                .docs_per_counterparty_threshold,
14422            include_accounts: hg_settings.accounting_layer.include_accounts,
14423            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
14424            include_cross_layer_edges: hg_settings.cross_layer.enabled,
14425            include_compliance: self.config.compliance_regulations.enabled,
14426            include_tax: true,
14427            include_treasury: true,
14428            include_esg: true,
14429            include_project: true,
14430            include_intercompany: true,
14431            include_temporal_events: true,
14432        };
14433
14434        let mut builder = HypergraphBuilder::new(builder_config);
14435
14436        // Layer 1: Governance & Controls
14437        builder.add_coso_framework();
14438
14439        // Add controls if available (generated during JE generation)
14440        // Controls are generated per-company; we use the standard set
14441        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
14442            let controls = InternalControl::standard_controls();
14443            builder.add_controls(&controls);
14444        }
14445
14446        // Add master data
14447        builder.add_vendors(&self.master_data.vendors);
14448        builder.add_customers(&self.master_data.customers);
14449        builder.add_employees(&self.master_data.employees);
14450
14451        // Layer 2: Process Events (all process families)
14452        builder.add_p2p_documents(
14453            &document_flows.purchase_orders,
14454            &document_flows.goods_receipts,
14455            &document_flows.vendor_invoices,
14456            &document_flows.payments,
14457        );
14458        builder.add_o2c_documents(
14459            &document_flows.sales_orders,
14460            &document_flows.deliveries,
14461            &document_flows.customer_invoices,
14462        );
14463        builder.add_s2c_documents(
14464            &sourcing.sourcing_projects,
14465            &sourcing.qualifications,
14466            &sourcing.rfx_events,
14467            &sourcing.bids,
14468            &sourcing.bid_evaluations,
14469            &sourcing.contracts,
14470        );
14471        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
14472        builder.add_mfg_documents(
14473            &manufacturing.production_orders,
14474            &manufacturing.quality_inspections,
14475            &manufacturing.cycle_counts,
14476        );
14477        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
14478        builder.add_audit_documents(
14479            &audit.engagements,
14480            &audit.workpapers,
14481            &audit.findings,
14482            &audit.evidence,
14483            &audit.risk_assessments,
14484            &audit.judgments,
14485            &audit.materiality_calculations,
14486            &audit.audit_opinions,
14487            &audit.going_concern_assessments,
14488        );
14489        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
14490
14491        // OCPM events as hyperedges
14492        if let Some(ref event_log) = ocpm.event_log {
14493            builder.add_ocpm_events(event_log);
14494        }
14495
14496        // Compliance regulations as cross-layer nodes
14497        if self.config.compliance_regulations.enabled
14498            && hg_settings.governance_layer.include_controls
14499        {
14500            // Reconstruct ComplianceStandard objects from the registry
14501            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14502            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
14503                .standard_records
14504                .iter()
14505                .filter_map(|r| {
14506                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
14507                    registry.get(&sid).cloned()
14508                })
14509                .collect();
14510
14511            builder.add_compliance_regulations(
14512                &standards,
14513                &compliance.findings,
14514                &compliance.filings,
14515            );
14516        }
14517
14518        // Layer 3: Accounting Network
14519        builder.add_accounts(coa);
14520        builder.add_journal_entries_as_hyperedges(entries);
14521
14522        // Build the hypergraph
14523        let hypergraph = builder.build();
14524
14525        // Export
14526        let output_dir = self
14527            .output_path
14528            .clone()
14529            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14530        let hg_dir = output_dir
14531            .join(&self.config.graph_export.output_subdirectory)
14532            .join(&hg_settings.output_subdirectory);
14533
14534        // Branch on output format
14535        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
14536            "unified" => {
14537                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14538                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14539                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
14540                })?;
14541                (
14542                    metadata.num_nodes,
14543                    metadata.num_edges,
14544                    metadata.num_hyperedges,
14545                )
14546            }
14547            _ => {
14548                // "native" or any unrecognized format → use existing exporter
14549                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
14550                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14551                    SynthError::generation(format!("Hypergraph export failed: {e}"))
14552                })?;
14553                (
14554                    metadata.num_nodes,
14555                    metadata.num_edges,
14556                    metadata.num_hyperedges,
14557                )
14558            }
14559        };
14560
14561        // Stream to RustGraph ingest endpoint if configured
14562        #[cfg(feature = "streaming")]
14563        if let Some(ref target_url) = hg_settings.stream_target {
14564            use crate::stream_client::{StreamClient, StreamConfig};
14565            use std::io::Write as _;
14566
14567            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
14568            let stream_config = StreamConfig {
14569                target_url: target_url.clone(),
14570                batch_size: hg_settings.stream_batch_size,
14571                api_key,
14572                ..StreamConfig::default()
14573            };
14574
14575            match StreamClient::new(stream_config) {
14576                Ok(mut client) => {
14577                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14578                    match exporter.export_to_writer(&hypergraph, &mut client) {
14579                        Ok(_) => {
14580                            if let Err(e) = client.flush() {
14581                                warn!("Failed to flush stream client: {}", e);
14582                            } else {
14583                                info!("Streamed {} records to {}", client.total_sent(), target_url);
14584                            }
14585                        }
14586                        Err(e) => {
14587                            warn!("Streaming export failed: {}", e);
14588                        }
14589                    }
14590                }
14591                Err(e) => {
14592                    warn!("Failed to create stream client: {}", e);
14593                }
14594            }
14595        }
14596
14597        // Update stats
14598        stats.graph_node_count += num_nodes;
14599        stats.graph_edge_count += num_edges;
14600        stats.graph_export_count += 1;
14601
14602        Ok(HypergraphExportInfo {
14603            node_count: num_nodes,
14604            edge_count: num_edges,
14605            hyperedge_count: num_hyperedges,
14606            output_path: hg_dir,
14607        })
14608    }
14609
14610    /// Generate banking KYC/AML data.
14611    ///
14612    /// Creates banking customers, accounts, and transactions with AML typology injection.
14613    /// Uses the BankingOrchestrator from synth-banking crate.
14614    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
14615        let pb = self.create_progress_bar(100, "Generating Banking Data");
14616
14617        // Build the banking orchestrator from config
14618        let orchestrator = BankingOrchestratorBuilder::new()
14619            .config(self.config.banking.clone())
14620            .seed(self.seed + 9000)
14621            .country_pack(self.primary_pack().clone())
14622            .build();
14623
14624        if let Some(pb) = &pb {
14625            pb.inc(10);
14626        }
14627
14628        // Generate the banking data
14629        let result = orchestrator.generate();
14630
14631        if let Some(pb) = &pb {
14632            pb.inc(90);
14633            pb.finish_with_message(format!(
14634                "Banking: {} customers, {} transactions",
14635                result.customers.len(),
14636                result.transactions.len()
14637            ));
14638        }
14639
14640        // Cross-reference banking customers with core master data so that
14641        // banking customer names align with the enterprise customer list.
14642        // We rotate through core customers, overlaying their name and country
14643        // onto the generated banking customers where possible.
14644        let mut banking_customers = result.customers;
14645        let core_customers = &self.master_data.customers;
14646        if !core_customers.is_empty() {
14647            for (i, bc) in banking_customers.iter_mut().enumerate() {
14648                let core = &core_customers[i % core_customers.len()];
14649                bc.name = CustomerName::business(&core.name);
14650                bc.residence_country = core.country.clone();
14651                bc.enterprise_customer_id = Some(core.customer_id.clone());
14652            }
14653            debug!(
14654                "Cross-referenced {} banking customers with {} core customers",
14655                banking_customers.len(),
14656                core_customers.len()
14657            );
14658        }
14659
14660        Ok(BankingSnapshot {
14661            customers: banking_customers,
14662            accounts: result.accounts,
14663            transactions: result.transactions,
14664            transaction_labels: result.transaction_labels,
14665            customer_labels: result.customer_labels,
14666            account_labels: result.account_labels,
14667            relationship_labels: result.relationship_labels,
14668            narratives: result.narratives,
14669            suspicious_count: result.stats.suspicious_count,
14670            scenario_count: result.scenarios.len(),
14671        })
14672    }
14673
14674    /// Calculate total transactions to generate.
14675    fn calculate_total_transactions(&self) -> u64 {
14676        let months = self.config.global.period_months as f64;
14677        self.config
14678            .companies
14679            .iter()
14680            .map(|c| {
14681                let annual = c.annual_transaction_volume.count() as f64;
14682                let weighted = annual * c.volume_weight;
14683                (weighted * months / 12.0) as u64
14684            })
14685            .sum()
14686    }
14687
14688    /// Create a progress bar if progress display is enabled.
14689    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
14690        if !self.phase_config.show_progress {
14691            return None;
14692        }
14693
14694        let pb = if let Some(mp) = &self.multi_progress {
14695            mp.add(ProgressBar::new(total))
14696        } else {
14697            ProgressBar::new(total)
14698        };
14699
14700        pb.set_style(
14701            ProgressStyle::default_bar()
14702                .template(&format!(
14703                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
14704                ))
14705                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
14706                .progress_chars("#>-"),
14707        );
14708
14709        Some(pb)
14710    }
14711
14712    /// Get the generated chart of accounts.
14713    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
14714        self.coa.clone()
14715    }
14716
14717    /// Get the generated master data.
14718    pub fn get_master_data(&self) -> &MasterDataSnapshot {
14719        &self.master_data
14720    }
14721
14722    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
14723    fn phase_compliance_regulations(
14724        &mut self,
14725        _stats: &mut EnhancedGenerationStatistics,
14726    ) -> SynthResult<ComplianceRegulationsSnapshot> {
14727        if !self.phase_config.generate_compliance_regulations {
14728            return Ok(ComplianceRegulationsSnapshot::default());
14729        }
14730
14731        info!("Phase: Generating Compliance Regulations Data");
14732
14733        let cr_config = &self.config.compliance_regulations;
14734
14735        // Determine jurisdictions: from config or inferred from companies
14736        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
14737            self.config
14738                .companies
14739                .iter()
14740                .map(|c| c.country.clone())
14741                .collect::<std::collections::HashSet<_>>()
14742                .into_iter()
14743                .collect()
14744        } else {
14745            cr_config.jurisdictions.clone()
14746        };
14747
14748        // Determine reference date
14749        let fallback_date =
14750            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
14751        let reference_date = cr_config
14752            .reference_date
14753            .as_ref()
14754            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
14755            .unwrap_or_else(|| {
14756                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14757                    .unwrap_or(fallback_date)
14758            });
14759
14760        // Generate standards registry data
14761        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
14762        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
14763        let cross_reference_records = reg_gen.generate_cross_reference_records();
14764        let jurisdiction_records =
14765            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
14766
14767        info!(
14768            "  Standards: {} records, {} cross-references, {} jurisdictions",
14769            standard_records.len(),
14770            cross_reference_records.len(),
14771            jurisdiction_records.len()
14772        );
14773
14774        // Generate audit procedures (if enabled)
14775        let audit_procedures = if cr_config.audit_procedures.enabled {
14776            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
14777                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
14778                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
14779                confidence_level: cr_config.audit_procedures.confidence_level,
14780                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
14781            };
14782            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
14783                self.seed + 9000,
14784                proc_config,
14785            );
14786            let registry = reg_gen.registry();
14787            let mut all_procs = Vec::new();
14788            for jurisdiction in &jurisdictions {
14789                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
14790                all_procs.extend(procs);
14791            }
14792            info!("  Audit procedures: {}", all_procs.len());
14793            all_procs
14794        } else {
14795            Vec::new()
14796        };
14797
14798        // Generate compliance findings (if enabled)
14799        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
14800            let finding_config =
14801                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
14802                    finding_rate: cr_config.findings.finding_rate,
14803                    material_weakness_rate: cr_config.findings.material_weakness_rate,
14804                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
14805                    generate_remediation: cr_config.findings.generate_remediation,
14806                };
14807            let mut finding_gen =
14808                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
14809                    self.seed + 9100,
14810                    finding_config,
14811                );
14812            let mut all_findings = Vec::new();
14813            for company in &self.config.companies {
14814                let company_findings =
14815                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
14816                all_findings.extend(company_findings);
14817            }
14818            info!("  Compliance findings: {}", all_findings.len());
14819            all_findings
14820        } else {
14821            Vec::new()
14822        };
14823
14824        // Generate regulatory filings (if enabled)
14825        let filings = if cr_config.filings.enabled {
14826            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
14827                filing_types: cr_config.filings.filing_types.clone(),
14828                generate_status_progression: cr_config.filings.generate_status_progression,
14829            };
14830            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
14831                self.seed + 9200,
14832                filing_config,
14833            );
14834            let company_codes: Vec<String> = self
14835                .config
14836                .companies
14837                .iter()
14838                .map(|c| c.code.clone())
14839                .collect();
14840            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14841                .unwrap_or(fallback_date);
14842            let filings = filing_gen.generate_filings(
14843                &company_codes,
14844                &jurisdictions,
14845                start_date,
14846                self.config.global.period_months,
14847            );
14848            info!("  Regulatory filings: {}", filings.len());
14849            filings
14850        } else {
14851            Vec::new()
14852        };
14853
14854        // Build compliance graph (if enabled)
14855        let compliance_graph = if cr_config.graph.enabled {
14856            let graph_config = datasynth_graph::ComplianceGraphConfig {
14857                include_standard_nodes: cr_config.graph.include_compliance_nodes,
14858                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
14859                include_cross_references: cr_config.graph.include_cross_references,
14860                include_supersession_edges: cr_config.graph.include_supersession_edges,
14861                include_account_links: cr_config.graph.include_account_links,
14862                include_control_links: cr_config.graph.include_control_links,
14863                include_company_links: cr_config.graph.include_company_links,
14864            };
14865            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
14866
14867            // Add standard nodes
14868            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
14869                .iter()
14870                .map(|r| datasynth_graph::StandardNodeInput {
14871                    standard_id: r.standard_id.clone(),
14872                    title: r.title.clone(),
14873                    category: r.category.clone(),
14874                    domain: r.domain.clone(),
14875                    is_active: r.is_active,
14876                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
14877                    applicable_account_types: r.applicable_account_types.clone(),
14878                    applicable_processes: r.applicable_processes.clone(),
14879                })
14880                .collect();
14881            builder.add_standards(&standard_inputs);
14882
14883            // Add jurisdiction nodes
14884            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
14885                jurisdiction_records
14886                    .iter()
14887                    .map(|r| datasynth_graph::JurisdictionNodeInput {
14888                        country_code: r.country_code.clone(),
14889                        country_name: r.country_name.clone(),
14890                        framework: r.accounting_framework.clone(),
14891                        standard_count: r.standard_count,
14892                        tax_rate: r.statutory_tax_rate,
14893                    })
14894                    .collect();
14895            builder.add_jurisdictions(&jurisdiction_inputs);
14896
14897            // Add cross-reference edges
14898            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
14899                cross_reference_records
14900                    .iter()
14901                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
14902                        from_standard: r.from_standard.clone(),
14903                        to_standard: r.to_standard.clone(),
14904                        relationship: r.relationship.clone(),
14905                        convergence_level: r.convergence_level,
14906                    })
14907                    .collect();
14908            builder.add_cross_references(&xref_inputs);
14909
14910            // Add jurisdiction→standard mappings
14911            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
14912                .iter()
14913                .map(|r| datasynth_graph::JurisdictionMappingInput {
14914                    country_code: r.jurisdiction.clone(),
14915                    standard_id: r.standard_id.clone(),
14916                })
14917                .collect();
14918            builder.add_jurisdiction_mappings(&mapping_inputs);
14919
14920            // Add procedure nodes
14921            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
14922                .iter()
14923                .map(|p| datasynth_graph::ProcedureNodeInput {
14924                    procedure_id: p.procedure_id.clone(),
14925                    standard_id: p.standard_id.clone(),
14926                    procedure_type: p.procedure_type.clone(),
14927                    sample_size: p.sample_size,
14928                    confidence_level: p.confidence_level,
14929                })
14930                .collect();
14931            builder.add_procedures(&proc_inputs);
14932
14933            // Add finding nodes
14934            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
14935                .iter()
14936                .map(|f| datasynth_graph::FindingNodeInput {
14937                    finding_id: f.finding_id.to_string(),
14938                    standard_id: f
14939                        .related_standards
14940                        .first()
14941                        .map(|s| s.as_str().to_string())
14942                        .unwrap_or_default(),
14943                    severity: f.severity.to_string(),
14944                    deficiency_level: f.deficiency_level.to_string(),
14945                    severity_score: f.deficiency_level.severity_score(),
14946                    control_id: f.control_id.clone(),
14947                    affected_accounts: f.affected_accounts.clone(),
14948                })
14949                .collect();
14950            builder.add_findings(&finding_inputs);
14951
14952            // Cross-domain: link standards to accounts from chart of accounts
14953            if cr_config.graph.include_account_links {
14954                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14955                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
14956                for std_record in &standard_records {
14957                    if let Some(std_obj) =
14958                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
14959                            &std_record.standard_id,
14960                        ))
14961                    {
14962                        for acct_type in &std_obj.applicable_account_types {
14963                            account_links.push(datasynth_graph::AccountLinkInput {
14964                                standard_id: std_record.standard_id.clone(),
14965                                account_code: acct_type.clone(),
14966                                account_name: acct_type.clone(),
14967                            });
14968                        }
14969                    }
14970                }
14971                builder.add_account_links(&account_links);
14972            }
14973
14974            // Cross-domain: link standards to internal controls
14975            if cr_config.graph.include_control_links {
14976                let mut control_links = Vec::new();
14977                // SOX/PCAOB standards link to all controls
14978                let sox_like_ids: Vec<String> = standard_records
14979                    .iter()
14980                    .filter(|r| {
14981                        r.standard_id.starts_with("SOX")
14982                            || r.standard_id.starts_with("PCAOB-AS-2201")
14983                    })
14984                    .map(|r| r.standard_id.clone())
14985                    .collect();
14986                // Get control IDs from config (C001-C060 standard controls)
14987                let control_ids = [
14988                    ("C001", "Cash Controls"),
14989                    ("C002", "Large Transaction Approval"),
14990                    ("C010", "PO Approval"),
14991                    ("C011", "Three-Way Match"),
14992                    ("C020", "Revenue Recognition"),
14993                    ("C021", "Credit Check"),
14994                    ("C030", "Manual JE Approval"),
14995                    ("C031", "Period Close Review"),
14996                    ("C032", "Account Reconciliation"),
14997                    ("C040", "Payroll Processing"),
14998                    ("C050", "Fixed Asset Capitalization"),
14999                    ("C060", "Intercompany Elimination"),
15000                ];
15001                for sox_id in &sox_like_ids {
15002                    for (ctrl_id, ctrl_name) in &control_ids {
15003                        control_links.push(datasynth_graph::ControlLinkInput {
15004                            standard_id: sox_id.clone(),
15005                            control_id: ctrl_id.to_string(),
15006                            control_name: ctrl_name.to_string(),
15007                        });
15008                    }
15009                }
15010                builder.add_control_links(&control_links);
15011            }
15012
15013            // Cross-domain: filing nodes with company links
15014            if cr_config.graph.include_company_links {
15015                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15016                    .iter()
15017                    .enumerate()
15018                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
15019                        filing_id: format!("F{:04}", i + 1),
15020                        filing_type: f.filing_type.to_string(),
15021                        company_code: f.company_code.clone(),
15022                        jurisdiction: f.jurisdiction.clone(),
15023                        status: format!("{:?}", f.status),
15024                    })
15025                    .collect();
15026                builder.add_filings(&filing_inputs);
15027            }
15028
15029            let graph = builder.build();
15030            info!(
15031                "  Compliance graph: {} nodes, {} edges",
15032                graph.nodes.len(),
15033                graph.edges.len()
15034            );
15035            Some(graph)
15036        } else {
15037            None
15038        };
15039
15040        self.check_resources_with_log("post-compliance-regulations")?;
15041
15042        Ok(ComplianceRegulationsSnapshot {
15043            standard_records,
15044            cross_reference_records,
15045            jurisdiction_records,
15046            audit_procedures,
15047            findings,
15048            filings,
15049            compliance_graph,
15050        })
15051    }
15052
15053    /// Build a lineage graph describing config → phase → output relationships.
15054    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15055        use super::lineage::LineageGraphBuilder;
15056
15057        let mut builder = LineageGraphBuilder::new();
15058
15059        // Config sections
15060        builder.add_config_section("config:global", "Global Config");
15061        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15062        builder.add_config_section("config:transactions", "Transaction Config");
15063
15064        // Generator phases
15065        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15066        builder.add_generator_phase("phase:je", "Journal Entry Generation");
15067
15068        // Config → phase edges
15069        builder.configured_by("phase:coa", "config:chart_of_accounts");
15070        builder.configured_by("phase:je", "config:transactions");
15071
15072        // Output files
15073        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15074        builder.produced_by("output:je", "phase:je");
15075
15076        // Optional phases based on config
15077        if self.phase_config.generate_master_data {
15078            builder.add_config_section("config:master_data", "Master Data Config");
15079            builder.add_generator_phase("phase:master_data", "Master Data Generation");
15080            builder.configured_by("phase:master_data", "config:master_data");
15081            builder.input_to("phase:master_data", "phase:je");
15082        }
15083
15084        if self.phase_config.generate_document_flows {
15085            builder.add_config_section("config:document_flows", "Document Flow Config");
15086            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15087            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15088            builder.configured_by("phase:p2p", "config:document_flows");
15089            builder.configured_by("phase:o2c", "config:document_flows");
15090
15091            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15092            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15093            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15094            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15095            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15096
15097            builder.produced_by("output:po", "phase:p2p");
15098            builder.produced_by("output:gr", "phase:p2p");
15099            builder.produced_by("output:vi", "phase:p2p");
15100            builder.produced_by("output:so", "phase:o2c");
15101            builder.produced_by("output:ci", "phase:o2c");
15102        }
15103
15104        if self.phase_config.inject_anomalies {
15105            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15106            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15107            builder.configured_by("phase:anomaly", "config:fraud");
15108            builder.add_output_file(
15109                "output:labels",
15110                "Anomaly Labels",
15111                "labels/anomaly_labels.csv",
15112            );
15113            builder.produced_by("output:labels", "phase:anomaly");
15114        }
15115
15116        if self.phase_config.generate_audit {
15117            builder.add_config_section("config:audit", "Audit Config");
15118            builder.add_generator_phase("phase:audit", "Audit Data Generation");
15119            builder.configured_by("phase:audit", "config:audit");
15120        }
15121
15122        if self.phase_config.generate_banking {
15123            builder.add_config_section("config:banking", "Banking Config");
15124            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15125            builder.configured_by("phase:banking", "config:banking");
15126        }
15127
15128        if self.config.llm.enabled {
15129            builder.add_config_section("config:llm", "LLM Enrichment Config");
15130            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15131            builder.configured_by("phase:llm_enrichment", "config:llm");
15132        }
15133
15134        if self.config.diffusion.enabled {
15135            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15136            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15137            builder.configured_by("phase:diffusion", "config:diffusion");
15138        }
15139
15140        if self.config.causal.enabled {
15141            builder.add_config_section("config:causal", "Causal Generation Config");
15142            builder.add_generator_phase("phase:causal", "Causal Overlay");
15143            builder.configured_by("phase:causal", "config:causal");
15144        }
15145
15146        builder.build()
15147    }
15148
15149    // -----------------------------------------------------------------------
15150    // Trial-balance helpers used to replace hardcoded proxy values
15151    // -----------------------------------------------------------------------
15152
15153    /// Compute total revenue for a company from its journal entries.
15154    ///
15155    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
15156    /// net credits on all revenue-account lines filtered to `company_code`.
15157    fn compute_company_revenue(
15158        entries: &[JournalEntry],
15159        company_code: &str,
15160    ) -> rust_decimal::Decimal {
15161        use rust_decimal::Decimal;
15162        let mut revenue = Decimal::ZERO;
15163        for je in entries {
15164            if je.header.company_code != company_code {
15165                continue;
15166            }
15167            for line in &je.lines {
15168                if line.gl_account.starts_with('4') {
15169                    // Revenue is credit-normal
15170                    revenue += line.credit_amount - line.debit_amount;
15171                }
15172            }
15173        }
15174        revenue.max(Decimal::ZERO)
15175    }
15176
15177    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
15178    ///
15179    /// Asset accounts start with "1"; liability accounts start with "2".
15180    fn compute_entity_net_assets(
15181        entries: &[JournalEntry],
15182        entity_code: &str,
15183    ) -> rust_decimal::Decimal {
15184        use rust_decimal::Decimal;
15185        let mut asset_net = Decimal::ZERO;
15186        let mut liability_net = Decimal::ZERO;
15187        for je in entries {
15188            if je.header.company_code != entity_code {
15189                continue;
15190            }
15191            for line in &je.lines {
15192                if line.gl_account.starts_with('1') {
15193                    asset_net += line.debit_amount - line.credit_amount;
15194                } else if line.gl_account.starts_with('2') {
15195                    liability_net += line.credit_amount - line.debit_amount;
15196                }
15197            }
15198        }
15199        asset_net - liability_net
15200    }
15201
15202    /// v3.5.1+: Run the statistical validation suite configured in
15203    /// `distributions.validation.tests` over the final amount
15204    /// distribution.  Collects every non-zero line-level amount (debit +
15205    /// credit) and hands it to the runners in
15206    /// `datasynth_core::distributions::validation`.
15207    ///
15208    /// Returns `Ok(None)` when validation is disabled (the default).
15209    /// When `reporting.fail_on_error = true` and any test fails, returns
15210    /// `Err` with a concise message; otherwise attaches the report to
15211    /// the result and lets callers inspect it.
15212    fn phase_statistical_validation(
15213        &self,
15214        entries: &[JournalEntry],
15215    ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15216        use datasynth_config::schema::StatisticalTestConfig;
15217        use datasynth_core::distributions::{
15218            run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15219            run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15220        };
15221        use rust_decimal::prelude::ToPrimitive;
15222
15223        let cfg = &self.config.distributions.validation;
15224        if !cfg.enabled {
15225            return Ok(None);
15226        }
15227
15228        // Collect per-line positive amounts (debit + credit is zero on the
15229        // non-posting side, so this naturally picks the magnitude).
15230        let amounts: Vec<rust_decimal::Decimal> = entries
15231            .iter()
15232            .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15233            .filter(|a| *a > rust_decimal::Decimal::ZERO)
15234            .collect();
15235
15236        // v4.1.0+ paired (amount, line_count) per entry for correlation
15237        // checks. Amount per entry is the debit-side total (= credit-side
15238        // total for a balanced entry).
15239        let paired_amount_linecount: Vec<(f64, f64)> = entries
15240            .iter()
15241            .filter_map(|je| {
15242                let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
15243                if amt > rust_decimal::Decimal::ZERO {
15244                    amt.to_f64().map(|a| (a, je.lines.len() as f64))
15245                } else {
15246                    None
15247                }
15248            })
15249            .collect();
15250
15251        let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
15252        for test_cfg in &cfg.tests {
15253            match test_cfg {
15254                StatisticalTestConfig::BenfordFirstDigit {
15255                    threshold_mad,
15256                    warning_mad,
15257                } => {
15258                    results.push(run_benford_first_digit(
15259                        &amounts,
15260                        *threshold_mad,
15261                        *warning_mad,
15262                    ));
15263                }
15264                StatisticalTestConfig::ChiSquared { bins, significance } => {
15265                    results.push(run_chi_squared(&amounts, *bins, *significance));
15266                }
15267                StatisticalTestConfig::DistributionFit {
15268                    target: _,
15269                    ks_significance,
15270                    method: _,
15271                } => {
15272                    // v3.5.1+: log-uniformity KS check. Target-specific
15273                    // fits against Normal / Exponential land in v4.1.1+.
15274                    results.push(run_ks_uniform_log(&amounts, *ks_significance));
15275                }
15276                StatisticalTestConfig::AndersonDarling {
15277                    target: _,
15278                    significance,
15279                } => {
15280                    // v4.1.0+: A*² statistic against log-normal on the
15281                    // log-scale. Other targets follow the same pattern.
15282                    results.push(run_anderson_darling(&amounts, *significance));
15283                }
15284                StatisticalTestConfig::CorrelationCheck {
15285                    expected_correlations,
15286                } => {
15287                    // v4.1.0+: (amount, line_count) is tracked today.
15288                    // Other pairs resolve to Skipped pending richer
15289                    // per-entry attribute collection.
15290                    if expected_correlations.is_empty() {
15291                        results.push(StatisticalTestResult {
15292                            name: "correlation_check".to_string(),
15293                            outcome: TestOutcome::Skipped,
15294                            statistic: 0.0,
15295                            threshold: 0.0,
15296                            message: "no expected correlations declared".to_string(),
15297                        });
15298                    } else {
15299                        for ec in expected_correlations {
15300                            let pair_key = format!("{}_{}", ec.field1, ec.field2);
15301                            let is_amount_linecount = (ec.field1 == "amount"
15302                                && ec.field2 == "line_count")
15303                                || (ec.field1 == "line_count" && ec.field2 == "amount");
15304                            if is_amount_linecount {
15305                                let xs: Vec<f64> =
15306                                    paired_amount_linecount.iter().map(|(a, _)| *a).collect();
15307                                let ys: Vec<f64> =
15308                                    paired_amount_linecount.iter().map(|(_, l)| *l).collect();
15309                                results.push(run_correlation_check(
15310                                    &pair_key,
15311                                    &xs,
15312                                    &ys,
15313                                    ec.expected_r,
15314                                    ec.tolerance,
15315                                ));
15316                            } else {
15317                                results.push(StatisticalTestResult {
15318                                    name: format!("correlation_check_{pair_key}"),
15319                                    outcome: TestOutcome::Skipped,
15320                                    statistic: 0.0,
15321                                    threshold: ec.tolerance,
15322                                    message: format!(
15323                                        "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
15324                                        ec.field1, ec.field2
15325                                    ),
15326                                });
15327                            }
15328                        }
15329                    }
15330                }
15331            }
15332        }
15333
15334        let report = StatisticalValidationReport {
15335            sample_count: amounts.len(),
15336            results,
15337        };
15338
15339        if cfg.reporting.fail_on_error && !report.all_passed() {
15340            let failed = report.failed_names().join(", ");
15341            return Err(SynthError::validation(format!(
15342                "statistical validation failed: {failed}"
15343            )));
15344        }
15345
15346        Ok(Some(report))
15347    }
15348
15349    /// v3.3.0: analytics-metadata phase.
15350    ///
15351    /// Runs AFTER all JE-adding phases (including Phase 20b's
15352    /// fraud-bias sweep). Four sub-generators fire in sequence, each
15353    /// gated by an individual `analytics_metadata.<flag>` toggle:
15354    ///
15355    /// 1. `PriorYearGenerator` — prior-year comparatives derived from
15356    ///    current-period account balances.
15357    /// 2. `IndustryBenchmarkGenerator` — industry benchmarks for the
15358    ///    configured `global.industry`.
15359    /// 3. `ManagementReportGenerator` — management-report artefacts.
15360    /// 4. `DriftEventGenerator` — post-generation drift-event labels.
15361    fn phase_analytics_metadata(
15362        &mut self,
15363        entries: &[JournalEntry],
15364    ) -> SynthResult<AnalyticsMetadataSnapshot> {
15365        use datasynth_generators::drift_event_generator::DriftEventGenerator;
15366        use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15367        use datasynth_generators::management_report_generator::ManagementReportGenerator;
15368        use datasynth_generators::prior_year_generator::PriorYearGenerator;
15369        use std::collections::BTreeMap;
15370
15371        let mut snap = AnalyticsMetadataSnapshot::default();
15372
15373        if !self.phase_config.generate_analytics_metadata {
15374            return Ok(snap);
15375        }
15376
15377        let cfg = &self.config.analytics_metadata;
15378        let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15379            .map(|d| d.year())
15380            .unwrap_or(2025);
15381
15382        // ---- 1. Prior-year comparatives ----
15383        if cfg.prior_year {
15384            let mut gen = PriorYearGenerator::new(self.seed + 9100);
15385            for company in &self.config.companies {
15386                // Aggregate current-period balances per account code +
15387                // account name from the entries slice.
15388                let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
15389                    BTreeMap::new();
15390                for je in entries {
15391                    if je.header.company_code != company.code {
15392                        continue;
15393                    }
15394                    for line in &je.lines {
15395                        let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
15396                            (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
15397                        });
15398                        entry.1 += line.debit_amount - line.credit_amount;
15399                    }
15400                }
15401                let current: Vec<(String, String, rust_decimal::Decimal)> = balances
15402                    .into_iter()
15403                    .filter(|(_, (_, bal))| !bal.is_zero())
15404                    .map(|(code, (name, bal))| (code, name, bal))
15405                    .collect();
15406                if !current.is_empty() {
15407                    let comparatives =
15408                        gen.generate_comparatives(&company.code, fiscal_year, &current);
15409                    snap.prior_year_comparatives.extend(comparatives);
15410                }
15411            }
15412            info!(
15413                "v3.3.0 analytics: {} prior-year comparatives across {} companies",
15414                snap.prior_year_comparatives.len(),
15415                self.config.companies.len()
15416            );
15417        }
15418
15419        // ---- 2. Industry benchmarks ----
15420        if cfg.industry_benchmark {
15421            use datasynth_core::models::IndustrySector;
15422            let industry = match self.config.global.industry {
15423                IndustrySector::Manufacturing => "manufacturing",
15424                IndustrySector::Retail => "retail",
15425                IndustrySector::FinancialServices => "financial_services",
15426                IndustrySector::Technology => "technology",
15427                IndustrySector::Healthcare => "healthcare",
15428                _ => "other",
15429            };
15430            let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
15431            let benchmarks = gen.generate(industry, fiscal_year);
15432            info!(
15433                "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
15434                benchmarks.len()
15435            );
15436            snap.industry_benchmarks = benchmarks;
15437        }
15438
15439        // ---- 3. Management reports ----
15440        if cfg.management_reports {
15441            let mut gen = ManagementReportGenerator::new(self.seed + 9300);
15442            let period_months = self.config.global.period_months;
15443            for company in &self.config.companies {
15444                let reports =
15445                    gen.generate_reports(&company.code, fiscal_year as u32, period_months);
15446                snap.management_reports.extend(reports);
15447            }
15448            info!(
15449                "v3.3.0 analytics: {} management reports across {} companies",
15450                snap.management_reports.len(),
15451                self.config.companies.len()
15452            );
15453        }
15454
15455        // ---- 4. Drift-event labels ----
15456        if cfg.drift_events {
15457            let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
15458                .expect("hardcoded NaiveDate 2025-01-01 is valid");
15459            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15460                .unwrap_or(fallback_start);
15461            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
15462            let mut gen = DriftEventGenerator::new(self.seed + 9400);
15463            let drifts = gen.generate_standalone_drifts(start_date, end_date);
15464            info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
15465            snap.drift_events = drifts;
15466        }
15467        // `entries` parameter reserved for future JE-aware drift detection
15468        let _ = entries;
15469
15470        Ok(snap)
15471    }
15472}
15473
15474/// Get the directory name for a graph export format.
15475fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
15476    match format {
15477        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
15478        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
15479        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
15480        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
15481        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
15482    }
15483}
15484
15485/// Aggregate journal entry lines into per-account trial balance rows.
15486///
15487/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
15488/// debit/credit totals and a net balance (debit minus credit).
15489fn compute_trial_balance_entries(
15490    entries: &[JournalEntry],
15491    entity_code: &str,
15492    fiscal_year: i32,
15493    coa: Option<&ChartOfAccounts>,
15494) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
15495    use std::collections::BTreeMap;
15496
15497    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
15498        BTreeMap::new();
15499
15500    for je in entries {
15501        for line in &je.lines {
15502            let entry = balances.entry(line.account_code.clone()).or_default();
15503            entry.0 += line.debit_amount;
15504            entry.1 += line.credit_amount;
15505        }
15506    }
15507
15508    balances
15509        .into_iter()
15510        .map(
15511            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
15512                account_description: coa
15513                    .and_then(|c| c.get_account(&account_code))
15514                    .map(|a| a.description().to_string())
15515                    .unwrap_or_else(|| account_code.clone()),
15516                account_code,
15517                debit_balance: debit,
15518                credit_balance: credit,
15519                net_balance: debit - credit,
15520                entity_code: entity_code.to_string(),
15521                period: format!("FY{}", fiscal_year),
15522            },
15523        )
15524        .collect()
15525}
15526
15527#[cfg(test)]
15528#[allow(clippy::unwrap_used)]
15529mod tests {
15530    use super::*;
15531    use datasynth_config::schema::*;
15532
15533    fn create_test_config() -> GeneratorConfig {
15534        GeneratorConfig {
15535            global: GlobalConfig {
15536                industry: IndustrySector::Manufacturing,
15537                start_date: "2024-01-01".to_string(),
15538                period_months: 1,
15539                seed: Some(42),
15540                parallel: false,
15541                group_currency: "USD".to_string(),
15542                presentation_currency: None,
15543                worker_threads: 0,
15544                memory_limit_mb: 0,
15545                fiscal_year_months: None,
15546            },
15547            companies: vec![CompanyConfig {
15548                code: "1000".to_string(),
15549                name: "Test Company".to_string(),
15550                currency: "USD".to_string(),
15551                functional_currency: None,
15552                country: "US".to_string(),
15553                annual_transaction_volume: TransactionVolume::TenK,
15554                volume_weight: 1.0,
15555                fiscal_year_variant: "K4".to_string(),
15556            }],
15557            chart_of_accounts: ChartOfAccountsConfig {
15558                complexity: CoAComplexity::Small,
15559                industry_specific: true,
15560                custom_accounts: None,
15561                min_hierarchy_depth: 2,
15562                max_hierarchy_depth: 4,
15563                expand_industry_subaccounts: false,
15564            },
15565            transactions: TransactionConfig::default(),
15566            output: OutputConfig::default(),
15567            fraud: FraudConfig::default(),
15568            internal_controls: InternalControlsConfig::default(),
15569            business_processes: BusinessProcessConfig::default(),
15570            user_personas: UserPersonaConfig::default(),
15571            templates: TemplateConfig::default(),
15572            approval: ApprovalConfig::default(),
15573            departments: DepartmentConfig::default(),
15574            master_data: MasterDataConfig::default(),
15575            document_flows: DocumentFlowConfig::default(),
15576            intercompany: IntercompanyConfig::default(),
15577            balance: BalanceConfig::default(),
15578            ocpm: OcpmConfig::default(),
15579            audit: AuditGenerationConfig::default(),
15580            banking: datasynth_banking::BankingConfig::default(),
15581            data_quality: DataQualitySchemaConfig::default(),
15582            scenario: ScenarioConfig::default(),
15583            temporal: TemporalDriftConfig::default(),
15584            graph_export: GraphExportConfig::default(),
15585            streaming: StreamingSchemaConfig::default(),
15586            rate_limit: RateLimitSchemaConfig::default(),
15587            temporal_attributes: TemporalAttributeSchemaConfig::default(),
15588            relationships: RelationshipSchemaConfig::default(),
15589            accounting_standards: AccountingStandardsConfig::default(),
15590            audit_standards: AuditStandardsConfig::default(),
15591            distributions: Default::default(),
15592            temporal_patterns: Default::default(),
15593            vendor_network: VendorNetworkSchemaConfig::default(),
15594            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
15595            relationship_strength: RelationshipStrengthSchemaConfig::default(),
15596            cross_process_links: CrossProcessLinksSchemaConfig::default(),
15597            organizational_events: OrganizationalEventsSchemaConfig::default(),
15598            behavioral_drift: BehavioralDriftSchemaConfig::default(),
15599            market_drift: MarketDriftSchemaConfig::default(),
15600            drift_labeling: DriftLabelingSchemaConfig::default(),
15601            anomaly_injection: Default::default(),
15602            industry_specific: Default::default(),
15603            fingerprint_privacy: Default::default(),
15604            quality_gates: Default::default(),
15605            compliance: Default::default(),
15606            webhooks: Default::default(),
15607            llm: Default::default(),
15608            diffusion: Default::default(),
15609            causal: Default::default(),
15610            source_to_pay: Default::default(),
15611            financial_reporting: Default::default(),
15612            hr: Default::default(),
15613            manufacturing: Default::default(),
15614            sales_quotes: Default::default(),
15615            tax: Default::default(),
15616            treasury: Default::default(),
15617            project_accounting: Default::default(),
15618            esg: Default::default(),
15619            country_packs: None,
15620            scenarios: Default::default(),
15621            session: Default::default(),
15622            compliance_regulations: Default::default(),
15623            analytics_metadata: Default::default(),
15624        }
15625    }
15626
15627    #[test]
15628    fn test_enhanced_orchestrator_creation() {
15629        let config = create_test_config();
15630        let orchestrator = EnhancedOrchestrator::with_defaults(config);
15631        assert!(orchestrator.is_ok());
15632    }
15633
15634    #[test]
15635    fn test_minimal_generation() {
15636        let config = create_test_config();
15637        let phase_config = PhaseConfig {
15638            generate_master_data: false,
15639            generate_document_flows: false,
15640            generate_journal_entries: true,
15641            inject_anomalies: false,
15642            show_progress: false,
15643            ..Default::default()
15644        };
15645
15646        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15647        let result = orchestrator.generate();
15648
15649        assert!(result.is_ok());
15650        let result = result.unwrap();
15651        assert!(!result.journal_entries.is_empty());
15652    }
15653
15654    #[test]
15655    fn test_master_data_generation() {
15656        let config = create_test_config();
15657        let phase_config = PhaseConfig {
15658            generate_master_data: true,
15659            generate_document_flows: false,
15660            generate_journal_entries: false,
15661            inject_anomalies: false,
15662            show_progress: false,
15663            vendors_per_company: 5,
15664            customers_per_company: 5,
15665            materials_per_company: 10,
15666            assets_per_company: 5,
15667            employees_per_company: 10,
15668            ..Default::default()
15669        };
15670
15671        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15672        let result = orchestrator.generate().unwrap();
15673
15674        assert!(!result.master_data.vendors.is_empty());
15675        assert!(!result.master_data.customers.is_empty());
15676        assert!(!result.master_data.materials.is_empty());
15677    }
15678
15679    #[test]
15680    fn test_document_flow_generation() {
15681        let config = create_test_config();
15682        let phase_config = PhaseConfig {
15683            generate_master_data: true,
15684            generate_document_flows: true,
15685            generate_journal_entries: false,
15686            inject_anomalies: false,
15687            inject_data_quality: false,
15688            validate_balances: false,
15689            validate_coa_coverage_strict: false,
15690            generate_ocpm_events: false,
15691            show_progress: false,
15692            vendors_per_company: 5,
15693            customers_per_company: 5,
15694            materials_per_company: 10,
15695            assets_per_company: 5,
15696            employees_per_company: 10,
15697            p2p_chains: 5,
15698            o2c_chains: 5,
15699            ..Default::default()
15700        };
15701
15702        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15703        let result = orchestrator.generate().unwrap();
15704
15705        // Should have generated P2P and O2C chains
15706        assert!(!result.document_flows.p2p_chains.is_empty());
15707        assert!(!result.document_flows.o2c_chains.is_empty());
15708
15709        // Flattened documents should be populated
15710        assert!(!result.document_flows.purchase_orders.is_empty());
15711        assert!(!result.document_flows.sales_orders.is_empty());
15712    }
15713
15714    #[test]
15715    fn test_anomaly_injection() {
15716        let config = create_test_config();
15717        let phase_config = PhaseConfig {
15718            generate_master_data: false,
15719            generate_document_flows: false,
15720            generate_journal_entries: true,
15721            inject_anomalies: true,
15722            show_progress: false,
15723            ..Default::default()
15724        };
15725
15726        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15727        let result = orchestrator.generate().unwrap();
15728
15729        // Should have journal entries
15730        assert!(!result.journal_entries.is_empty());
15731
15732        // With ~833 entries and 2% rate, expect some anomalies
15733        // Note: This is probabilistic, so we just verify the structure exists
15734        assert!(result.anomaly_labels.summary.is_some());
15735    }
15736
15737    #[test]
15738    fn test_full_generation_pipeline() {
15739        let config = create_test_config();
15740        let phase_config = PhaseConfig {
15741            generate_master_data: true,
15742            generate_document_flows: true,
15743            generate_journal_entries: true,
15744            inject_anomalies: false,
15745            inject_data_quality: false,
15746            validate_balances: true,
15747            validate_coa_coverage_strict: false,
15748            generate_ocpm_events: false,
15749            show_progress: false,
15750            vendors_per_company: 3,
15751            customers_per_company: 3,
15752            materials_per_company: 5,
15753            assets_per_company: 3,
15754            employees_per_company: 5,
15755            p2p_chains: 3,
15756            o2c_chains: 3,
15757            ..Default::default()
15758        };
15759
15760        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15761        let result = orchestrator.generate().unwrap();
15762
15763        // All phases should have results
15764        assert!(!result.master_data.vendors.is_empty());
15765        assert!(!result.master_data.customers.is_empty());
15766        assert!(!result.document_flows.p2p_chains.is_empty());
15767        assert!(!result.document_flows.o2c_chains.is_empty());
15768        assert!(!result.journal_entries.is_empty());
15769        assert!(result.statistics.accounts_count > 0);
15770
15771        // Subledger linking should have run
15772        assert!(!result.subledger.ap_invoices.is_empty());
15773        assert!(!result.subledger.ar_invoices.is_empty());
15774
15775        // Balance validation should have run
15776        assert!(result.balance_validation.validated);
15777        assert!(result.balance_validation.entries_processed > 0);
15778    }
15779
15780    #[test]
15781    fn test_subledger_linking() {
15782        let config = create_test_config();
15783        let phase_config = PhaseConfig {
15784            generate_master_data: true,
15785            generate_document_flows: true,
15786            generate_journal_entries: false,
15787            inject_anomalies: false,
15788            inject_data_quality: false,
15789            validate_balances: false,
15790            validate_coa_coverage_strict: false,
15791            generate_ocpm_events: false,
15792            show_progress: false,
15793            vendors_per_company: 5,
15794            customers_per_company: 5,
15795            materials_per_company: 10,
15796            assets_per_company: 3,
15797            employees_per_company: 5,
15798            p2p_chains: 5,
15799            o2c_chains: 5,
15800            ..Default::default()
15801        };
15802
15803        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15804        let result = orchestrator.generate().unwrap();
15805
15806        // Should have document flows
15807        assert!(!result.document_flows.vendor_invoices.is_empty());
15808        assert!(!result.document_flows.customer_invoices.is_empty());
15809
15810        // Subledger should be linked from document flows
15811        assert!(!result.subledger.ap_invoices.is_empty());
15812        assert!(!result.subledger.ar_invoices.is_empty());
15813
15814        // AP invoices count should match vendor invoices count
15815        assert_eq!(
15816            result.subledger.ap_invoices.len(),
15817            result.document_flows.vendor_invoices.len()
15818        );
15819
15820        // AR invoices count should match customer invoices count
15821        assert_eq!(
15822            result.subledger.ar_invoices.len(),
15823            result.document_flows.customer_invoices.len()
15824        );
15825
15826        // Statistics should reflect subledger counts
15827        assert_eq!(
15828            result.statistics.ap_invoice_count,
15829            result.subledger.ap_invoices.len()
15830        );
15831        assert_eq!(
15832            result.statistics.ar_invoice_count,
15833            result.subledger.ar_invoices.len()
15834        );
15835    }
15836
15837    #[test]
15838    fn test_balance_validation() {
15839        let config = create_test_config();
15840        let phase_config = PhaseConfig {
15841            generate_master_data: false,
15842            generate_document_flows: false,
15843            generate_journal_entries: true,
15844            inject_anomalies: false,
15845            validate_balances: true,
15846            validate_coa_coverage_strict: false,
15847            show_progress: false,
15848            ..Default::default()
15849        };
15850
15851        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15852        let result = orchestrator.generate().unwrap();
15853
15854        // Balance validation should run
15855        assert!(result.balance_validation.validated);
15856        assert!(result.balance_validation.entries_processed > 0);
15857
15858        // Generated JEs should be balanced (no unbalanced entries)
15859        assert!(!result.balance_validation.has_unbalanced_entries);
15860
15861        // Total debits should equal total credits
15862        assert_eq!(
15863            result.balance_validation.total_debits,
15864            result.balance_validation.total_credits
15865        );
15866    }
15867
15868    #[test]
15869    fn test_statistics_accuracy() {
15870        let config = create_test_config();
15871        let phase_config = PhaseConfig {
15872            generate_master_data: true,
15873            generate_document_flows: false,
15874            generate_journal_entries: true,
15875            inject_anomalies: false,
15876            show_progress: false,
15877            vendors_per_company: 10,
15878            customers_per_company: 20,
15879            materials_per_company: 15,
15880            assets_per_company: 5,
15881            employees_per_company: 8,
15882            ..Default::default()
15883        };
15884
15885        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15886        let result = orchestrator.generate().unwrap();
15887
15888        // Statistics should match actual data
15889        assert_eq!(
15890            result.statistics.vendor_count,
15891            result.master_data.vendors.len()
15892        );
15893        assert_eq!(
15894            result.statistics.customer_count,
15895            result.master_data.customers.len()
15896        );
15897        assert_eq!(
15898            result.statistics.material_count,
15899            result.master_data.materials.len()
15900        );
15901        assert_eq!(
15902            result.statistics.total_entries as usize,
15903            result.journal_entries.len()
15904        );
15905    }
15906
15907    #[test]
15908    fn test_phase_config_defaults() {
15909        let config = PhaseConfig::default();
15910        assert!(config.generate_master_data);
15911        assert!(config.generate_document_flows);
15912        assert!(config.generate_journal_entries);
15913        assert!(!config.inject_anomalies);
15914        assert!(config.validate_balances);
15915        assert!(config.show_progress);
15916        assert!(config.vendors_per_company > 0);
15917        assert!(config.customers_per_company > 0);
15918    }
15919
15920    #[test]
15921    fn test_get_coa_before_generation() {
15922        let config = create_test_config();
15923        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
15924
15925        // Before generation, CoA should be None
15926        assert!(orchestrator.get_coa().is_none());
15927    }
15928
15929    #[test]
15930    fn test_get_coa_after_generation() {
15931        let config = create_test_config();
15932        let phase_config = PhaseConfig {
15933            generate_master_data: false,
15934            generate_document_flows: false,
15935            generate_journal_entries: true,
15936            inject_anomalies: false,
15937            show_progress: false,
15938            ..Default::default()
15939        };
15940
15941        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15942        let _ = orchestrator.generate().unwrap();
15943
15944        // After generation, CoA should be available
15945        assert!(orchestrator.get_coa().is_some());
15946    }
15947
15948    #[test]
15949    fn test_get_master_data() {
15950        let config = create_test_config();
15951        let phase_config = PhaseConfig {
15952            generate_master_data: true,
15953            generate_document_flows: false,
15954            generate_journal_entries: false,
15955            inject_anomalies: false,
15956            show_progress: false,
15957            vendors_per_company: 5,
15958            customers_per_company: 5,
15959            materials_per_company: 5,
15960            assets_per_company: 5,
15961            employees_per_company: 5,
15962            ..Default::default()
15963        };
15964
15965        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15966        let result = orchestrator.generate().unwrap();
15967
15968        // After generate(), master_data is moved into the result
15969        assert!(!result.master_data.vendors.is_empty());
15970    }
15971
15972    #[test]
15973    fn test_with_progress_builder() {
15974        let config = create_test_config();
15975        let orchestrator = EnhancedOrchestrator::with_defaults(config)
15976            .unwrap()
15977            .with_progress(false);
15978
15979        // Should still work without progress
15980        assert!(!orchestrator.phase_config.show_progress);
15981    }
15982
15983    #[test]
15984    fn test_multi_company_generation() {
15985        let mut config = create_test_config();
15986        config.companies.push(CompanyConfig {
15987            code: "2000".to_string(),
15988            name: "Subsidiary".to_string(),
15989            currency: "EUR".to_string(),
15990            functional_currency: None,
15991            country: "DE".to_string(),
15992            annual_transaction_volume: TransactionVolume::TenK,
15993            volume_weight: 0.5,
15994            fiscal_year_variant: "K4".to_string(),
15995        });
15996
15997        let phase_config = PhaseConfig {
15998            generate_master_data: true,
15999            generate_document_flows: false,
16000            generate_journal_entries: true,
16001            inject_anomalies: false,
16002            show_progress: false,
16003            vendors_per_company: 5,
16004            customers_per_company: 5,
16005            materials_per_company: 5,
16006            assets_per_company: 5,
16007            employees_per_company: 5,
16008            ..Default::default()
16009        };
16010
16011        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16012        let result = orchestrator.generate().unwrap();
16013
16014        // Should have master data for both companies
16015        assert!(result.statistics.vendor_count >= 10); // 5 per company
16016        assert!(result.statistics.customer_count >= 10);
16017        assert!(result.statistics.companies_count == 2);
16018    }
16019
16020    #[test]
16021    fn test_empty_master_data_skips_document_flows() {
16022        let config = create_test_config();
16023        let phase_config = PhaseConfig {
16024            generate_master_data: false,   // Skip master data
16025            generate_document_flows: true, // Try to generate flows
16026            generate_journal_entries: false,
16027            inject_anomalies: false,
16028            show_progress: false,
16029            ..Default::default()
16030        };
16031
16032        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16033        let result = orchestrator.generate().unwrap();
16034
16035        // Without master data, document flows should be empty
16036        assert!(result.document_flows.p2p_chains.is_empty());
16037        assert!(result.document_flows.o2c_chains.is_empty());
16038    }
16039
16040    #[test]
16041    fn test_journal_entry_line_item_count() {
16042        let config = create_test_config();
16043        let phase_config = PhaseConfig {
16044            generate_master_data: false,
16045            generate_document_flows: false,
16046            generate_journal_entries: true,
16047            inject_anomalies: false,
16048            show_progress: false,
16049            ..Default::default()
16050        };
16051
16052        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16053        let result = orchestrator.generate().unwrap();
16054
16055        // Total line items should match sum of all entry line counts
16056        let calculated_line_items: u64 = result
16057            .journal_entries
16058            .iter()
16059            .map(|e| e.line_count() as u64)
16060            .sum();
16061        assert_eq!(result.statistics.total_line_items, calculated_line_items);
16062    }
16063
16064    #[test]
16065    fn test_audit_generation() {
16066        let config = create_test_config();
16067        let phase_config = PhaseConfig {
16068            generate_master_data: false,
16069            generate_document_flows: false,
16070            generate_journal_entries: true,
16071            inject_anomalies: false,
16072            show_progress: false,
16073            generate_audit: true,
16074            audit_engagements: 2,
16075            workpapers_per_engagement: 5,
16076            evidence_per_workpaper: 2,
16077            risks_per_engagement: 3,
16078            findings_per_engagement: 2,
16079            judgments_per_engagement: 2,
16080            ..Default::default()
16081        };
16082
16083        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16084        let result = orchestrator.generate().unwrap();
16085
16086        // Should have generated audit data
16087        assert_eq!(result.audit.engagements.len(), 2);
16088        assert!(!result.audit.workpapers.is_empty());
16089        assert!(!result.audit.evidence.is_empty());
16090        assert!(!result.audit.risk_assessments.is_empty());
16091        assert!(!result.audit.findings.is_empty());
16092        assert!(!result.audit.judgments.is_empty());
16093
16094        // New ISA entity collections should also be populated
16095        assert!(
16096            !result.audit.confirmations.is_empty(),
16097            "ISA 505 confirmations should be generated"
16098        );
16099        assert!(
16100            !result.audit.confirmation_responses.is_empty(),
16101            "ISA 505 confirmation responses should be generated"
16102        );
16103        assert!(
16104            !result.audit.procedure_steps.is_empty(),
16105            "ISA 330 procedure steps should be generated"
16106        );
16107        // Samples may or may not be generated depending on workpaper sampling methods
16108        assert!(
16109            !result.audit.analytical_results.is_empty(),
16110            "ISA 520 analytical procedures should be generated"
16111        );
16112        assert!(
16113            !result.audit.ia_functions.is_empty(),
16114            "ISA 610 IA functions should be generated (one per engagement)"
16115        );
16116        assert!(
16117            !result.audit.related_parties.is_empty(),
16118            "ISA 550 related parties should be generated"
16119        );
16120
16121        // Statistics should match
16122        assert_eq!(
16123            result.statistics.audit_engagement_count,
16124            result.audit.engagements.len()
16125        );
16126        assert_eq!(
16127            result.statistics.audit_workpaper_count,
16128            result.audit.workpapers.len()
16129        );
16130        assert_eq!(
16131            result.statistics.audit_evidence_count,
16132            result.audit.evidence.len()
16133        );
16134        assert_eq!(
16135            result.statistics.audit_risk_count,
16136            result.audit.risk_assessments.len()
16137        );
16138        assert_eq!(
16139            result.statistics.audit_finding_count,
16140            result.audit.findings.len()
16141        );
16142        assert_eq!(
16143            result.statistics.audit_judgment_count,
16144            result.audit.judgments.len()
16145        );
16146        assert_eq!(
16147            result.statistics.audit_confirmation_count,
16148            result.audit.confirmations.len()
16149        );
16150        assert_eq!(
16151            result.statistics.audit_confirmation_response_count,
16152            result.audit.confirmation_responses.len()
16153        );
16154        assert_eq!(
16155            result.statistics.audit_procedure_step_count,
16156            result.audit.procedure_steps.len()
16157        );
16158        assert_eq!(
16159            result.statistics.audit_sample_count,
16160            result.audit.samples.len()
16161        );
16162        assert_eq!(
16163            result.statistics.audit_analytical_result_count,
16164            result.audit.analytical_results.len()
16165        );
16166        assert_eq!(
16167            result.statistics.audit_ia_function_count,
16168            result.audit.ia_functions.len()
16169        );
16170        assert_eq!(
16171            result.statistics.audit_ia_report_count,
16172            result.audit.ia_reports.len()
16173        );
16174        assert_eq!(
16175            result.statistics.audit_related_party_count,
16176            result.audit.related_parties.len()
16177        );
16178        assert_eq!(
16179            result.statistics.audit_related_party_transaction_count,
16180            result.audit.related_party_transactions.len()
16181        );
16182    }
16183
16184    #[test]
16185    fn test_new_phases_disabled_by_default() {
16186        let config = create_test_config();
16187        // Verify new config fields default to disabled
16188        assert!(!config.llm.enabled);
16189        assert!(!config.diffusion.enabled);
16190        assert!(!config.causal.enabled);
16191
16192        let phase_config = PhaseConfig {
16193            generate_master_data: false,
16194            generate_document_flows: false,
16195            generate_journal_entries: true,
16196            inject_anomalies: false,
16197            show_progress: false,
16198            ..Default::default()
16199        };
16200
16201        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16202        let result = orchestrator.generate().unwrap();
16203
16204        // All new phase statistics should be zero when disabled
16205        assert_eq!(result.statistics.llm_enrichment_ms, 0);
16206        assert_eq!(result.statistics.llm_vendors_enriched, 0);
16207        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
16208        assert_eq!(result.statistics.diffusion_samples_generated, 0);
16209        assert_eq!(result.statistics.causal_generation_ms, 0);
16210        assert_eq!(result.statistics.causal_samples_generated, 0);
16211        assert!(result.statistics.causal_validation_passed.is_none());
16212        assert_eq!(result.statistics.counterfactual_pair_count, 0);
16213        assert!(result.counterfactual_pairs.is_empty());
16214    }
16215
16216    #[test]
16217    fn test_counterfactual_generation_enabled() {
16218        let config = create_test_config();
16219        let phase_config = PhaseConfig {
16220            generate_master_data: false,
16221            generate_document_flows: false,
16222            generate_journal_entries: true,
16223            inject_anomalies: false,
16224            show_progress: false,
16225            generate_counterfactuals: true,
16226            generate_period_close: false, // Disable so entry count matches counterfactual pairs
16227            ..Default::default()
16228        };
16229
16230        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16231        let result = orchestrator.generate().unwrap();
16232
16233        // With JE generation enabled, counterfactual pairs should be generated
16234        if !result.journal_entries.is_empty() {
16235            assert_eq!(
16236                result.counterfactual_pairs.len(),
16237                result.journal_entries.len()
16238            );
16239            assert_eq!(
16240                result.statistics.counterfactual_pair_count,
16241                result.journal_entries.len()
16242            );
16243            // Each pair should have a distinct pair_id
16244            let ids: std::collections::HashSet<_> = result
16245                .counterfactual_pairs
16246                .iter()
16247                .map(|p| p.pair_id.clone())
16248                .collect();
16249            assert_eq!(ids.len(), result.counterfactual_pairs.len());
16250        }
16251    }
16252
16253    #[test]
16254    fn test_llm_enrichment_enabled() {
16255        let mut config = create_test_config();
16256        config.llm.enabled = true;
16257        config.llm.max_vendor_enrichments = 3;
16258
16259        let phase_config = PhaseConfig {
16260            generate_master_data: true,
16261            generate_document_flows: false,
16262            generate_journal_entries: false,
16263            inject_anomalies: false,
16264            show_progress: false,
16265            vendors_per_company: 5,
16266            customers_per_company: 3,
16267            materials_per_company: 3,
16268            assets_per_company: 3,
16269            employees_per_company: 3,
16270            ..Default::default()
16271        };
16272
16273        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16274        let result = orchestrator.generate().unwrap();
16275
16276        // LLM enrichment should have run
16277        assert!(result.statistics.llm_vendors_enriched > 0);
16278        assert!(result.statistics.llm_vendors_enriched <= 3);
16279    }
16280
16281    #[test]
16282    fn test_diffusion_enhancement_enabled() {
16283        let mut config = create_test_config();
16284        config.diffusion.enabled = true;
16285        config.diffusion.n_steps = 50;
16286        config.diffusion.sample_size = 20;
16287
16288        let phase_config = PhaseConfig {
16289            generate_master_data: false,
16290            generate_document_flows: false,
16291            generate_journal_entries: true,
16292            inject_anomalies: false,
16293            show_progress: false,
16294            ..Default::default()
16295        };
16296
16297        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16298        let result = orchestrator.generate().unwrap();
16299
16300        // Diffusion phase should have generated samples
16301        assert_eq!(result.statistics.diffusion_samples_generated, 20);
16302    }
16303
16304    #[test]
16305    fn test_causal_overlay_enabled() {
16306        let mut config = create_test_config();
16307        config.causal.enabled = true;
16308        config.causal.template = "fraud_detection".to_string();
16309        config.causal.sample_size = 100;
16310        config.causal.validate = true;
16311
16312        let phase_config = PhaseConfig {
16313            generate_master_data: false,
16314            generate_document_flows: false,
16315            generate_journal_entries: true,
16316            inject_anomalies: false,
16317            show_progress: false,
16318            ..Default::default()
16319        };
16320
16321        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16322        let result = orchestrator.generate().unwrap();
16323
16324        // Causal phase should have generated samples
16325        assert_eq!(result.statistics.causal_samples_generated, 100);
16326        // Validation should have run
16327        assert!(result.statistics.causal_validation_passed.is_some());
16328    }
16329
16330    #[test]
16331    fn test_causal_overlay_revenue_cycle_template() {
16332        let mut config = create_test_config();
16333        config.causal.enabled = true;
16334        config.causal.template = "revenue_cycle".to_string();
16335        config.causal.sample_size = 50;
16336        config.causal.validate = false;
16337
16338        let phase_config = PhaseConfig {
16339            generate_master_data: false,
16340            generate_document_flows: false,
16341            generate_journal_entries: true,
16342            inject_anomalies: false,
16343            show_progress: false,
16344            ..Default::default()
16345        };
16346
16347        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16348        let result = orchestrator.generate().unwrap();
16349
16350        // Causal phase should have generated samples
16351        assert_eq!(result.statistics.causal_samples_generated, 50);
16352        // Validation was disabled
16353        assert!(result.statistics.causal_validation_passed.is_none());
16354    }
16355
16356    #[test]
16357    fn test_all_new_phases_enabled_together() {
16358        let mut config = create_test_config();
16359        config.llm.enabled = true;
16360        config.llm.max_vendor_enrichments = 2;
16361        config.diffusion.enabled = true;
16362        config.diffusion.n_steps = 20;
16363        config.diffusion.sample_size = 10;
16364        config.causal.enabled = true;
16365        config.causal.sample_size = 50;
16366        config.causal.validate = true;
16367
16368        let phase_config = PhaseConfig {
16369            generate_master_data: true,
16370            generate_document_flows: false,
16371            generate_journal_entries: true,
16372            inject_anomalies: false,
16373            show_progress: false,
16374            vendors_per_company: 5,
16375            customers_per_company: 3,
16376            materials_per_company: 3,
16377            assets_per_company: 3,
16378            employees_per_company: 3,
16379            ..Default::default()
16380        };
16381
16382        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16383        let result = orchestrator.generate().unwrap();
16384
16385        // All three phases should have run
16386        assert!(result.statistics.llm_vendors_enriched > 0);
16387        assert_eq!(result.statistics.diffusion_samples_generated, 10);
16388        assert_eq!(result.statistics.causal_samples_generated, 50);
16389        assert!(result.statistics.causal_validation_passed.is_some());
16390    }
16391
16392    #[test]
16393    fn test_statistics_serialization_with_new_fields() {
16394        let stats = EnhancedGenerationStatistics {
16395            total_entries: 100,
16396            total_line_items: 500,
16397            llm_enrichment_ms: 42,
16398            llm_vendors_enriched: 10,
16399            diffusion_enhancement_ms: 100,
16400            diffusion_samples_generated: 50,
16401            causal_generation_ms: 200,
16402            causal_samples_generated: 100,
16403            causal_validation_passed: Some(true),
16404            ..Default::default()
16405        };
16406
16407        let json = serde_json::to_string(&stats).unwrap();
16408        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
16409
16410        assert_eq!(deserialized.llm_enrichment_ms, 42);
16411        assert_eq!(deserialized.llm_vendors_enriched, 10);
16412        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
16413        assert_eq!(deserialized.diffusion_samples_generated, 50);
16414        assert_eq!(deserialized.causal_generation_ms, 200);
16415        assert_eq!(deserialized.causal_samples_generated, 100);
16416        assert_eq!(deserialized.causal_validation_passed, Some(true));
16417    }
16418
16419    #[test]
16420    fn test_statistics_backward_compat_deserialization() {
16421        // Old JSON without the new fields should still deserialize
16422        let old_json = r#"{
16423            "total_entries": 100,
16424            "total_line_items": 500,
16425            "accounts_count": 50,
16426            "companies_count": 1,
16427            "period_months": 12,
16428            "vendor_count": 10,
16429            "customer_count": 20,
16430            "material_count": 15,
16431            "asset_count": 5,
16432            "employee_count": 8,
16433            "p2p_chain_count": 5,
16434            "o2c_chain_count": 5,
16435            "ap_invoice_count": 5,
16436            "ar_invoice_count": 5,
16437            "ocpm_event_count": 0,
16438            "ocpm_object_count": 0,
16439            "ocpm_case_count": 0,
16440            "audit_engagement_count": 0,
16441            "audit_workpaper_count": 0,
16442            "audit_evidence_count": 0,
16443            "audit_risk_count": 0,
16444            "audit_finding_count": 0,
16445            "audit_judgment_count": 0,
16446            "anomalies_injected": 0,
16447            "data_quality_issues": 0,
16448            "banking_customer_count": 0,
16449            "banking_account_count": 0,
16450            "banking_transaction_count": 0,
16451            "banking_suspicious_count": 0,
16452            "graph_export_count": 0,
16453            "graph_node_count": 0,
16454            "graph_edge_count": 0
16455        }"#;
16456
16457        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
16458
16459        // New fields should default to 0 / None
16460        assert_eq!(stats.llm_enrichment_ms, 0);
16461        assert_eq!(stats.llm_vendors_enriched, 0);
16462        assert_eq!(stats.diffusion_enhancement_ms, 0);
16463        assert_eq!(stats.diffusion_samples_generated, 0);
16464        assert_eq!(stats.causal_generation_ms, 0);
16465        assert_eq!(stats.causal_samples_generated, 0);
16466        assert!(stats.causal_validation_passed.is_none());
16467    }
16468}