Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{
164    AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
165    TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
166};
167use datasynth_core::models::documents::PaymentMethod;
168use datasynth_core::models::IndustrySector;
169use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
170use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
171use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
172use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
173use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
174use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
175use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
176use datasynth_generators::audit::sample_generator::SampleGenerator;
177use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
178use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
179use datasynth_generators::coa_generator::CoAFramework;
180use rayon::prelude::*;
181use rust_decimal::Decimal;
182
183// ============================================================================
184// Configuration Conversion Functions
185// ============================================================================
186
187/// Convert P2P flow config from schema to generator config.
188/// v4.4.1 — build a `DataQualityStats` with only `total_records`
189/// populated to `n_entries`. Used when the data-quality phase is
190/// skipped (by config or resource pressure) so downstream consumers
191/// can still see the denominator. Before v4.4.1 the writer emitted
192/// `total_records: 0` in those cases, which the SDK team flagged as
193/// indistinguishable from "ran but processed nothing".
194fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
195    #[allow(clippy::field_reassign_with_default)]
196    {
197        let mut s = DataQualityStats::default();
198        s.total_records = n_entries;
199        s.missing_values.total_records = n_entries;
200        s.format_variations.total_processed = n_entries;
201        s.duplicates.total_processed = n_entries;
202        s
203    }
204}
205
206fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
207    let payment_behavior = &schema_config.payment_behavior;
208    let late_dist = &payment_behavior.late_payment_days_distribution;
209
210    P2PGeneratorConfig {
211        three_way_match_rate: schema_config.three_way_match_rate,
212        partial_delivery_rate: schema_config.partial_delivery_rate,
213        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
214        price_variance_rate: schema_config.price_variance_rate,
215        max_price_variance_percent: schema_config.max_price_variance_percent,
216        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
217        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
218        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
219        payment_method_distribution: vec![
220            (PaymentMethod::BankTransfer, 0.60),
221            (PaymentMethod::Check, 0.25),
222            (PaymentMethod::Wire, 0.10),
223            (PaymentMethod::CreditCard, 0.05),
224        ],
225        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
226        payment_behavior: P2PPaymentBehavior {
227            late_payment_rate: payment_behavior.late_payment_rate,
228            late_payment_distribution: LatePaymentDistribution {
229                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
230                late_8_to_14: late_dist.late_8_to_14,
231                very_late_15_to_30: late_dist.very_late_15_to_30,
232                severely_late_31_to_60: late_dist.severely_late_31_to_60,
233                extremely_late_over_60: late_dist.extremely_late_over_60,
234            },
235            partial_payment_rate: payment_behavior.partial_payment_rate,
236            payment_correction_rate: payment_behavior.payment_correction_rate,
237            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
238        },
239    }
240}
241
242/// Convert O2C flow config from schema to generator config.
243fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
244    let payment_behavior = &schema_config.payment_behavior;
245
246    O2CGeneratorConfig {
247        credit_check_failure_rate: schema_config.credit_check_failure_rate,
248        partial_shipment_rate: schema_config.partial_shipment_rate,
249        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
250        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
251        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
252        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
253        bad_debt_rate: schema_config.bad_debt_rate,
254        returns_rate: schema_config.return_rate,
255        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
256        payment_method_distribution: vec![
257            (PaymentMethod::BankTransfer, 0.50),
258            (PaymentMethod::Check, 0.30),
259            (PaymentMethod::Wire, 0.15),
260            (PaymentMethod::CreditCard, 0.05),
261        ],
262        payment_behavior: O2CPaymentBehavior {
263            partial_payment_rate: payment_behavior.partial_payments.rate,
264            short_payment_rate: payment_behavior.short_payments.rate,
265            max_short_percent: payment_behavior.short_payments.max_short_percent,
266            on_account_rate: payment_behavior.on_account_payments.rate,
267            payment_correction_rate: payment_behavior.payment_corrections.rate,
268            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
269        },
270    }
271}
272
/// Switches and volume settings that select which generation phases run.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Produce master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Produce document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Produce OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Produce journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    pub inject_anomalies: bool,
    /// Inject data-quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Validate the balance sheet equation after generation.
    pub validate_balances: bool,
    /// Show progress bars.
    pub show_progress: bool,
    /// Vendors to generate per company.
    pub vendors_per_company: usize,
    /// Customers to generate per company.
    pub customers_per_company: usize,
    /// Materials to generate per company.
    pub materials_per_company: usize,
    /// Assets to generate per company.
    pub assets_per_company: usize,
    /// Employees to generate per company.
    pub employees_per_company: usize,
    /// P2P chains to generate.
    pub p2p_chains: usize,
    /// O2C chains to generate.
    pub o2c_chains: usize,
    /// Produce audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// Audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Findings per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Produce banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Produce S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Produce bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Produce financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Produce accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Produce manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Produce sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Produce ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Produce process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Produce compliance regulations data (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Produce period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Produce HR data (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Produce treasury data (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Produce project accounting data (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
    /// v3.3.0: produce legal documents per engagement (engagement letters,
    /// management rep letters, legal opinions, regulatory filings, board
    /// resolutions). Gated by `compliance_regulations.legal_documents.enabled`.
    pub generate_legal_documents: bool,
    /// v3.3.0: produce IT general controls (access logs, change management
    /// records) per audit engagement. Gated by `audit.it_controls.enabled`.
    pub generate_it_controls: bool,
    /// v3.3.0: run the analytics-metadata phase after all JE-adding phases.
    /// Wires PriorYearGenerator / IndustryBenchmarkGenerator /
    /// ManagementReportGenerator / DriftEventGenerator. Gated by the
    /// top-level `analytics_metadata.enabled` config flag.
    pub generate_analytics_metadata: bool,
}
370
371impl Default for PhaseConfig {
372    fn default() -> Self {
373        Self {
374            generate_master_data: true,
375            generate_document_flows: true,
376            generate_ocpm_events: false, // Off by default
377            generate_journal_entries: true,
378            inject_anomalies: false,
379            inject_data_quality: false, // Off by default (to preserve clean test data)
380            validate_balances: true,
381            show_progress: true,
382            vendors_per_company: 50,
383            customers_per_company: 100,
384            materials_per_company: 200,
385            assets_per_company: 50,
386            employees_per_company: 100,
387            p2p_chains: 100,
388            o2c_chains: 100,
389            generate_audit: false, // Off by default
390            audit_engagements: 5,
391            workpapers_per_engagement: 20,
392            evidence_per_workpaper: 5,
393            risks_per_engagement: 15,
394            findings_per_engagement: 8,
395            judgments_per_engagement: 10,
396            generate_banking: false,                // Off by default
397            generate_graph_export: false,           // Off by default
398            generate_sourcing: false,               // Off by default
399            generate_bank_reconciliation: false,    // Off by default
400            generate_financial_statements: false,   // Off by default
401            generate_accounting_standards: false,   // Off by default
402            generate_manufacturing: false,          // Off by default
403            generate_sales_kpi_budgets: false,      // Off by default
404            generate_tax: false,                    // Off by default
405            generate_esg: false,                    // Off by default
406            generate_intercompany: false,           // Off by default
407            generate_evolution_events: true,        // On by default
408            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
409            generate_compliance_regulations: false, // Off by default
410            generate_period_close: true,            // On by default
411            generate_hr: false,                     // Off by default
412            generate_treasury: false,               // Off by default
413            generate_project_accounting: false,     // Off by default
414            generate_legal_documents: false,        // v3.3.0 — off by default
415            generate_it_controls: false,            // v3.3.0 — off by default
416            generate_analytics_metadata: false,     // v3.3.0 — off by default
417        }
418    }
419}
420
421impl PhaseConfig {
422    /// Derive phase flags from [`GeneratorConfig`].
423    ///
424    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
425    /// CLI flags can override individual fields after calling this method.
426    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
427        Self {
428            // Always-on phases
429            generate_master_data: true,
430            generate_document_flows: true,
431            generate_journal_entries: true,
432            validate_balances: true,
433            generate_period_close: true,
434            generate_evolution_events: true,
435            show_progress: true,
436
437            // Feature-gated phases — derived from config sections
438            generate_audit: cfg.audit.enabled,
439            generate_banking: cfg.banking.enabled,
440            generate_graph_export: cfg.graph_export.enabled,
441            generate_sourcing: cfg.source_to_pay.enabled,
442            generate_intercompany: cfg.intercompany.enabled,
443            generate_financial_statements: cfg.financial_reporting.enabled,
444            generate_bank_reconciliation: cfg.financial_reporting.enabled,
445            generate_accounting_standards: cfg.accounting_standards.enabled,
446            generate_manufacturing: cfg.manufacturing.enabled,
447            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
448            generate_tax: cfg.tax.enabled,
449            generate_esg: cfg.esg.enabled,
450            generate_ocpm_events: cfg.ocpm.enabled,
451            generate_compliance_regulations: cfg.compliance_regulations.enabled,
452            generate_hr: cfg.hr.enabled,
453            generate_treasury: cfg.treasury.enabled,
454            generate_project_accounting: cfg.project_accounting.enabled,
455
456            // v3.3.0: L1 generator wiring
457            // Legal documents emitted when compliance_regulations is enabled
458            // and the nested legal_documents.enabled flag is set.
459            generate_legal_documents: cfg.compliance_regulations.enabled
460                && cfg.compliance_regulations.legal_documents.enabled,
461            // IT general controls emitted when audit is enabled and the
462            // nested it_controls.enabled flag is set.
463            generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
464            // Analytics metadata phase (prior-year, industry benchmarks,
465            // management reports, drift events).
466            generate_analytics_metadata: cfg.analytics_metadata.enabled,
467
468            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
469            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
470
471            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
472            inject_data_quality: cfg.data_quality.enabled,
473
474            // Count defaults (CLI can override after calling this method)
475            vendors_per_company: 50,
476            customers_per_company: 100,
477            materials_per_company: 200,
478            assets_per_company: 50,
479            employees_per_company: 100,
480            p2p_chains: 100,
481            o2c_chains: 100,
482            audit_engagements: 5,
483            workpapers_per_engagement: 20,
484            evidence_per_workpaper: 5,
485            risks_per_engagement: 15,
486            findings_per_engagement: 8,
487            judgments_per_engagement: 10,
488        }
489    }
490}
491
492/// Master data snapshot containing all generated entities.
493#[derive(Debug, Clone, Default)]
494pub struct MasterDataSnapshot {
495    /// Generated vendors.
496    pub vendors: Vec<Vendor>,
497    /// Generated customers.
498    pub customers: Vec<Customer>,
499    /// Generated materials.
500    pub materials: Vec<Material>,
501    /// Generated fixed assets.
502    pub assets: Vec<FixedAsset>,
503    /// Generated employees.
504    pub employees: Vec<Employee>,
505    /// Generated cost center hierarchy (two-level: departments + sub-departments).
506    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
507    /// v5.1: Generated profit centre hierarchy (two-level: top-level
508    /// segment / region / product-group nodes + sub-units).  Emits to
509    /// SAP CEPC alongside `cost_centers` → CSKS.
510    pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
511    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
512    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
513    /// v3.3.0+: organizational profiles (one per company) with
514    /// industry / geography / structure / complexity metadata. Emitted
515    /// alongside master data when `generate_master_data = true`.
516    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
517}
518
/// Summary of a finished hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Path of the output directory.
    pub output_path: PathBuf,
}
531
532/// Document flow snapshot containing all generated document chains.
533#[derive(Debug, Clone, Default)]
534pub struct DocumentFlowSnapshot {
535    /// P2P document chains.
536    pub p2p_chains: Vec<P2PDocumentChain>,
537    /// O2C document chains.
538    pub o2c_chains: Vec<O2CDocumentChain>,
539    /// All purchase orders (flattened).
540    pub purchase_orders: Vec<documents::PurchaseOrder>,
541    /// All goods receipts (flattened).
542    pub goods_receipts: Vec<documents::GoodsReceipt>,
543    /// All vendor invoices (flattened).
544    pub vendor_invoices: Vec<documents::VendorInvoice>,
545    /// All sales orders (flattened).
546    pub sales_orders: Vec<documents::SalesOrder>,
547    /// All deliveries (flattened).
548    pub deliveries: Vec<documents::Delivery>,
549    /// All customer invoices (flattened).
550    pub customer_invoices: Vec<documents::CustomerInvoice>,
551    /// All payments (flattened).
552    pub payments: Vec<documents::Payment>,
553    /// Cross-document references collected from all document headers
554    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
555    pub document_references: Vec<documents::DocumentReference>,
556}
557
558/// Subledger snapshot containing generated subledger records.
559#[derive(Debug, Clone, Default)]
560pub struct SubledgerSnapshot {
561    /// AP invoices linked from document flow vendor invoices.
562    pub ap_invoices: Vec<APInvoice>,
563    /// AR invoices linked from document flow customer invoices.
564    pub ar_invoices: Vec<ARInvoice>,
565    /// FA subledger records (asset acquisitions from FA generator).
566    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
567    /// Inventory positions from inventory generator.
568    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
569    /// Inventory movements from inventory generator.
570    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
571    /// AR aging reports, one per company, computed after payment settlement.
572    pub ar_aging_reports: Vec<ARAgingReport>,
573    /// AP aging reports, one per company, computed after payment settlement.
574    pub ap_aging_reports: Vec<APAgingReport>,
575    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
576    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
577    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
578    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
579    /// Dunning runs executed after AR aging (one per company per dunning cycle).
580    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
581    /// Dunning letters generated across all dunning runs.
582    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
583}
584
585/// OCPM snapshot containing generated OCPM event log data.
586#[derive(Debug, Clone, Default)]
587pub struct OcpmSnapshot {
588    /// OCPM event log (if generated)
589    pub event_log: Option<OcpmEventLog>,
590    /// Number of events generated
591    pub event_count: usize,
592    /// Number of objects generated
593    pub object_count: usize,
594    /// Number of cases generated
595    pub case_count: usize,
596}
597
598/// Audit data snapshot containing all generated audit-related entities.
599#[derive(Debug, Clone, Default)]
600pub struct AuditSnapshot {
601    /// Audit engagements per ISA 210/220.
602    pub engagements: Vec<AuditEngagement>,
603    /// Workpapers per ISA 230.
604    pub workpapers: Vec<Workpaper>,
605    /// Audit evidence per ISA 500.
606    pub evidence: Vec<AuditEvidence>,
607    /// Risk assessments per ISA 315/330.
608    pub risk_assessments: Vec<RiskAssessment>,
609    /// Audit findings per ISA 265.
610    pub findings: Vec<AuditFinding>,
611    /// Professional judgments per ISA 200.
612    pub judgments: Vec<ProfessionalJudgment>,
613    /// External confirmations per ISA 505.
614    pub confirmations: Vec<ExternalConfirmation>,
615    /// Confirmation responses per ISA 505.
616    pub confirmation_responses: Vec<ConfirmationResponse>,
617    /// Audit procedure steps per ISA 330/530.
618    pub procedure_steps: Vec<AuditProcedureStep>,
619    /// Audit samples per ISA 530.
620    pub samples: Vec<AuditSample>,
621    /// Analytical procedure results per ISA 520.
622    pub analytical_results: Vec<AnalyticalProcedureResult>,
623    /// Internal audit functions per ISA 610.
624    pub ia_functions: Vec<InternalAuditFunction>,
625    /// Internal audit reports per ISA 610.
626    pub ia_reports: Vec<InternalAuditReport>,
627    /// Related parties per ISA 550.
628    pub related_parties: Vec<RelatedParty>,
629    /// Related party transactions per ISA 550.
630    pub related_party_transactions: Vec<RelatedPartyTransaction>,
631    // ---- ISA 600: Group Audits ----
632    /// Component auditors assigned by jurisdiction (ISA 600).
633    pub component_auditors: Vec<ComponentAuditor>,
634    /// Group audit plan with materiality allocations (ISA 600).
635    pub group_audit_plan: Option<GroupAuditPlan>,
636    /// Component instructions issued to component auditors (ISA 600).
637    pub component_instructions: Vec<ComponentInstruction>,
638    /// Reports received from component auditors (ISA 600).
639    pub component_reports: Vec<ComponentAuditorReport>,
640    // ---- ISA 210: Engagement Letters ----
641    /// Engagement letters per ISA 210.
642    pub engagement_letters: Vec<EngagementLetter>,
643    // ---- ISA 560 / IAS 10: Subsequent Events ----
644    /// Subsequent events per ISA 560 / IAS 10.
645    pub subsequent_events: Vec<SubsequentEvent>,
646    // ---- ISA 402: Service Organization Controls ----
647    /// Service organizations identified per ISA 402.
648    pub service_organizations: Vec<ServiceOrganization>,
649    /// SOC reports obtained per ISA 402.
650    pub soc_reports: Vec<SocReport>,
651    /// User entity controls documented per ISA 402.
652    pub user_entity_controls: Vec<UserEntityControl>,
653    // ---- ISA 570: Going Concern ----
654    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
655    pub going_concern_assessments:
656        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
657    // ---- ISA 540: Accounting Estimates ----
658    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
659    pub accounting_estimates:
660        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
661    // ---- ISA 700/701/705/706: Audit Opinions ----
662    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
663    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
664    /// Key Audit Matters per ISA 701 (flattened across all opinions).
665    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
666    // ---- SOX 302 / 404 ----
667    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
668    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
669    /// SOX Section 404 ICFR assessments (one per entity per year).
670    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
671    // ---- ISA 320: Materiality ----
672    /// Materiality calculations per entity per period (ISA 320).
673    pub materiality_calculations:
674        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
675    // ---- ISA 315: Combined Risk Assessments ----
676    /// Combined Risk Assessments per account area / assertion (ISA 315).
677    pub combined_risk_assessments:
678        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
679    // ---- ISA 530: Sampling Plans ----
680    /// Sampling plans per CRA at Moderate or higher (ISA 530).
681    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
682    /// Individual sampled items (key items + representative items) per ISA 530.
683    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
684    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
685    /// Significant classes of transactions per ISA 315 (one set per entity).
686    pub significant_transaction_classes:
687        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
688    // ---- ISA 520: Unusual Item Markers ----
689    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
690    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
691    // ---- ISA 520: Analytical Relationships ----
692    /// Analytical relationships (ratios, trends, correlations) per entity.
693    pub analytical_relationships:
694        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
695    // ---- PCAOB-ISA Cross-Reference ----
696    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
697    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
698    // ---- ISA Standard Reference ----
699    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
700    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
701    // ---- ISA 220 / ISA 300: Audit Scopes ----
702    /// Audit scope records (one per engagement) describing the audit boundary.
703    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
704    // ---- FSM Event Trail ----
705    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
706    /// Contains the ordered sequence of state-transition and procedure-step events
707    /// generated by the audit FSM engine.
708    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
709    // ---- v3.3.0: L1 generator wiring ----
710    /// Legal documents (engagement letters, management reps, legal
711    /// opinions, regulatory filings, board resolutions) per entity.
712    /// Emitted by `LegalDocumentGenerator` when
713    /// `compliance_regulations.legal_documents.enabled = true`.
714    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
715    /// IT general controls — access logs (login/privileged action
716    /// audit trail). Emitted by `ItControlsGenerator` when
717    /// `audit.it_controls.enabled = true`.
718    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
719    /// IT general controls — change management records (code deploys,
720    /// config changes, patches). Emitted by `ItControlsGenerator`.
721    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
722}
723
724/// Banking KYC/AML data snapshot containing all generated banking entities.
725#[derive(Debug, Clone, Default)]
726pub struct BankingSnapshot {
727    /// Banking customers (retail, business, trust).
728    pub customers: Vec<BankingCustomer>,
729    /// Bank accounts.
730    pub accounts: Vec<BankAccount>,
731    /// Bank transactions with AML labels.
732    pub transactions: Vec<BankTransaction>,
733    /// Transaction-level AML labels with features.
734    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
735    /// Customer-level AML labels.
736    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
737    /// Account-level AML labels.
738    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
739    /// Relationship-level AML labels.
740    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
741    /// Case narratives for AML scenarios.
742    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
743    /// Number of suspicious transactions.
744    pub suspicious_count: usize,
745    /// Number of AML scenarios generated.
746    pub scenario_count: usize,
747}
748
749/// Graph export snapshot containing exported graph metadata.
750#[derive(Debug, Clone, Default, Serialize)]
751pub struct GraphExportSnapshot {
752    /// Whether graph export was performed.
753    pub exported: bool,
754    /// Number of graphs exported.
755    pub graph_count: usize,
756    /// Exported graph metadata (by format name).
757    pub exports: HashMap<String, GraphExportInfo>,
758}
759
760/// Information about an exported graph.
761#[derive(Debug, Clone, Serialize)]
762pub struct GraphExportInfo {
763    /// Graph name.
764    pub name: String,
765    /// Export format (pytorch_geometric, neo4j, dgl).
766    pub format: String,
767    /// Output directory path.
768    pub output_path: PathBuf,
769    /// Number of nodes.
770    pub node_count: usize,
771    /// Number of edges.
772    pub edge_count: usize,
773}
774
775/// S2C sourcing data snapshot.
776#[derive(Debug, Clone, Default)]
777pub struct SourcingSnapshot {
778    /// Spend analyses.
779    pub spend_analyses: Vec<SpendAnalysis>,
780    /// Sourcing projects.
781    pub sourcing_projects: Vec<SourcingProject>,
782    /// Supplier qualifications.
783    pub qualifications: Vec<SupplierQualification>,
784    /// RFx events (RFI, RFP, RFQ).
785    pub rfx_events: Vec<RfxEvent>,
786    /// Supplier bids.
787    pub bids: Vec<SupplierBid>,
788    /// Bid evaluations.
789    pub bid_evaluations: Vec<BidEvaluation>,
790    /// Procurement contracts.
791    pub contracts: Vec<ProcurementContract>,
792    /// Catalog items.
793    pub catalog_items: Vec<CatalogItem>,
794    /// Supplier scorecards.
795    pub scorecards: Vec<SupplierScorecard>,
796}
797
798/// A single period's trial balance with metadata.
799///
800/// Used as the orchestrator's in-memory representation while it
801/// builds per-period FS / CF artefacts.  At write time the runtime
802/// converts each `PeriodTrialBalance` to the canonical
803/// [`datasynth_core::models::balance::TrialBalance`] shape via
804/// [`PeriodTrialBalance::into_canonical`] so the on-disk
805/// `period_close/trial_balances.json` matches what the group
806/// aggregate phase loads — see
807/// [`crate::output_writer::write_outputs`].
808#[derive(Debug, Clone, Serialize, Deserialize)]
809pub struct PeriodTrialBalance {
810    /// Fiscal year.
811    pub fiscal_year: u16,
812    /// Fiscal period (1-12).
813    pub fiscal_period: u8,
814    /// Period start date.
815    pub period_start: NaiveDate,
816    /// Period end date.
817    pub period_end: NaiveDate,
818    /// Trial balance entries for this period.
819    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
820}
821
822impl PeriodTrialBalance {
823    /// Convert this in-memory period TB into the canonical
824    /// [`datasynth_core::models::balance::TrialBalance`] shape used
825    /// for the on-disk artefact.
826    ///
827    /// v5.1: the on-disk shape is now canonical end-to-end.  Group
828    /// aggregate's `tb_loader` consumes the canonical type directly,
829    /// dropping the v5.0 dual-shape detection that converted from
830    /// `PeriodTrialBalance` JSON on the fly.
831    pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
832        let mut total_debits = Decimal::ZERO;
833        let mut total_credits = Decimal::ZERO;
834        let lines: Vec<TrialBalanceLine> = self
835            .entries
836            .into_iter()
837            .map(|e| {
838                total_debits += e.debit_balance;
839                total_credits += e.credit_balance;
840                let category = AccountCategory::from_account_code(&e.account_code);
841                TrialBalanceLine {
842                    account_code: e.account_code,
843                    account_description: e.account_name,
844                    category,
845                    account_type: AccountType::Asset,
846                    opening_balance: Decimal::ZERO,
847                    period_debits: e.debit_balance,
848                    period_credits: e.credit_balance,
849                    closing_balance: e.debit_balance - e.credit_balance,
850                    debit_balance: e.debit_balance,
851                    credit_balance: e.credit_balance,
852                    cost_center: None,
853                    profit_center: None,
854                }
855            })
856            .collect();
857        let imbalance = total_debits - total_credits;
858        let is_balanced = imbalance.abs() < Decimal::new(1, 2);
859        TrialBalance {
860            trial_balance_id: format!(
861                "{company_code}-{:04}{:02}",
862                self.fiscal_year, self.fiscal_period
863            ),
864            company_code: company_code.to_string(),
865            company_name: None,
866            as_of_date: self.period_end,
867            fiscal_year: self.fiscal_year as i32,
868            fiscal_period: self.fiscal_period as u32,
869            currency: currency.to_string(),
870            balance_type: TrialBalanceType::Adjusted,
871            lines,
872            total_debits,
873            total_credits,
874            is_balanced,
875            out_of_balance: imbalance,
876            is_equation_valid: is_balanced,
877            equation_difference: imbalance,
878            category_summary: std::collections::HashMap::new(),
879            created_at: self
880                .period_start
881                .and_hms_opt(0, 0, 0)
882                .expect("midnight is a valid time"),
883            created_by: "ORCHESTRATOR".to_string(),
884            approved_by: None,
885            approved_at: None,
886            status: TrialBalanceStatus::Final,
887        }
888    }
889}
890
891/// Financial reporting snapshot (financial statements + bank reconciliations).
892#[derive(Debug, Clone, Default)]
893pub struct FinancialReportingSnapshot {
894    /// Financial statements (balance sheet, income statement, cash flow).
895    /// For multi-entity configs this includes all standalone statements.
896    pub financial_statements: Vec<FinancialStatement>,
897    /// Standalone financial statements keyed by entity code.
898    /// Each entity has its own slice of statements.
899    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
900    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
901    pub consolidated_statements: Vec<FinancialStatement>,
902    /// Consolidation schedules (one per period) showing pre/post elimination detail.
903    pub consolidation_schedules: Vec<ConsolidationSchedule>,
904    /// Bank reconciliations.
905    pub bank_reconciliations: Vec<BankReconciliation>,
906    /// Period-close trial balances (one per period).
907    pub trial_balances: Vec<PeriodTrialBalance>,
908    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
909    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
910    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
911    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
912    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
913    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
914}
915
916/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
917#[derive(Debug, Clone, Default)]
918pub struct HrSnapshot {
919    /// Payroll runs (actual data).
920    pub payroll_runs: Vec<PayrollRun>,
921    /// Payroll line items (actual data).
922    pub payroll_line_items: Vec<PayrollLineItem>,
923    /// Time entries (actual data).
924    pub time_entries: Vec<TimeEntry>,
925    /// Expense reports (actual data).
926    pub expense_reports: Vec<ExpenseReport>,
927    /// Benefit enrollments (actual data).
928    pub benefit_enrollments: Vec<BenefitEnrollment>,
929    /// Defined benefit pension plans (IAS 19 / ASC 715).
930    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
931    /// Pension obligation (DBO) roll-forwards.
932    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
933    /// Plan asset roll-forwards.
934    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
935    /// Pension disclosures.
936    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
937    /// Journal entries generated from pension expense and OCI remeasurements.
938    pub pension_journal_entries: Vec<JournalEntry>,
939    /// Stock grants (ASC 718 / IFRS 2).
940    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
941    /// Stock-based compensation period expense records.
942    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
943    /// Journal entries generated from stock-based compensation expense.
944    pub stock_comp_journal_entries: Vec<JournalEntry>,
945    /// Payroll runs.
946    pub payroll_run_count: usize,
947    /// Payroll line item count.
948    pub payroll_line_item_count: usize,
949    /// Time entry count.
950    pub time_entry_count: usize,
951    /// Expense report count.
952    pub expense_report_count: usize,
953    /// Benefit enrollment count.
954    pub benefit_enrollment_count: usize,
955    /// Pension plan count.
956    pub pension_plan_count: usize,
957    /// Stock grant count.
958    pub stock_grant_count: usize,
959}
960
961/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
962#[derive(Debug, Clone, Default)]
963pub struct AccountingStandardsSnapshot {
964    /// Revenue recognition contracts (actual data).
965    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
966    /// Impairment tests (actual data).
967    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
968    /// Business combinations (IFRS 3 / ASC 805).
969    pub business_combinations:
970        Vec<datasynth_core::models::business_combination::BusinessCombination>,
971    /// Journal entries generated from business combinations (Day 1 + amortization).
972    pub business_combination_journal_entries: Vec<JournalEntry>,
973    /// ECL models (IFRS 9 / ASC 326).
974    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
975    /// ECL provision movements.
976    pub ecl_provision_movements:
977        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
978    /// Journal entries from ECL provision.
979    pub ecl_journal_entries: Vec<JournalEntry>,
980    /// Provisions (IAS 37 / ASC 450).
981    pub provisions: Vec<datasynth_core::models::provision::Provision>,
982    /// Provision movement roll-forwards (IAS 37 / ASC 450).
983    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
984    /// Contingent liabilities (IAS 37 / ASC 450).
985    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
986    /// Journal entries from provisions.
987    pub provision_journal_entries: Vec<JournalEntry>,
988    /// IAS 21 functional currency translation results (one per entity per period).
989    pub currency_translation_results:
990        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
991    /// Revenue recognition contract count.
992    pub revenue_contract_count: usize,
993    /// Impairment test count.
994    pub impairment_test_count: usize,
995    /// Business combination count.
996    pub business_combination_count: usize,
997    /// ECL model count.
998    pub ecl_model_count: usize,
999    /// Provision count.
1000    pub provision_count: usize,
1001    /// Currency translation result count (IAS 21).
1002    pub currency_translation_count: usize,
1003    // ---- v3.3.1: Lease / FairValue / FrameworkReconciliation ----
1004    /// Lease contracts (IFRS 16 / ASC 842). Each entry carries its own
1005    /// ROU asset + lease liability details.
1006    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
1007    /// Fair value measurements (IFRS 13 / ASC 820) across Level 1/2/3.
1008    pub fair_value_measurements:
1009        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
1010    /// Framework difference records (dual-reporting only).
1011    pub framework_differences:
1012        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
1013    /// Per-entity framework reconciliation (dual-reporting only).
1014    pub framework_reconciliations:
1015        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
1016    /// Counts for stats logging.
1017    pub lease_count: usize,
1018    pub fair_value_measurement_count: usize,
1019    pub framework_difference_count: usize,
1020}
1021
1022/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
1023#[derive(Debug, Clone, Default)]
1024pub struct ComplianceRegulationsSnapshot {
1025    /// Flattened standard records for output.
1026    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
1027    /// Cross-reference records.
1028    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
1029    /// Jurisdiction profile records.
1030    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
1031    /// Generated audit procedures.
1032    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
1033    /// Generated compliance findings.
1034    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
1035    /// Generated regulatory filings.
1036    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
1037    /// Compliance graph (if graph integration enabled).
1038    pub compliance_graph: Option<datasynth_graph::Graph>,
1039}
1040
1041/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
1042#[derive(Debug, Clone, Default)]
1043pub struct ManufacturingSnapshot {
1044    /// Production orders (actual data).
1045    pub production_orders: Vec<ProductionOrder>,
1046    /// Quality inspections (actual data).
1047    pub quality_inspections: Vec<QualityInspection>,
1048    /// Cycle counts (actual data).
1049    pub cycle_counts: Vec<CycleCount>,
1050    /// BOM components (actual data).
1051    pub bom_components: Vec<BomComponent>,
1052    /// Inventory movements (actual data).
1053    pub inventory_movements: Vec<InventoryMovement>,
1054    /// Production order count.
1055    pub production_order_count: usize,
1056    /// Quality inspection count.
1057    pub quality_inspection_count: usize,
1058    /// Cycle count count.
1059    pub cycle_count_count: usize,
1060    /// BOM component count.
1061    pub bom_component_count: usize,
1062    /// Inventory movement count.
1063    pub inventory_movement_count: usize,
1064}
1065
1066/// Sales, KPI, and budget data snapshot.
1067#[derive(Debug, Clone, Default)]
1068pub struct SalesKpiBudgetsSnapshot {
1069    /// Sales quotes (actual data).
1070    pub sales_quotes: Vec<SalesQuote>,
1071    /// Management KPIs (actual data).
1072    pub kpis: Vec<ManagementKpi>,
1073    /// Budgets (actual data).
1074    pub budgets: Vec<Budget>,
1075    /// Sales quote count.
1076    pub sales_quote_count: usize,
1077    /// Management KPI count.
1078    pub kpi_count: usize,
1079    /// Budget line count.
1080    pub budget_line_count: usize,
1081}
1082
1083/// Anomaly labels generated during injection.
1084#[derive(Debug, Clone, Default)]
1085pub struct AnomalyLabels {
1086    /// All anomaly labels.
1087    pub labels: Vec<LabeledAnomaly>,
1088    /// Summary statistics.
1089    pub summary: Option<AnomalySummary>,
1090    /// Count by anomaly type.
1091    pub by_type: HashMap<String, usize>,
1092}
1093
1094/// Balance validation results from running balance tracker.
1095#[derive(Debug, Clone, Default)]
1096pub struct BalanceValidationResult {
1097    /// Whether validation was performed.
1098    pub validated: bool,
1099    /// Whether balance sheet equation is satisfied.
1100    pub is_balanced: bool,
1101    /// Number of entries processed.
1102    pub entries_processed: u64,
1103    /// Total debits across all entries.
1104    pub total_debits: rust_decimal::Decimal,
1105    /// Total credits across all entries.
1106    pub total_credits: rust_decimal::Decimal,
1107    /// Number of accounts tracked.
1108    pub accounts_tracked: usize,
1109    /// Number of companies tracked.
1110    pub companies_tracked: usize,
1111    /// Validation errors encountered.
1112    pub validation_errors: Vec<ValidationError>,
1113    /// Whether any unbalanced entries were found.
1114    pub has_unbalanced_entries: bool,
1115}
1116
1117/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
1118#[derive(Debug, Clone, Default)]
1119pub struct TaxSnapshot {
1120    /// Tax jurisdictions.
1121    pub jurisdictions: Vec<TaxJurisdiction>,
1122    /// Tax codes.
1123    pub codes: Vec<TaxCode>,
1124    /// Tax lines computed on documents.
1125    pub tax_lines: Vec<TaxLine>,
1126    /// Tax returns filed per period.
1127    pub tax_returns: Vec<TaxReturn>,
1128    /// Tax provisions.
1129    pub tax_provisions: Vec<TaxProvision>,
1130    /// Withholding tax records.
1131    pub withholding_records: Vec<WithholdingTaxRecord>,
1132    /// Tax anomaly labels.
1133    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1134    /// Jurisdiction count.
1135    pub jurisdiction_count: usize,
1136    /// Code count.
1137    pub code_count: usize,
1138    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
1139    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1140    /// Journal entries posting tax payable/receivable from computed tax lines.
1141    pub tax_posting_journal_entries: Vec<JournalEntry>,
1142}
1143
1144/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1145#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1146pub struct IntercompanySnapshot {
1147    /// Group ownership structure (parent/subsidiary/associate relationships).
1148    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1149    /// IC matched pairs (transaction pairs between related entities).
1150    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1151    /// IC journal entries generated from matched pairs (seller side).
1152    pub seller_journal_entries: Vec<JournalEntry>,
1153    /// IC journal entries generated from matched pairs (buyer side).
1154    pub buyer_journal_entries: Vec<JournalEntry>,
1155    /// Elimination entries for consolidation.
1156    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1157    /// NCI measurements derived from group structure ownership percentages.
1158    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1159    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
1160    #[serde(skip)]
1161    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1162    /// IC matched pair count.
1163    pub matched_pair_count: usize,
1164    /// IC elimination entry count.
1165    pub elimination_entry_count: usize,
1166    /// IC matching rate (0.0 to 1.0).
1167    pub match_rate: f64,
1168}
1169
1170/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
1171#[derive(Debug, Clone, Default)]
1172pub struct EsgSnapshot {
1173    /// Emission records (scope 1, 2, 3).
1174    pub emissions: Vec<EmissionRecord>,
1175    /// Energy consumption records.
1176    pub energy: Vec<EnergyConsumption>,
1177    /// Water usage records.
1178    pub water: Vec<WaterUsage>,
1179    /// Waste records.
1180    pub waste: Vec<WasteRecord>,
1181    /// Workforce diversity metrics.
1182    pub diversity: Vec<WorkforceDiversityMetric>,
1183    /// Pay equity metrics.
1184    pub pay_equity: Vec<PayEquityMetric>,
1185    /// Safety incidents.
1186    pub safety_incidents: Vec<SafetyIncident>,
1187    /// Safety metrics.
1188    pub safety_metrics: Vec<SafetyMetric>,
1189    /// Governance metrics.
1190    pub governance: Vec<GovernanceMetric>,
1191    /// Supplier ESG assessments.
1192    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1193    /// Materiality assessments.
1194    pub materiality: Vec<MaterialityAssessment>,
1195    /// ESG disclosures.
1196    pub disclosures: Vec<EsgDisclosure>,
1197    /// Climate scenarios.
1198    pub climate_scenarios: Vec<ClimateScenario>,
1199    /// ESG anomaly labels.
1200    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1201    /// Total emission record count.
1202    pub emission_count: usize,
1203    /// Total disclosure count.
1204    pub disclosure_count: usize,
1205}
1206
1207/// Treasury data snapshot (cash management, hedging, debt, pooling).
1208#[derive(Debug, Clone, Default)]
1209pub struct TreasurySnapshot {
1210    /// Cash positions (daily balances per account).
1211    pub cash_positions: Vec<CashPosition>,
1212    /// Cash forecasts.
1213    pub cash_forecasts: Vec<CashForecast>,
1214    /// Cash pools.
1215    pub cash_pools: Vec<CashPool>,
1216    /// Cash pool sweep transactions.
1217    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1218    /// Hedging instruments.
1219    pub hedging_instruments: Vec<HedgingInstrument>,
1220    /// Hedge relationships (ASC 815/IFRS 9 designations).
1221    pub hedge_relationships: Vec<HedgeRelationship>,
1222    /// Debt instruments.
1223    pub debt_instruments: Vec<DebtInstrument>,
1224    /// Bank guarantees and letters of credit.
1225    pub bank_guarantees: Vec<BankGuarantee>,
1226    /// Intercompany netting runs.
1227    pub netting_runs: Vec<NettingRun>,
1228    /// Treasury anomaly labels.
1229    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1230    /// Journal entries generated from treasury instruments (debt interest accruals,
1231    /// hedge MTM, cash pool sweeps).
1232    pub journal_entries: Vec<JournalEntry>,
1233}
1234
1235/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
1236#[derive(Debug, Clone, Default)]
1237pub struct ProjectAccountingSnapshot {
1238    /// Projects with WBS hierarchies.
1239    pub projects: Vec<Project>,
1240    /// Project cost lines (linked from source documents).
1241    pub cost_lines: Vec<ProjectCostLine>,
1242    /// Revenue recognition records.
1243    pub revenue_records: Vec<ProjectRevenue>,
1244    /// Earned value metrics.
1245    pub earned_value_metrics: Vec<EarnedValueMetric>,
1246    /// Change orders.
1247    pub change_orders: Vec<ChangeOrder>,
1248    /// Project milestones.
1249    pub milestones: Vec<ProjectMilestone>,
1250}
1251
1252/// Complete result of enhanced generation run.
1253#[derive(Debug, Default)]
1254pub struct EnhancedGenerationResult {
1255    /// Generated chart of accounts.
1256    pub chart_of_accounts: ChartOfAccounts,
1257    /// Master data snapshot.
1258    pub master_data: MasterDataSnapshot,
1259    /// Document flow snapshot.
1260    pub document_flows: DocumentFlowSnapshot,
1261    /// Subledger snapshot (linked from document flows).
1262    pub subledger: SubledgerSnapshot,
1263    /// OCPM event log snapshot (if OCPM generation enabled).
1264    pub ocpm: OcpmSnapshot,
1265    /// Audit data snapshot (if audit generation enabled).
1266    pub audit: AuditSnapshot,
1267    /// Banking KYC/AML data snapshot (if banking generation enabled).
1268    pub banking: BankingSnapshot,
1269    /// Graph export snapshot (if graph export enabled).
1270    pub graph_export: GraphExportSnapshot,
1271    /// S2C sourcing data snapshot (if sourcing generation enabled).
1272    pub sourcing: SourcingSnapshot,
1273    /// Financial reporting snapshot (financial statements + bank reconciliations).
1274    pub financial_reporting: FinancialReportingSnapshot,
1275    /// HR data snapshot (payroll, time entries, expenses).
1276    pub hr: HrSnapshot,
1277    /// Accounting standards snapshot (revenue recognition, impairment).
1278    pub accounting_standards: AccountingStandardsSnapshot,
1279    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1280    pub manufacturing: ManufacturingSnapshot,
1281    /// Sales, KPI, and budget snapshot.
1282    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1283    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1284    pub tax: TaxSnapshot,
1285    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1286    pub esg: EsgSnapshot,
1287    /// Treasury data snapshot (cash management, hedging, debt).
1288    pub treasury: TreasurySnapshot,
1289    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1290    pub project_accounting: ProjectAccountingSnapshot,
1291    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1292    pub process_evolution: Vec<ProcessEvolutionEvent>,
1293    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1294    pub organizational_events: Vec<OrganizationalEvent>,
1295    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1296    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1297    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1298    pub intercompany: IntercompanySnapshot,
1299    /// Generated journal entries.
1300    pub journal_entries: Vec<JournalEntry>,
1301    /// Anomaly labels (if injection enabled).
1302    pub anomaly_labels: AnomalyLabels,
1303    /// Balance validation results (if validation enabled).
1304    pub balance_validation: BalanceValidationResult,
1305    /// Data quality statistics (if injection enabled).
1306    pub data_quality_stats: DataQualityStats,
1307    /// Data quality issue records (if injection enabled).
1308    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1309    /// Generation statistics.
1310    pub statistics: EnhancedGenerationStatistics,
1311    /// Data lineage graph (if tracking enabled).
1312    pub lineage: Option<super::lineage::LineageGraph>,
1313    /// Quality gate evaluation result.
1314    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1315    /// Internal controls (if controls generation enabled).
1316    pub internal_controls: Vec<InternalControl>,
1317    /// SoD (Segregation of Duties) violations identified during control application.
1318    ///
1319    /// Each record corresponds to a journal entry where `sod_violation == true`.
1320    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1321    /// Opening balances (if opening balance generation enabled).
1322    pub opening_balances: Vec<GeneratedOpeningBalance>,
1323    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1324    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1325    /// Counterfactual (original, mutated) JE pairs for ML training.
1326    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1327    /// Fraud red-flag indicators on P2P/O2C documents.
1328    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1329    /// Collusion rings (coordinated fraud networks).
1330    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1331    /// Bi-temporal version chains for vendor entities.
1332    pub temporal_vendor_chains:
1333        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1334    /// Entity relationship graph (nodes + edges with strength scores).
1335    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1336    /// Cross-process links (P2P ↔ O2C via inventory movements).
1337    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1338    /// Industry-specific GL accounts and metadata.
1339    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1340    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1341    pub compliance_regulations: ComplianceRegulationsSnapshot,
1342    /// v3.3.0: analytics-metadata snapshot (prior-year comparatives,
1343    /// industry benchmarks, management reports, drift events). Empty
1344    /// when `analytics_metadata.enabled = false`.
1345    pub analytics_metadata: AnalyticsMetadataSnapshot,
1346    /// v3.5.1+: statistical validation report (Benford, chi-squared,
1347    /// KS) over the generated amount distribution.  `None` when
1348    /// `distributions.validation.enabled = false`.
1349    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1350    /// v4.1.3+: interconnectivity snapshot — vendor tier assignments,
1351    /// customer value-segment labels, and industry-specific metadata
1352    /// populated from the previously-inert `vendor_network`,
1353    /// `customer_segmentation`, and `industry_specific` schema
1354    /// sections. Empty when those sections are disabled.
1355    pub interconnectivity: InterconnectivitySnapshot,
1356}
1357
/// v4.1.3+: interconnectivity snapshot. Populated when
/// `vendor_network.enabled` / `customer_segmentation.enabled` /
/// `industry_specific.enabled` are set. Holds tier / segment / industry
/// labels for generated entities so downstream tooling (graph export,
/// risk models) can consume them without re-deriving from scratch.
///
/// All fields are plain vectors, so the derived `Default` is an empty
/// snapshot.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// `(vendor_id, tier)` pairs. Tier 1 = strategic / primary; Tier 2
    /// = sub-tier suppliers to tier 1; Tier 3 = sub-sub-tier.
    pub vendor_tiers: Vec<(String, u8)>,
    /// `(vendor_id, cluster_label)` pairs where `cluster_label` is one of
    /// `"reliable_strategic"`, `"standard_operational"`, `"transactional"`
    /// or `"problematic"`.
    pub vendor_clusters: Vec<(String, String)>,
    /// `(customer_id, value_segment)` pairs where `value_segment` is one
    /// of `"enterprise"`, `"mid_market"`, `"smb"` or `"consumer"`.
    pub customer_value_segments: Vec<(String, String)>,
    /// `(customer_id, lifecycle_stage)` pairs where `lifecycle_stage` is
    /// one of `"prospect"`, `"new"`, `"growth"`, `"mature"`, `"at_risk"`,
    /// `"churned"` or `"won_back"`.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Summary of the industry-specific knobs that were applied, if any
    /// (e.g. `"manufacturing.bom_depth=3"`).
    pub industry_metadata: Vec<String>,
}
1383
/// v3.3.0: snapshot for the analytics-metadata phase.
///
/// All fields are plain vectors, so the derived `Default` is an empty
/// snapshot.
#[derive(Debug, Clone, Default)]
pub struct AnalyticsMetadataSnapshot {
    /// Prior-year comparative balances per account, per entity.
    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
    /// Industry benchmarks for the configured industry.
    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
    /// Management-report artefacts (dashboards, MDA sections).
    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
    /// Drift-event labels emitted from the post-generation sweep.
    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
}
1396
/// Enhanced statistics about a generation run.
///
/// Fields marked `#[serde(default)]` fall back to their type's default
/// value when they are missing from deserialized input. The three
/// `neural_*` fields are additionally omitted from serialized output
/// while they are `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Total journal entries generated.
    pub total_entries: u64,
    /// Total line items generated.
    pub total_line_items: u64,
    /// Number of accounts in CoA.
    pub accounts_count: usize,
    /// Number of companies.
    pub companies_count: usize,
    /// Period in months.
    pub period_months: u32,
    /// Master data: vendor count.
    pub vendor_count: usize,
    /// Master data: customer count.
    pub customer_count: usize,
    /// Master data: material count.
    pub material_count: usize,
    /// Master data: asset count.
    pub asset_count: usize,
    /// Master data: employee count.
    pub employee_count: usize,
    /// Document flow: P2P chain count.
    pub p2p_chain_count: usize,
    /// Document flow: O2C chain count.
    pub o2c_chain_count: usize,
    /// Subledger: AP invoice count.
    pub ap_invoice_count: usize,
    /// Subledger: AR invoice count.
    pub ar_invoice_count: usize,
    /// OCPM: event count.
    pub ocpm_event_count: usize,
    /// OCPM: object count.
    pub ocpm_object_count: usize,
    /// OCPM: case count.
    pub ocpm_case_count: usize,
    /// Audit: engagement count.
    pub audit_engagement_count: usize,
    /// Audit: workpaper count.
    pub audit_workpaper_count: usize,
    /// Audit: evidence count.
    pub audit_evidence_count: usize,
    /// Audit: risk count.
    pub audit_risk_count: usize,
    /// Audit: finding count.
    pub audit_finding_count: usize,
    /// Audit: judgment count.
    pub audit_judgment_count: usize,
    /// ISA 505: confirmation count.
    #[serde(default)]
    pub audit_confirmation_count: usize,
    /// ISA 505: confirmation response count.
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    /// ISA 330/530: procedure step count.
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    /// ISA 330/530: sample count.
    #[serde(default)]
    pub audit_sample_count: usize,
    /// ISA 520 analytical procedure counts.
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    /// ISA 610: internal audit function count.
    #[serde(default)]
    pub audit_ia_function_count: usize,
    /// ISA 610: internal audit report count.
    #[serde(default)]
    pub audit_ia_report_count: usize,
    /// ISA 550: related party count.
    #[serde(default)]
    pub audit_related_party_count: usize,
    /// ISA 550: related party transaction count.
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,
    /// Anomaly counts.
    pub anomalies_injected: usize,
    /// Data quality issue counts.
    pub data_quality_issues: usize,
    /// Banking: customer count.
    pub banking_customer_count: usize,
    /// Banking: account count.
    pub banking_account_count: usize,
    /// Banking: transaction count.
    pub banking_transaction_count: usize,
    /// Banking: suspicious count.
    pub banking_suspicious_count: usize,
    /// Graph export: export count.
    pub graph_export_count: usize,
    /// Graph export: node count.
    pub graph_node_count: usize,
    /// Graph export: edge count.
    pub graph_edge_count: usize,
    /// LLM enrichment timing (milliseconds).
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    /// Number of vendor names enriched by LLM.
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    /// v4.1.1+: number of customer names enriched by LLM.
    #[serde(default)]
    pub llm_customers_enriched: usize,
    /// v4.1.1+: number of material descriptions enriched by LLM.
    #[serde(default)]
    pub llm_materials_enriched: usize,
    /// v4.1.1+: number of audit finding titles enriched by LLM.
    #[serde(default)]
    pub llm_findings_enriched: usize,
    /// Diffusion enhancement timing (milliseconds).
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    /// Number of diffusion samples generated.
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    /// Hybrid-diffusion blend weight actually applied (after clamp to [0,1]).
    /// `None` when the neural/hybrid backend is not active.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    /// Hybrid-diffusion strategy applied (weighted_average / column_select / threshold).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    /// How many columns were routed through the neural backend.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,
    /// Causal generation timing (milliseconds).
    #[serde(default)]
    pub causal_generation_ms: u64,
    /// Number of causal samples generated.
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// Whether causal validation passed.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    /// S2C sourcing: sourcing project count.
    #[serde(default)]
    pub sourcing_project_count: usize,
    /// S2C sourcing: RFx event count.
    #[serde(default)]
    pub rfx_event_count: usize,
    /// S2C sourcing: bid count.
    #[serde(default)]
    pub bid_count: usize,
    /// S2C sourcing: contract count.
    #[serde(default)]
    pub contract_count: usize,
    /// S2C sourcing: catalog item count.
    #[serde(default)]
    pub catalog_item_count: usize,
    /// S2C sourcing: scorecard count.
    #[serde(default)]
    pub scorecard_count: usize,
    /// Financial reporting: financial statement count.
    #[serde(default)]
    pub financial_statement_count: usize,
    /// Financial reporting: bank reconciliation count.
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    /// HR: payroll run count.
    #[serde(default)]
    pub payroll_run_count: usize,
    /// HR: time entry count.
    #[serde(default)]
    pub time_entry_count: usize,
    /// HR: expense report count.
    #[serde(default)]
    pub expense_report_count: usize,
    /// HR: benefit enrollment count.
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    /// HR: pension plan count.
    #[serde(default)]
    pub pension_plan_count: usize,
    /// HR: stock grant count.
    #[serde(default)]
    pub stock_grant_count: usize,
    /// Accounting standards: revenue contract count.
    #[serde(default)]
    pub revenue_contract_count: usize,
    /// Accounting standards: impairment test count.
    #[serde(default)]
    pub impairment_test_count: usize,
    /// Accounting standards: business combination count.
    #[serde(default)]
    pub business_combination_count: usize,
    /// Accounting standards: ECL model count.
    #[serde(default)]
    pub ecl_model_count: usize,
    /// Accounting standards: provision count.
    #[serde(default)]
    pub provision_count: usize,
    /// Manufacturing: production order count.
    #[serde(default)]
    pub production_order_count: usize,
    /// Manufacturing: quality inspection count.
    #[serde(default)]
    pub quality_inspection_count: usize,
    /// Manufacturing: cycle count count.
    #[serde(default)]
    pub cycle_count_count: usize,
    /// Manufacturing: BOM component count.
    #[serde(default)]
    pub bom_component_count: usize,
    /// Manufacturing: inventory movement count.
    #[serde(default)]
    pub inventory_movement_count: usize,
    /// Sales & reporting: sales quote count.
    #[serde(default)]
    pub sales_quote_count: usize,
    /// Sales & reporting: KPI count.
    #[serde(default)]
    pub kpi_count: usize,
    /// Sales & reporting: budget line count.
    #[serde(default)]
    pub budget_line_count: usize,
    /// Tax: jurisdiction count.
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    /// Tax: tax code count.
    #[serde(default)]
    pub tax_code_count: usize,
    /// ESG: emission count.
    #[serde(default)]
    pub esg_emission_count: usize,
    /// ESG: disclosure count.
    #[serde(default)]
    pub esg_disclosure_count: usize,
    /// Intercompany: matched pair count.
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    /// Intercompany: elimination count.
    #[serde(default)]
    pub ic_elimination_count: usize,
    /// Number of intercompany journal entries (seller + buyer side).
    #[serde(default)]
    pub ic_transaction_count: usize,
    /// Number of fixed asset subledger records.
    #[serde(default)]
    pub fa_subledger_count: usize,
    /// Number of inventory subledger records.
    #[serde(default)]
    pub inventory_subledger_count: usize,
    /// Treasury debt instrument count.
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    /// Treasury hedging instrument count.
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    /// Project accounting project count.
    #[serde(default)]
    pub project_count: usize,
    /// Project accounting change order count.
    #[serde(default)]
    pub project_change_order_count: usize,
    /// Tax provision count.
    #[serde(default)]
    pub tax_provision_count: usize,
    /// Opening balance count.
    #[serde(default)]
    pub opening_balance_count: usize,
    /// Subledger reconciliation count.
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    /// Tax line count.
    #[serde(default)]
    pub tax_line_count: usize,
    /// Project cost line count.
    #[serde(default)]
    pub project_cost_line_count: usize,
    /// Cash position count.
    #[serde(default)]
    pub cash_position_count: usize,
    /// Cash forecast count.
    #[serde(default)]
    pub cash_forecast_count: usize,
    /// Cash pool count.
    #[serde(default)]
    pub cash_pool_count: usize,
    /// Process evolution event count.
    #[serde(default)]
    pub process_evolution_event_count: usize,
    /// Organizational event count.
    #[serde(default)]
    pub organizational_event_count: usize,
    /// Counterfactual pair count.
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    /// Number of fraud red-flag indicators generated.
    #[serde(default)]
    pub red_flag_count: usize,
    /// Number of collusion rings generated.
    #[serde(default)]
    pub collusion_ring_count: usize,
    /// Number of bi-temporal vendor version chains generated.
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    /// Number of nodes in the entity relationship graph.
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    /// Number of edges in the entity relationship graph.
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    /// Number of cross-process links generated.
    #[serde(default)]
    pub cross_process_link_count: usize,
    /// Number of disruption events generated.
    #[serde(default)]
    pub disruption_event_count: usize,
    /// Number of industry-specific GL accounts generated.
    #[serde(default)]
    pub industry_gl_account_count: usize,
    /// Number of period-close journal entries generated (tax provision + closing entries).
    #[serde(default)]
    pub period_close_je_count: usize,
}
1666
/// Enhanced orchestrator with full feature integration.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated by `new` before the
    /// orchestrator is constructed.
    config: GeneratorConfig,
    /// Phase configuration supplied at construction. `with_progress`
    /// updates its `show_progress` flag.
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` on a freshly constructed orchestrator.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts out as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or drawn at random when the
    /// config does not specify one.
    seed: u64,
    /// Progress-bar container; created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint)
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry for localized data generation
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink for phase-by-phase output
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
    /// Shared template provider for user-supplied template packs.
    ///
    /// Constructed from `config.templates.path` at orchestrator creation
    /// time. When the path is `None`, this is still populated with an
    /// embedded-only provider so generators can always call trait methods
    /// without an `Option<…>` guard. v3.2.0+.
    template_provider: datasynth_core::templates::SharedTemplateProvider,
    /// v3.4.1+ temporal context for business-day / holiday awareness.
    ///
    /// Populated only when `temporal_patterns.business_days.enabled`. When
    /// `None`, document-flow / HR / treasury / period-close generators keep
    /// their legacy raw-RNG date-offset behaviour (byte-identical to v3.4.0
    /// for the same seed).
    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
    /// Optional shard-mode context (set by group-engine shard runners).
    /// `None` preserves byte-for-byte pre-v5.0 single-entity behavior.
    shard_context: Option<crate::shard_context::ShardContext>,
}
1703
1704impl EnhancedOrchestrator {
1705    /// Create a new enhanced orchestrator.
1706    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1707        datasynth_config::validate_config(&config)?;
1708
1709        let seed = config.global.seed.unwrap_or_else(rand::random);
1710
1711        // Build resource guard from config
1712        let resource_guard = Self::build_resource_guard(&config, None);
1713
1714        // Build country pack registry from config
1715        let country_pack_registry = match &config.country_packs {
1716            Some(cp) => {
1717                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1718                    .map_err(|e| SynthError::config(e.to_string()))?
1719            }
1720            None => datasynth_core::CountryPackRegistry::builtin_only()
1721                .map_err(|e| SynthError::config(e.to_string()))?,
1722        };
1723
1724        // Build the shared template provider from config.templates.path.
1725        // `None` → embedded-only provider (byte-identical pre-v3.2.0 output).
1726        // `Some(path)` → load file/dir and honour `merge_strategy`.
1727        let template_provider = Self::build_template_provider(&config)?;
1728
1729        // v3.4.1: build a shared temporal context when
1730        // `temporal_patterns.business_days.enabled`. `None` preserves the
1731        // raw-RNG date-offset behaviour per-generator.
1732        let temporal_context = Self::build_temporal_context(&config)?;
1733
1734        Ok(Self {
1735            config,
1736            phase_config,
1737            coa: None,
1738            master_data: MasterDataSnapshot::default(),
1739            seed,
1740            multi_progress: None,
1741            resource_guard,
1742            output_path: None,
1743            copula_generators: Vec::new(),
1744            country_pack_registry,
1745            phase_sink: None,
1746            template_provider,
1747            temporal_context,
1748            shard_context: None,
1749        })
1750    }
1751
1752    /// Install shard-mode context.  Called by the group shard runner
1753    /// before [`EnhancedOrchestrator::generate`] (or the equivalent
1754    /// entry point).  Has no effect on single-entity runs.
1755    ///
1756    /// See [`crate::shard_context::ShardContext`] for rationale.
1757    pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1758        self.shard_context = Some(ctx);
1759    }
1760
1761    /// Build the shared [`TemporalContext`] from `config.temporal_patterns`.
1762    ///
1763    /// Returns `Ok(None)` when temporal-pattern features are disabled — the
1764    /// caller keeps its legacy raw-RNG path. Returns `Ok(Some(arc))` when
1765    /// enabled. Returns `Err` only for unrecoverable config errors.
1766    fn build_temporal_context(
1767        config: &GeneratorConfig,
1768    ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1769        use datasynth_core::distributions::{parse_region_code, TemporalContext};
1770
1771        let tp = &config.temporal_patterns;
1772        if !tp.enabled || !tp.business_days.enabled {
1773            return Ok(None);
1774        }
1775
1776        let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1777            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1778        let end_date = start_date + chrono::Months::new(config.global.period_months);
1779
1780        let region_code = tp
1781            .calendars
1782            .regions
1783            .first()
1784            .cloned()
1785            .unwrap_or_else(|| "US".to_string());
1786        let region = parse_region_code(&region_code);
1787
1788        Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1789    }
1790
1791    /// Build the shared template provider from `config.templates`.
1792    ///
1793    /// Always returns a provider — falls back to embedded-only when
1794    /// `config.templates.path` is `None`. The merge-strategy from config
1795    /// maps onto the loader's [`MergeStrategy`] enum. Load failures at
1796    /// orchestrator-construction time are fatal (preferable to silently
1797    /// using embedded pools when the user supplied a bad path).
1798    fn build_template_provider(
1799        config: &GeneratorConfig,
1800    ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1801        use datasynth_core::templates::{
1802            loader::{MergeStrategy, TemplateLoader},
1803            DefaultTemplateProvider,
1804        };
1805        use std::sync::Arc;
1806
1807        let provider = match &config.templates.path {
1808            None => DefaultTemplateProvider::new(),
1809            Some(path) => {
1810                let data = if path.is_dir() {
1811                    TemplateLoader::load_from_directory(path)
1812                } else {
1813                    TemplateLoader::load_from_file(path)
1814                }
1815                .map_err(|e| {
1816                    SynthError::config(format!(
1817                        "Failed to load templates from {}: {e}",
1818                        path.display()
1819                    ))
1820                })?;
1821                let strategy = match config.templates.merge_strategy {
1822                    datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1823                    datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1824                    datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1825                        MergeStrategy::MergePreferFile
1826                    }
1827                };
1828                DefaultTemplateProvider::with_templates(data, strategy)
1829            }
1830        };
1831        Ok(Arc::new(provider))
1832    }
1833
1834    /// Create with default phase config.
1835    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1836        Self::new(config, PhaseConfig::default())
1837    }
1838
1839    /// Set a streaming phase sink for real-time output (builder pattern).
1840    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1841        self.phase_sink = Some(sink);
1842        self
1843    }
1844
1845    /// Set a streaming phase sink on an existing orchestrator.
1846    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1847        self.phase_sink = Some(sink);
1848    }
1849
1850    /// Emit a batch of items to the phase sink (if configured).
1851    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1852        if let Some(ref sink) = self.phase_sink {
1853            for item in items {
1854                if let Ok(value) = serde_json::to_value(item) {
1855                    if let Err(e) = sink.emit(phase, type_name, &value) {
1856                        warn!(
1857                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1858                        );
1859                    }
1860                }
1861            }
1862            if let Err(e) = sink.phase_complete(phase) {
1863                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1864            }
1865        }
1866    }
1867
1868    /// Enable/disable progress bars.
1869    pub fn with_progress(mut self, show: bool) -> Self {
1870        self.phase_config.show_progress = show;
1871        if show {
1872            self.multi_progress = Some(MultiProgress::new());
1873        }
1874        self
1875    }
1876
1877    /// Set the output path for disk space monitoring.
1878    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1879        let path = path.into();
1880        self.output_path = Some(path.clone());
1881        // Rebuild resource guard with the output path
1882        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1883        self
1884    }
1885
1886    /// Access the country pack registry.
1887    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1888        &self.country_pack_registry
1889    }
1890
1891    /// Look up a country pack by country code string.
1892    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1893        self.country_pack_registry.get_by_str(country)
1894    }
1895
1896    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1897    /// company, defaulting to `"US"` if no companies are configured.
1898    fn primary_country_code(&self) -> &str {
1899        self.config
1900            .companies
1901            .first()
1902            .map(|c| c.country.as_str())
1903            .unwrap_or("US")
1904    }
1905
1906    /// Resolve the country pack for the primary (first) company.
1907    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1908        self.country_pack_for(self.primary_country_code())
1909    }
1910
1911    /// Resolve the CoA framework from config/country-pack.
1912    fn resolve_coa_framework(&self) -> CoAFramework {
1913        if self.config.accounting_standards.enabled {
1914            match self.config.accounting_standards.framework {
1915                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1916                    return CoAFramework::FrenchPcg;
1917                }
1918                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1919                    return CoAFramework::GermanSkr04;
1920                }
1921                _ => {}
1922            }
1923        }
1924        // Fallback: derive from country pack
1925        let pack = self.primary_pack();
1926        match pack.accounting.framework.as_str() {
1927            "french_gaap" => CoAFramework::FrenchPcg,
1928            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1929            _ => CoAFramework::UsGaap,
1930        }
1931    }
1932
1933    /// Check if copula generators are available.
1934    ///
1935    /// Returns true if the orchestrator has copula generators for preserving
1936    /// correlations (typically from fingerprint-based generation).
1937    pub fn has_copulas(&self) -> bool {
1938        !self.copula_generators.is_empty()
1939    }
1940
1941    /// Get the copula generators.
1942    ///
1943    /// Returns a reference to the copula generators for use during generation.
1944    /// These can be used to generate correlated samples that preserve the
1945    /// statistical relationships from the source data.
1946    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1947        &self.copula_generators
1948    }
1949
1950    /// Get a mutable reference to the copula generators.
1951    ///
1952    /// Allows generators to sample from copulas during data generation.
1953    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1954        &mut self.copula_generators
1955    }
1956
1957    /// Sample correlated values from a named copula.
1958    ///
1959    /// Returns None if the copula doesn't exist.
1960    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1961        self.copula_generators
1962            .iter_mut()
1963            .find(|c| c.name == copula_name)
1964            .map(|c| c.generator.sample())
1965    }
1966
1967    /// Create an orchestrator from a fingerprint file.
1968    ///
1969    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1970    /// and creates an orchestrator configured to generate data matching
1971    /// the statistical properties of the original data.
1972    ///
1973    /// # Arguments
1974    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1975    /// * `phase_config` - Phase configuration for generation
1976    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1977    ///
1978    /// # Example
1979    /// ```no_run
1980    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1981    /// use std::path::Path;
1982    ///
1983    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1984    ///     Path::new("fingerprint.dsf"),
1985    ///     PhaseConfig::default(),
1986    ///     1.0,
1987    /// ).unwrap();
1988    /// ```
1989    pub fn from_fingerprint(
1990        fingerprint_path: &std::path::Path,
1991        phase_config: PhaseConfig,
1992        scale: f64,
1993    ) -> SynthResult<Self> {
1994        info!("Loading fingerprint from: {}", fingerprint_path.display());
1995
1996        // Read the fingerprint
1997        let reader = FingerprintReader::new();
1998        let fingerprint = reader
1999            .read_from_file(fingerprint_path)
2000            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2001
2002        Self::from_fingerprint_data(fingerprint, phase_config, scale)
2003    }
2004
2005    /// Create an orchestrator from a loaded fingerprint.
2006    ///
2007    /// # Arguments
2008    /// * `fingerprint` - The loaded fingerprint
2009    /// * `phase_config` - Phase configuration for generation
2010    /// * `scale` - Scale factor for row counts (1.0 = same as original)
2011    pub fn from_fingerprint_data(
2012        fingerprint: Fingerprint,
2013        phase_config: PhaseConfig,
2014        scale: f64,
2015    ) -> SynthResult<Self> {
2016        info!(
2017            "Synthesizing config from fingerprint (version: {}, tables: {})",
2018            fingerprint.manifest.version,
2019            fingerprint.schema.tables.len()
2020        );
2021
2022        // Generate a seed for the synthesis
2023        let seed: u64 = rand::random();
2024        info!("Fingerprint synthesis seed: {}", seed);
2025
2026        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
2027        let options = SynthesisOptions {
2028            scale,
2029            seed: Some(seed),
2030            preserve_correlations: true,
2031            inject_anomalies: true,
2032        };
2033        let synthesizer = ConfigSynthesizer::with_options(options);
2034
2035        // Synthesize full result including copula generators
2036        let synthesis_result = synthesizer
2037            .synthesize_full(&fingerprint, seed)
2038            .map_err(|e| {
2039                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2040            })?;
2041
2042        // Start with a base config from the fingerprint's industry if available
2043        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2044            Self::base_config_for_industry(industry)
2045        } else {
2046            Self::base_config_for_industry("manufacturing")
2047        };
2048
2049        // Apply the synthesized patches
2050        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2051
2052        // Log synthesis results
2053        info!(
2054            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2055            fingerprint.schema.tables.len(),
2056            scale,
2057            synthesis_result.copula_generators.len()
2058        );
2059
2060        if !synthesis_result.copula_generators.is_empty() {
2061            for spec in &synthesis_result.copula_generators {
2062                info!(
2063                    "  Copula '{}' for table '{}': {} columns",
2064                    spec.name,
2065                    spec.table,
2066                    spec.columns.len()
2067                );
2068            }
2069        }
2070
2071        // Create the orchestrator with the synthesized config
2072        let mut orchestrator = Self::new(config, phase_config)?;
2073
2074        // Store copula generators for use during generation
2075        orchestrator.copula_generators = synthesis_result.copula_generators;
2076
2077        Ok(orchestrator)
2078    }
2079
2080    /// Create a base config for a given industry.
2081    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2082        use datasynth_config::presets::create_preset;
2083        use datasynth_config::TransactionVolume;
2084        use datasynth_core::models::{CoAComplexity, IndustrySector};
2085
2086        let sector = match industry.to_lowercase().as_str() {
2087            "manufacturing" => IndustrySector::Manufacturing,
2088            "retail" => IndustrySector::Retail,
2089            "financial" | "financial_services" => IndustrySector::FinancialServices,
2090            "healthcare" => IndustrySector::Healthcare,
2091            "technology" | "tech" => IndustrySector::Technology,
2092            _ => IndustrySector::Manufacturing,
2093        };
2094
2095        // Create a preset with reasonable defaults
2096        create_preset(
2097            sector,
2098            1,  // company count
2099            12, // period months
2100            CoAComplexity::Medium,
2101            TransactionVolume::TenK,
2102        )
2103    }
2104
2105    /// Apply a config patch to a GeneratorConfig.
2106    fn apply_config_patch(
2107        mut config: GeneratorConfig,
2108        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2109    ) -> GeneratorConfig {
2110        use datasynth_fingerprint::synthesis::ConfigValue;
2111
2112        for (key, value) in patch.values() {
2113            match (key.as_str(), value) {
2114                // Transaction count is handled via TransactionVolume enum on companies
2115                // Log it but cannot directly set it (would need to modify company volumes)
2116                ("transactions.count", ConfigValue::Integer(n)) => {
2117                    info!(
2118                        "Fingerprint suggests {} transactions (apply via company volumes)",
2119                        n
2120                    );
2121                }
2122                ("global.period_months", ConfigValue::Integer(n)) => {
2123                    config.global.period_months = (*n).clamp(1, 120) as u32;
2124                }
2125                ("global.start_date", ConfigValue::String(s)) => {
2126                    config.global.start_date = s.clone();
2127                }
2128                ("global.seed", ConfigValue::Integer(n)) => {
2129                    config.global.seed = Some(*n as u64);
2130                }
2131                ("fraud.enabled", ConfigValue::Bool(b)) => {
2132                    config.fraud.enabled = *b;
2133                }
2134                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2135                    config.fraud.fraud_rate = *f;
2136                }
2137                ("data_quality.enabled", ConfigValue::Bool(b)) => {
2138                    config.data_quality.enabled = *b;
2139                }
2140                // Handle anomaly injection paths (mapped to fraud config)
2141                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2142                    config.fraud.enabled = *b;
2143                }
2144                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2145                    config.fraud.fraud_rate = *f;
2146                }
2147                _ => {
2148                    debug!("Ignoring unknown config patch key: {}", key);
2149                }
2150            }
2151        }
2152
2153        config
2154    }
2155
2156    /// Build a resource guard from the configuration.
2157    fn build_resource_guard(
2158        config: &GeneratorConfig,
2159        output_path: Option<PathBuf>,
2160    ) -> ResourceGuard {
2161        let mut builder = ResourceGuardBuilder::new();
2162
2163        // Configure memory limit if set
2164        if config.global.memory_limit_mb > 0 {
2165            builder = builder.memory_limit(config.global.memory_limit_mb);
2166        }
2167
2168        // Configure disk monitoring for output path
2169        if let Some(path) = output_path {
2170            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
2171        }
2172
2173        // Use conservative degradation settings for production safety
2174        builder = builder.conservative();
2175
2176        builder.build()
2177    }
2178
2179    /// Check resources (memory, disk, CPU) and return degradation level.
2180    ///
2181    /// Returns an error if hard limits are exceeded.
2182    /// Returns Ok(DegradationLevel) indicating current resource state.
2183    fn check_resources(&self) -> SynthResult<DegradationLevel> {
2184        self.resource_guard.check()
2185    }
2186
2187    /// Check resources with logging.
2188    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2189        let level = self.resource_guard.check()?;
2190
2191        if level != DegradationLevel::Normal {
2192            warn!(
2193                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2194                phase,
2195                level,
2196                self.resource_guard.current_memory_mb(),
2197                self.resource_guard.available_disk_mb()
2198            );
2199        }
2200
2201        Ok(level)
2202    }
2203
2204    /// Get current degradation actions based on resource state.
2205    fn get_degradation_actions(&self) -> DegradationActions {
2206        self.resource_guard.get_actions()
2207    }
2208
2209    /// Legacy method for backwards compatibility - now uses ResourceGuard.
2210    fn check_memory_limit(&self) -> SynthResult<()> {
2211        self.check_resources()?;
2212        Ok(())
2213    }
2214
2215    /// Run the complete generation workflow.
2216    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2217        info!("Starting enhanced generation workflow");
2218        info!(
2219            "Config: industry={:?}, period_months={}, companies={}",
2220            self.config.global.industry,
2221            self.config.global.period_months,
2222            self.config.companies.len()
2223        );
2224
2225        // Set decimal serialization mode (thread-local, affects JSON output).
2226        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
2227        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2228        datasynth_core::serde_decimal::set_numeric_native(is_native);
2229        struct NumericModeGuard;
2230        impl Drop for NumericModeGuard {
2231            fn drop(&mut self) {
2232                datasynth_core::serde_decimal::set_numeric_native(false);
2233            }
2234        }
2235        let _numeric_guard = if is_native {
2236            Some(NumericModeGuard)
2237        } else {
2238            None
2239        };
2240
2241        // Initial resource check before starting
2242        let initial_level = self.check_resources_with_log("initial")?;
2243        if initial_level == DegradationLevel::Emergency {
2244            return Err(SynthError::resource(
2245                "Insufficient resources to start generation",
2246            ));
2247        }
2248
2249        let mut stats = EnhancedGenerationStatistics {
2250            companies_count: self.config.companies.len(),
2251            period_months: self.config.global.period_months,
2252            ..Default::default()
2253        };
2254
2255        // Phase 1: Chart of Accounts
2256        let coa = self.phase_chart_of_accounts(&mut stats)?;
2257
2258        // Phase 2: Master Data
2259        self.phase_master_data(&mut stats)?;
2260
2261        // Emit master data to stream sink
2262        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2263        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2264        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2265
2266        // Phase 3: Document Flows + Subledger Linking
2267        let (mut document_flows, mut subledger, fa_journal_entries) =
2268            self.phase_document_flows(&mut stats)?;
2269
2270        // Emit document flows to stream sink
2271        self.emit_phase_items(
2272            "document_flows",
2273            "PurchaseOrder",
2274            &document_flows.purchase_orders,
2275        );
2276        self.emit_phase_items(
2277            "document_flows",
2278            "GoodsReceipt",
2279            &document_flows.goods_receipts,
2280        );
2281        self.emit_phase_items(
2282            "document_flows",
2283            "VendorInvoice",
2284            &document_flows.vendor_invoices,
2285        );
2286        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2287        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2288
2289        // Phase 3b: Opening Balances (before JE generation)
2290        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2291
2292        // Phase 3c: Convert opening balances to journal entries and prepend them.
2293        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
2294        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
2295        // balance map type.
2296        let opening_balance_jes: Vec<JournalEntry> = opening_balances
2297            .iter()
2298            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2299            .collect();
2300        if !opening_balance_jes.is_empty() {
2301            debug!(
2302                "Prepending {} opening balance JEs to entries",
2303                opening_balance_jes.len()
2304            );
2305        }
2306
2307        // Phase 4: Journal Entries
2308        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2309
2310        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
2311        // starts from the correct initial state.
2312        if !opening_balance_jes.is_empty() {
2313            let mut combined = opening_balance_jes;
2314            combined.extend(entries);
2315            entries = combined;
2316        }
2317
2318        // Phase 4c: Append FA acquisition journal entries to main entries
2319        if !fa_journal_entries.is_empty() {
2320            debug!(
2321                "Appending {} FA acquisition JEs to main entries",
2322                fa_journal_entries.len()
2323            );
2324            entries.extend(fa_journal_entries);
2325        }
2326
2327        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
2328        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2329
2330        // Get current degradation actions for optional phases
2331        let actions = self.get_degradation_actions();
2332
2333        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
2334        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2335
2336        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
2337        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
2338        if !sourcing.contracts.is_empty() {
2339            let mut linked_count = 0usize;
2340            // Collect (vendor_id, po_id) pairs from P2P chains
2341            let po_vendor_pairs: Vec<(String, String)> = document_flows
2342                .p2p_chains
2343                .iter()
2344                .map(|chain| {
2345                    (
2346                        chain.purchase_order.vendor_id.clone(),
2347                        chain.purchase_order.header.document_id.clone(),
2348                    )
2349                })
2350                .collect();
2351
2352            for chain in &mut document_flows.p2p_chains {
2353                if chain.purchase_order.contract_id.is_none() {
2354                    if let Some(contract) = sourcing
2355                        .contracts
2356                        .iter()
2357                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2358                    {
2359                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2360                        linked_count += 1;
2361                    }
2362                }
2363            }
2364
2365            // Populate reverse FK: purchase_order_ids on each contract
2366            for contract in &mut sourcing.contracts {
2367                let po_ids: Vec<String> = po_vendor_pairs
2368                    .iter()
2369                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2370                    .map(|(_, po_id)| po_id.clone())
2371                    .collect();
2372                if !po_ids.is_empty() {
2373                    contract.purchase_order_ids = po_ids;
2374                }
2375            }
2376
2377            if linked_count > 0 {
2378                debug!(
2379                    "Linked {} purchase orders to S2C contracts by vendor match",
2380                    linked_count
2381                );
2382            }
2383        }
2384
2385        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2386        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2387
2388        // Phase 5c: Append IC journal entries to main entries
2389        if !intercompany.seller_journal_entries.is_empty()
2390            || !intercompany.buyer_journal_entries.is_empty()
2391        {
2392            let ic_je_count = intercompany.seller_journal_entries.len()
2393                + intercompany.buyer_journal_entries.len();
2394            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2395            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2396            debug!(
2397                "Appended {} IC journal entries to main entries",
2398                ic_je_count
2399            );
2400        }
2401
2402        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2403        if !intercompany.elimination_entries.is_empty() {
2404            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2405                &intercompany.elimination_entries,
2406            );
2407            if !elim_jes.is_empty() {
2408                debug!(
2409                    "Appended {} elimination journal entries to main entries",
2410                    elim_jes.len()
2411                );
2412                // IC elimination net-zero assertion (v2.5 hardening)
2413                let elim_debit: rust_decimal::Decimal =
2414                    elim_jes.iter().map(|je| je.total_debit()).sum();
2415                let elim_credit: rust_decimal::Decimal =
2416                    elim_jes.iter().map(|je| je.total_credit()).sum();
2417                let elim_diff = (elim_debit - elim_credit).abs();
2418                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2419                if elim_diff > tolerance {
2420                    return Err(datasynth_core::error::SynthError::generation(format!(
2421                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2422                        elim_debit, elim_credit, elim_diff, tolerance
2423                    )));
2424                }
2425                debug!(
2426                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2427                    elim_debit, elim_credit, elim_diff
2428                );
2429                entries.extend(elim_jes);
2430            }
2431        }
2432
2433        // Phase 5e: Wire IC source documents into document flow snapshot
2434        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2435            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2436                document_flows
2437                    .customer_invoices
2438                    .extend(ic_docs.seller_invoices.iter().cloned());
2439                document_flows
2440                    .purchase_orders
2441                    .extend(ic_docs.buyer_orders.iter().cloned());
2442                document_flows
2443                    .goods_receipts
2444                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2445                document_flows
2446                    .vendor_invoices
2447                    .extend(ic_docs.buyer_invoices.iter().cloned());
2448                debug!(
2449                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2450                    ic_docs.seller_invoices.len(),
2451                    ic_docs.buyer_orders.len(),
2452                    ic_docs.buyer_goods_receipts.len(),
2453                    ic_docs.buyer_invoices.len(),
2454                );
2455            }
2456        }
2457
2458        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2459        let hr = self.phase_hr_data(&mut stats)?;
2460
2461        // Phase 6b: Generate JEs from payroll runs
2462        if !hr.payroll_runs.is_empty() {
2463            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2464            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2465            entries.extend(payroll_jes);
2466        }
2467
2468        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2469        if !hr.pension_journal_entries.is_empty() {
2470            debug!(
2471                "Generated {} JEs from pension plans",
2472                hr.pension_journal_entries.len()
2473            );
2474            entries.extend(hr.pension_journal_entries.iter().cloned());
2475        }
2476
2477        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2478        if !hr.stock_comp_journal_entries.is_empty() {
2479            debug!(
2480                "Generated {} JEs from stock-based compensation",
2481                hr.stock_comp_journal_entries.len()
2482            );
2483            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2484        }
2485
2486        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2487        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2488
2489        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2490        if !manufacturing_snap.production_orders.is_empty() {
2491            let currency = self
2492                .config
2493                .companies
2494                .first()
2495                .map(|c| c.currency.as_str())
2496                .unwrap_or("USD");
2497            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2498                &manufacturing_snap.production_orders,
2499                &manufacturing_snap.quality_inspections,
2500                currency,
2501            );
2502            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2503            entries.extend(mfg_jes);
2504        }
2505
2506        // Phase 7a-warranty: Generate warranty provisions per company
2507        if !manufacturing_snap.quality_inspections.is_empty() {
2508            let framework = match self.config.accounting_standards.framework {
2509                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2510                _ => "US_GAAP",
2511            };
2512            for company in &self.config.companies {
2513                let company_orders: Vec<_> = manufacturing_snap
2514                    .production_orders
2515                    .iter()
2516                    .filter(|o| o.company_code == company.code)
2517                    .cloned()
2518                    .collect();
2519                let company_inspections: Vec<_> = manufacturing_snap
2520                    .quality_inspections
2521                    .iter()
2522                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2523                    .cloned()
2524                    .collect();
2525                if company_inspections.is_empty() {
2526                    continue;
2527                }
2528                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2529                let warranty_result = warranty_gen.generate(
2530                    &company.code,
2531                    &company_orders,
2532                    &company_inspections,
2533                    &company.currency,
2534                    framework,
2535                );
2536                if !warranty_result.journal_entries.is_empty() {
2537                    debug!(
2538                        "Generated {} warranty provision JEs for {}",
2539                        warranty_result.journal_entries.len(),
2540                        company.code
2541                    );
2542                    entries.extend(warranty_result.journal_entries);
2543                }
2544            }
2545        }
2546
2547        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2548        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2549        {
2550            let cogs_currency = self
2551                .config
2552                .companies
2553                .first()
2554                .map(|c| c.currency.as_str())
2555                .unwrap_or("USD");
2556            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2557                &document_flows.deliveries,
2558                &manufacturing_snap.production_orders,
2559                cogs_currency,
2560            );
2561            if !cogs_jes.is_empty() {
2562                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2563                entries.extend(cogs_jes);
2564            }
2565        }
2566
2567        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2568        //
2569        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2570        // subledger inventory positions.  Here we reconcile them so that position balances
2571        // reflect the actual stock movements within the generation period.
2572        if !manufacturing_snap.inventory_movements.is_empty()
2573            && !subledger.inventory_positions.is_empty()
2574        {
2575            use datasynth_core::models::MovementType as MfgMovementType;
2576            let mut receipt_count = 0usize;
2577            let mut issue_count = 0usize;
2578            for movement in &manufacturing_snap.inventory_movements {
2579                // Find a matching position by material code and company
2580                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2581                    p.material_id == movement.material_code
2582                        && p.company_code == movement.entity_code
2583                }) {
2584                    match movement.movement_type {
2585                        MfgMovementType::GoodsReceipt => {
2586                            // Increase stock and update weighted-average cost
2587                            pos.add_quantity(
2588                                movement.quantity,
2589                                movement.value,
2590                                movement.movement_date,
2591                            );
2592                            receipt_count += 1;
2593                        }
2594                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2595                            // Decrease stock (best-effort; silently skip if insufficient)
2596                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2597                            issue_count += 1;
2598                        }
2599                        _ => {}
2600                    }
2601                }
2602            }
2603            debug!(
2604                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2605                manufacturing_snap.inventory_movements.len(),
2606                receipt_count,
2607                issue_count,
2608            );
2609        }
2610
2611        // Update final entry/line-item stats after all JE-generating phases
2612        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2613        if !entries.is_empty() {
2614            stats.total_entries = entries.len() as u64;
2615            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2616            debug!(
2617                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2618                stats.total_entries, stats.total_line_items
2619            );
2620        }
2621
2622        // Phase 7b: Apply internal controls to journal entries
2623        if self.config.internal_controls.enabled && !entries.is_empty() {
2624            info!("Phase 7b: Applying internal controls to journal entries");
2625            let control_config = ControlGeneratorConfig {
2626                exception_rate: self.config.internal_controls.exception_rate,
2627                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2628                enable_sox_marking: true,
2629                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2630                    self.config.internal_controls.sox_materiality_threshold,
2631                )
2632                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2633                ..Default::default()
2634            };
2635            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2636            for entry in &mut entries {
2637                control_gen.apply_controls(entry, &coa);
2638            }
2639            let with_controls = entries
2640                .iter()
2641                .filter(|e| !e.header.control_ids.is_empty())
2642                .count();
2643            info!(
2644                "Applied controls to {} entries ({} with control IDs assigned)",
2645                entries.len(),
2646                with_controls
2647            );
2648        }
2649
2650        // Phase 7c: Extract SoD violations from annotated journal entries.
2651        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2652        // Here we materialise those flags into standalone SodViolation records.
2653        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2654            .iter()
2655            .filter(|e| e.header.sod_violation)
2656            .filter_map(|e| {
2657                e.header.sod_conflict_type.map(|ct| {
2658                    use datasynth_core::models::{RiskLevel, SodViolation};
2659                    let severity = match ct {
2660                        datasynth_core::models::SodConflictType::PaymentReleaser
2661                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2662                            RiskLevel::Critical
2663                        }
2664                        datasynth_core::models::SodConflictType::PreparerApprover
2665                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2666                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2667                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2668                            RiskLevel::High
2669                        }
2670                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2671                            RiskLevel::Medium
2672                        }
2673                    };
2674                    let action = format!(
2675                        "SoD conflict {:?} on entry {} ({})",
2676                        ct, e.header.document_id, e.header.company_code
2677                    );
2678                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2679                })
2680            })
2681            .collect();
2682        if !sod_violations.is_empty() {
2683            info!(
2684                "Phase 7c: Extracted {} SoD violations from {} entries",
2685                sod_violations.len(),
2686                entries.len()
2687            );
2688        }
2689
2690        // Emit journal entries to stream sink (after all JE-generating phases)
2691        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2692
2693        // Phase 7d: Document-level fraud injection + propagation to derived JEs.
2694        //
2695        // This runs BEFORE line-level anomaly injection so that JEs tagged by
2696        // document-level fraud are exempt from subsequent line-level flag
2697        // overwrites, and so downstream consumers see a coherent picture.
2698        //
2699        // Gated by `fraud.document_fraud_rate` — `None` or `0.0` is a no-op.
2700        {
2701            let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2702            if self.config.fraud.enabled && doc_rate > 0.0 {
2703                use datasynth_core::fraud_propagation::{
2704                    inject_document_fraud, propagate_documents_to_entries,
2705                };
2706                use datasynth_core::utils::weighted_select;
2707                use datasynth_core::FraudType;
2708                use rand_chacha::rand_core::SeedableRng;
2709
2710                let dist = &self.config.fraud.fraud_type_distribution;
2711                let fraud_type_weights: [(FraudType, f64); 8] = [
2712                    (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2713                    (FraudType::FictitiousEntry, dist.fictitious_transaction),
2714                    (FraudType::RevenueManipulation, dist.revenue_manipulation),
2715                    (
2716                        FraudType::ImproperCapitalization,
2717                        dist.expense_capitalization,
2718                    ),
2719                    (FraudType::SplitTransaction, dist.split_transaction),
2720                    (FraudType::TimingAnomaly, dist.timing_anomaly),
2721                    (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2722                    (FraudType::DuplicatePayment, dist.duplicate_payment),
2723                ];
2724                let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2725                let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2726                    if weights_sum <= 0.0 {
2727                        FraudType::FictitiousEntry
2728                    } else {
2729                        *weighted_select(rng, &fraud_type_weights)
2730                    }
2731                };
2732
2733                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2734                let mut doc_tagged = 0usize;
2735                macro_rules! inject_into {
2736                    ($collection:expr) => {{
2737                        let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2738                            $collection.iter_mut().map(|d| &mut d.header).collect();
2739                        doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2740                    }};
2741                }
2742                inject_into!(document_flows.purchase_orders);
2743                inject_into!(document_flows.goods_receipts);
2744                inject_into!(document_flows.vendor_invoices);
2745                inject_into!(document_flows.payments);
2746                inject_into!(document_flows.sales_orders);
2747                inject_into!(document_flows.deliveries);
2748                inject_into!(document_flows.customer_invoices);
2749                if doc_tagged > 0 {
2750                    info!(
2751                        "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2752                    );
2753                }
2754
2755                if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2756                    let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2757                        Vec::new();
2758                    headers.extend(
2759                        document_flows
2760                            .purchase_orders
2761                            .iter()
2762                            .map(|d| d.header.clone()),
2763                    );
2764                    headers.extend(
2765                        document_flows
2766                            .goods_receipts
2767                            .iter()
2768                            .map(|d| d.header.clone()),
2769                    );
2770                    headers.extend(
2771                        document_flows
2772                            .vendor_invoices
2773                            .iter()
2774                            .map(|d| d.header.clone()),
2775                    );
2776                    headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2777                    headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2778                    headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2779                    headers.extend(
2780                        document_flows
2781                            .customer_invoices
2782                            .iter()
2783                            .map(|d| d.header.clone()),
2784                    );
2785                    let propagated = propagate_documents_to_entries(&headers, &mut entries);
2786                    if propagated > 0 {
2787                        info!(
2788                            "Propagated document-level fraud to {propagated} derived journal entries"
2789                        );
2790                    }
2791                }
2792            }
2793        }
2794
2795        // Phase 8: Anomaly Injection (after all JE-generating phases)
2796        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2797
2798        // Phase 8b: Apply behavioral biases to fraud entries that did NOT go
2799        // through the anomaly injector.
2800        //
2801        // Three paths set `is_fraud = true` without touching `is_anomaly`:
2802        //   - je_generator::determine_fraud (intrinsic fraud during JE generation)
2803        //   - fraud_propagation::propagate_documents_to_entries (doc-level cascade)
2804        //   - Any external mutation that sets is_fraud after the fact
2805        //
2806        // The anomaly injector already applies the same bias inline when it
2807        // tags an entry as fraud (and sets is_anomaly=true in the same step),
2808        // so gating this sweep on `!is_anomaly` avoids double-application.
2809        //
2810        // Without this sweep, fraud entries from these paths show 0 lift on
2811        // the canonical forensic signals (is_round_1000, is_off_hours,
2812        // is_weekend, is_post_close), which is exactly what the SDK-side
2813        // evaluator caught in v3.1 — fraud features had worse lift than
2814        // baseline. See DS-3.1 post-deploy feedback.
2815        {
2816            use datasynth_core::fraud_bias::{
2817                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2818            };
2819            use rand_chacha::rand_core::SeedableRng;
2820            let cfg = FraudBehavioralBiasConfig::default();
2821            if cfg.enabled {
2822                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2823                let mut swept = 0usize;
2824                for entry in entries.iter_mut() {
2825                    if entry.header.is_fraud && !entry.header.is_anomaly {
2826                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2827                        swept += 1;
2828                    }
2829                }
2830                if swept > 0 {
2831                    info!(
2832                        "Applied behavioral biases to {swept} non-anomaly fraud entries \
2833                         (doc-propagated + je_generator intrinsic fraud)"
2834                    );
2835                }
2836            }
2837        }
2838
2839        // Emit anomaly labels to stream sink
2840        self.emit_phase_items(
2841            "anomaly_injection",
2842            "LabeledAnomaly",
2843            &anomaly_labels.labels,
2844        );
2845
2846        // Propagate fraud labels from journal entries to source documents.
2847        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2848        // instead of tracing through document_references.json.
2849        //
2850        // Gated by `fraud.propagate_to_document` (default true) — disable when
2851        // downstream consumers want document fraud flags to reflect only
2852        // document-level injection, not line-level.
2853        if self.config.fraud.propagate_to_document {
2854            use std::collections::HashMap;
2855            // Build a map from document key -> fraud_type, populated only from fraudulent JEs.
2856            //
2857            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2858            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2859            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2860            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2861            // we register BOTH the prefixed form (raw reference) AND the bare form
2862            // (post-colon portion) in the map. Also register the JE's document_id
2863            // UUID so documents that set `journal_entry_id` match via that path.
2864            //
2865            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2866            // looked up "foo", silently producing 0 propagations.
2867            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2868            for je in &entries {
2869                if je.header.is_fraud {
2870                    if let Some(ref fraud_type) = je.header.fraud_type {
2871                        if let Some(ref reference) = je.header.reference {
2872                            // Register the full reference ("GR:PO-2024-000001")
2873                            fraud_map.insert(reference.clone(), *fraud_type);
2874                            // Also register the bare document ID ("PO-2024-000001")
2875                            // by stripping the "PREFIX:" if present.
2876                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2877                                if !bare.is_empty() {
2878                                    fraud_map.insert(bare.to_string(), *fraud_type);
2879                                }
2880                            }
2881                        }
2882                        // Also tag via journal_entry_id on document headers
2883                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2884                    }
2885                }
2886            }
2887            if !fraud_map.is_empty() {
2888                let mut propagated = 0usize;
2889                // Use DocumentHeader::propagate_fraud method for each doc type
2890                macro_rules! propagate_to {
2891                    ($collection:expr) => {
2892                        for doc in &mut $collection {
2893                            if doc.header.propagate_fraud(&fraud_map) {
2894                                propagated += 1;
2895                            }
2896                        }
2897                    };
2898                }
2899                propagate_to!(document_flows.purchase_orders);
2900                propagate_to!(document_flows.goods_receipts);
2901                propagate_to!(document_flows.vendor_invoices);
2902                propagate_to!(document_flows.payments);
2903                propagate_to!(document_flows.sales_orders);
2904                propagate_to!(document_flows.deliveries);
2905                propagate_to!(document_flows.customer_invoices);
2906                if propagated > 0 {
2907                    info!(
2908                        "Propagated fraud labels to {} document flow records",
2909                        propagated
2910                    );
2911                }
2912            }
2913        }
2914
2915        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2916        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2917
2918        // Emit red flags to stream sink
2919        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2920
2921        // Phase 26b: Collusion Ring Generation (after red flags)
2922        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2923
2924        // Emit collusion rings to stream sink
2925        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2926
2927        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2928        let balance_validation = self.phase_balance_validation(&entries)?;
2929
2930        // Phase 9b: GL-to-Subledger Reconciliation
2931        let subledger_reconciliation =
2932            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2933
2934        // Phase 10: Data Quality Injection
2935        let (data_quality_stats, quality_issues) =
2936            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2937
2938        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2939        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2940
2941        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
2942        {
2943            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2944
2945            // Assert 1: Every JE not flagged as fraud or anomaly must individually balance
2946            // (debits = credits). Flagged JEs are skipped: they may be intentionally unbalanced.
2947            let mut unbalanced_clean = 0usize;
2948            for je in &entries {
2949                if je.header.is_fraud || je.header.is_anomaly {
2950                    continue;
2951                }
2952                let diff = (je.total_debit() - je.total_credit()).abs();
2953                if diff > tolerance {
2954                    unbalanced_clean += 1;
2955                    if unbalanced_clean <= 3 {
2956                        warn!(
2957                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2958                            je.header.document_id,
2959                            je.total_debit(),
2960                            je.total_credit(),
2961                            diff
2962                        );
2963                    }
2964                }
2965            }
2966            if unbalanced_clean > 0 {
2967                return Err(datasynth_core::error::SynthError::generation(format!(
2968                    "{} non-anomaly JEs are unbalanced (debits != credits). \
2969                     First few logged above. Tolerance={}",
2970                    unbalanced_clean, tolerance
2971                )));
2972            }
2973            debug!(
2974                "Phase 10c: All {} non-anomaly JEs individually balanced",
2975                entries
2976                    .iter()
2977                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2978                    .count()
2979            );
2980
2981            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
2982            let company_codes: Vec<String> = self
2983                .config
2984                .companies
2985                .iter()
2986                .map(|c| c.code.clone())
2987                .collect();
2988            for company_code in &company_codes {
2989                let mut assets = rust_decimal::Decimal::ZERO;
2990                let mut liab_equity = rust_decimal::Decimal::ZERO;
2991
2992                for entry in &entries {
2993                    if entry.header.company_code != *company_code {
2994                        continue;
2995                    }
2996                    for line in &entry.lines {
2997                        let acct = &line.gl_account;
2998                        let net = line.debit_amount - line.credit_amount;
2999                        // Asset accounts (1xxx): normal debit balance
3000                        if acct.starts_with('1') {
3001                            assets += net;
3002                        }
3003                        // Liability (2xxx) + Equity (3xxx): normal credit balance
3004                        else if acct.starts_with('2') || acct.starts_with('3') {
3005                            liab_equity -= net; // credit-normal, so negate debit-net
3006                        }
3007                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
3008                        // so they net to zero after closing entries
3009                    }
3010                }
3011
3012                let bs_diff = (assets - liab_equity).abs();
3013                if bs_diff > tolerance {
3014                    warn!(
3015                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3016                         revenue/expense closing entries may not fully offset",
3017                        company_code, assets, liab_equity, bs_diff
3018                    );
3019                    // Warn rather than error: multi-period datasets may have timing
3020                    // differences from accruals/deferrals that resolve in later periods.
3021                    // The per-JE balance check (Assert 1) is the hard gate.
3022                } else {
3023                    debug!(
3024                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3025                        company_code, assets, liab_equity, bs_diff
3026                    );
3027                }
3028            }
3029
3030            info!("Phase 10c: All generation-time accounting assertions passed");
3031        }
3032
3033        // Phase 11: Audit Data
3034        let audit = self.phase_audit_data(&entries, &mut stats)?;
3035
3036        // Phase 12: Banking KYC/AML Data
3037        let mut banking = self.phase_banking_data(&mut stats)?;
3038
3039        // Phase 12.5: Bridge document-flow Payments → BankTransactions
3040        // Creates coherence between the accounting layer (payments, JEs) and the
3041        // banking layer (bank transactions). A vendor invoice payment now appears
3042        // on both sides with cross-references and fraud labels propagated.
3043        if self.phase_config.generate_banking
3044            && !document_flows.payments.is_empty()
3045            && !banking.accounts.is_empty()
3046        {
3047            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3048            if bridge_rate > 0.0 {
3049                let mut bridge =
3050                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3051                        self.seed,
3052                    );
3053                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3054                    &document_flows.payments,
3055                    &banking.customers,
3056                    &banking.accounts,
3057                    bridge_rate,
3058                );
3059                info!(
3060                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3061                    bridge_stats.bridged_count,
3062                    bridge_stats.transactions_emitted,
3063                    bridge_stats.fraud_propagated,
3064                );
3065                let bridged_count = bridged_txns.len();
3066                banking.transactions.extend(bridged_txns);
3067
3068                // Re-run velocity computation so bridged txns also get features
3069                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
3070                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3071                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
3072                        &mut banking.transactions,
3073                    );
3074                }
3075
3076                // Recompute suspicious count after bridging
3077                banking.suspicious_count = banking
3078                    .transactions
3079                    .iter()
3080                    .filter(|t| t.is_suspicious)
3081                    .count();
3082                stats.banking_transaction_count = banking.transactions.len();
3083                stats.banking_suspicious_count = banking.suspicious_count;
3084            }
3085        }
3086
3087        // Phase 13: Graph Export
3088        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3089
3090        // Phase 14: LLM Enrichment
3091        self.phase_llm_enrichment(&mut stats);
3092
3093        // Phase 15: Diffusion Enhancement
3094        self.phase_diffusion_enhancement(&entries, &mut stats);
3095
3096        // Phase 16: Causal Overlay
3097        self.phase_causal_overlay(&mut stats);
3098
3099        // Phase 17: Bank Reconciliation + Financial Statements
3100        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
3101        // provision data (from accounting_standards / tax snapshots) can be wired in.
3102        let mut financial_reporting = self.phase_financial_reporting(
3103            &document_flows,
3104            &entries,
3105            &coa,
3106            &hr,
3107            &audit,
3108            &mut stats,
3109        )?;
3110
3111        // BS coherence check: assets = liabilities + equity
3112        {
3113            use datasynth_core::models::StatementType;
3114            for stmt in &financial_reporting.consolidated_statements {
3115                if stmt.statement_type == StatementType::BalanceSheet {
3116                    let total_assets: rust_decimal::Decimal = stmt
3117                        .line_items
3118                        .iter()
3119                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
3120                        .map(|li| li.amount)
3121                        .sum();
3122                    let total_le: rust_decimal::Decimal = stmt
3123                        .line_items
3124                        .iter()
3125                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3126                        .map(|li| li.amount)
3127                        .sum();
3128                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3129                        warn!(
3130                            "BS equation imbalance: assets={}, L+E={}",
3131                            total_assets, total_le
3132                        );
3133                    }
3134                }
3135            }
3136        }
3137
3138        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
3139        let accounting_standards =
3140            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3141
3142        // Phase 18a: Merge ECL journal entries into main GL
3143        if !accounting_standards.ecl_journal_entries.is_empty() {
3144            debug!(
3145                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3146                accounting_standards.ecl_journal_entries.len()
3147            );
3148            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3149        }
3150
3151        // Phase 18a: Merge provision journal entries into main GL
3152        if !accounting_standards.provision_journal_entries.is_empty() {
3153            debug!(
3154                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3155                accounting_standards.provision_journal_entries.len()
3156            );
3157            entries.extend(
3158                accounting_standards
3159                    .provision_journal_entries
3160                    .iter()
3161                    .cloned(),
3162            );
3163        }
3164
3165        // Phase 18b: OCPM Events (after all process data is available)
3166        let mut ocpm = self.phase_ocpm_events(
3167            &document_flows,
3168            &sourcing,
3169            &hr,
3170            &manufacturing_snap,
3171            &banking,
3172            &audit,
3173            &financial_reporting,
3174            &mut stats,
3175        )?;
3176
3177        // Emit OCPM events to stream sink
3178        if let Some(ref event_log) = ocpm.event_log {
3179            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3180        }
3181
3182        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
3183        if let Some(ref event_log) = ocpm.event_log {
3184            // Build reverse index: document_ref → indices into `event_log.events`
3185            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3186                std::collections::HashMap::new();
3187            for (idx, event) in event_log.events.iter().enumerate() {
3188                if let Some(ref doc_ref) = event.document_ref {
3189                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3190                }
3191            }
3192
3193            if !doc_index.is_empty() {
3194                let mut annotated = 0usize;
3195                for entry in &mut entries {
3196                    let doc_id_str = entry.header.document_id.to_string();
3197                    // Collect matching event indices from document_id and reference
3198                    let mut matched_indices: Vec<usize> = Vec::new();
3199                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3200                        matched_indices.extend(indices);
3201                    }
3202                    if let Some(ref reference) = entry.header.reference {
3203                        let bare_ref = reference
3204                            .find(':')
3205                            .map(|i| &reference[i + 1..])
3206                            .unwrap_or(reference.as_str());
3207                        if let Some(indices) = doc_index.get(bare_ref) {
3208                            for &idx in indices {
3209                                if !matched_indices.contains(&idx) {
3210                                    matched_indices.push(idx);
3211                                }
3212                            }
3213                        }
3214                    }
3215                    // Apply matches to JE header
3216                    if !matched_indices.is_empty() {
3217                        for &idx in &matched_indices {
3218                            let event = &event_log.events[idx];
3219                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3220                                entry.header.ocpm_event_ids.push(event.event_id);
3221                            }
3222                            for obj_ref in &event.object_refs {
3223                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3224                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
3225                                }
3226                            }
3227                            if entry.header.ocpm_case_id.is_none() {
3228                                entry.header.ocpm_case_id = event.case_id;
3229                            }
3230                        }
3231                        annotated += 1;
3232                    }
3233                }
3234                debug!(
3235                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3236                    annotated
3237                );
3238            }
3239        }
3240
3241        // Phase 18d: Synthesize OCPM events for orphan JEs (period-close,
3242        // IC eliminations, opening balances, standards-driven entries) so
3243        // every JournalEntry carries at least one `ocpm_event_ids` link.
3244        if let Some(ref mut event_log) = ocpm.event_log {
3245            let synthesized =
3246                datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3247            if synthesized > 0 {
3248                info!(
3249                    "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3250                );
3251            }
3252
3253            // Phase 18e: Mirror JE anomaly / fraud flags onto the linked OCEL
3254            // events and their owning CaseTrace. Without this, every exported
3255            // OCEL event has `is_anomaly = false` even when the underlying JE
3256            // was flagged.
3257            let anomaly_events =
3258                datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3259            if anomaly_events > 0 {
3260                info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3261            }
3262
3263            // Phase 18f: Inject process-variant imperfections (rework, skipped
3264            // steps, out-of-order events) so conformance checkers see
3265            // realistic variant counts and fitness < 1.0. Uses the P2P
3266            // process rates as the single source of truth.
3267            let p2p_cfg = &self.config.ocpm.p2p_process;
3268            let any_imperfection = p2p_cfg.rework_probability > 0.0
3269                || p2p_cfg.skip_step_probability > 0.0
3270                || p2p_cfg.out_of_order_probability > 0.0;
3271            if any_imperfection {
3272                use rand_chacha::rand_core::SeedableRng;
3273                let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3274                    rework_rate: p2p_cfg.rework_probability,
3275                    skip_rate: p2p_cfg.skip_step_probability,
3276                    out_of_order_rate: p2p_cfg.out_of_order_probability,
3277                };
3278                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3279                let stats =
3280                    datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3281                if stats.rework + stats.skipped + stats.out_of_order > 0 {
3282                    info!(
3283                        "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3284                        stats.rework, stats.skipped, stats.out_of_order
3285                    );
3286                }
3287            }
3288        }
3289
3290        // Phase 19: Sales Quotes, Management KPIs, Budgets
3291        let sales_kpi_budgets =
3292            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3293
3294        // Phase 22: Treasury Data Generation
3295        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
3296        // are included in the pre-tax income used by phase_tax_generation.
3297        let treasury =
3298            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3299
3300        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
3301        if !treasury.journal_entries.is_empty() {
3302            debug!(
3303                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3304                treasury.journal_entries.len()
3305            );
3306            entries.extend(treasury.journal_entries.iter().cloned());
3307        }
3308
3309        // Phase 20: Tax Generation
3310        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3311
3312        // Phase 20 JEs: Merge tax posting journal entries into main GL
3313        if !tax.tax_posting_journal_entries.is_empty() {
3314            debug!(
3315                "Merging {} tax posting JEs into GL",
3316                tax.tax_posting_journal_entries.len()
3317            );
3318            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3319        }
3320
3321        // Phase 20b: FINAL fraud behavioral bias sweep.
3322        //
3323        // Many phases AFTER Phase 8b (ECL / provisions / treasury / tax /
3324        // period close) extend `entries` with new journal entries that may
3325        // carry `is_fraud = true` (e.g. tax-provision entries derived from
3326        // already-fraudulent transactions). Those late additions miss the
3327        // Phase 8b sweep and ship without bias applied — which is exactly
3328        // why SDK-team production jobs kept reporting `off_hours 0× lift`
3329        // even after v3.1.1 closed the per-phase gap for early-added JEs.
3330        //
3331        // Running the sweep one more time here guarantees every is_fraud
3332        // entry — regardless of which phase added it — has bias applied.
3333        // `!is_anomaly` gates out anomaly-injector entries (which already
3334        // got biased inline); the sweep is otherwise idempotent-ish:
3335        // weekend / off_hours re-fire to another valid weekend / off-hour,
3336        // post_close is guarded by `!is_post_close`, and round-dollar
3337        // rescaling on an already-round amount is a no-op (ratio = 1).
3338        {
3339            use datasynth_core::fraud_bias::{
3340                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3341            };
3342            use rand_chacha::rand_core::SeedableRng;
3343            let cfg = FraudBehavioralBiasConfig::default();
3344            if cfg.enabled {
3345                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3346                let mut swept = 0usize;
3347                for entry in entries.iter_mut() {
3348                    if entry.header.is_fraud && !entry.header.is_anomaly {
3349                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3350                        swept += 1;
3351                    }
3352                }
3353                if swept > 0 {
3354                    info!(
3355                        "Phase 20b: final behavioral-bias sweep applied to {swept} \
3356                         non-anomaly fraud entries (covers late-added JEs from \
3357                         ECL / provisions / treasury / tax / period-close)"
3358                    );
3359                }
3360            }
3361        }
3362
3363        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
3364        // Build supplementary cash flow items from upstream JE data (depreciation,
3365        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
3366        {
3367            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3368
3369            let framework_str = {
3370                use datasynth_config::schema::AccountingFrameworkConfig;
3371                match self
3372                    .config
3373                    .accounting_standards
3374                    .framework
3375                    .unwrap_or_default()
3376                {
3377                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3378                        "IFRS"
3379                    }
3380                    _ => "US_GAAP",
3381                }
3382            };
3383
3384            // Sum depreciation debits (account 6000) from close JEs
3385            let depreciation_total: rust_decimal::Decimal = entries
3386                .iter()
3387                .filter(|je| je.header.document_type == "CL")
3388                .flat_map(|je| je.lines.iter())
3389                .filter(|l| l.gl_account.starts_with("6000"))
3390                .map(|l| l.debit_amount)
3391                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3392
3393            // Sum interest expense debits (account 7100)
3394            let interest_paid: rust_decimal::Decimal = entries
3395                .iter()
3396                .flat_map(|je| je.lines.iter())
3397                .filter(|l| l.gl_account.starts_with("7100"))
3398                .map(|l| l.debit_amount)
3399                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3400
3401            // Sum tax expense debits (account 8000)
3402            let tax_paid: rust_decimal::Decimal = entries
3403                .iter()
3404                .flat_map(|je| je.lines.iter())
3405                .filter(|l| l.gl_account.starts_with("8000"))
3406                .map(|l| l.debit_amount)
3407                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3408
3409            // Sum capex debits on fixed assets (account 1500)
3410            let capex: rust_decimal::Decimal = entries
3411                .iter()
3412                .flat_map(|je| je.lines.iter())
3413                .filter(|l| l.gl_account.starts_with("1500"))
3414                .map(|l| l.debit_amount)
3415                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3416
3417            // Dividends paid: sum debits on dividends payable (account 2170) from payment JEs
3418            let dividends_paid: rust_decimal::Decimal = entries
3419                .iter()
3420                .flat_map(|je| je.lines.iter())
3421                .filter(|l| l.gl_account == "2170")
3422                .map(|l| l.debit_amount)
3423                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3424
3425            let cf_data = CashFlowSourceData {
3426                depreciation_total,
3427                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
3428                delta_ar: rust_decimal::Decimal::ZERO,
3429                delta_ap: rust_decimal::Decimal::ZERO,
3430                delta_inventory: rust_decimal::Decimal::ZERO,
3431                capex,
3432                debt_issuance: rust_decimal::Decimal::ZERO,
3433                debt_repayment: rust_decimal::Decimal::ZERO,
3434                interest_paid,
3435                tax_paid,
3436                dividends_paid,
3437                framework: framework_str.to_string(),
3438            };
3439
3440            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3441            if !enhanced_cf_items.is_empty() {
3442                // Merge into ALL cash flow statements (standalone + consolidated)
3443                use datasynth_core::models::StatementType;
3444                let merge_count = enhanced_cf_items.len();
3445                for stmt in financial_reporting
3446                    .financial_statements
3447                    .iter_mut()
3448                    .chain(financial_reporting.consolidated_statements.iter_mut())
3449                    .chain(
3450                        financial_reporting
3451                            .standalone_statements
3452                            .values_mut()
3453                            .flat_map(|v| v.iter_mut()),
3454                    )
3455                {
3456                    if stmt.statement_type == StatementType::CashFlowStatement {
3457                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3458                    }
3459                }
3460                info!(
3461                    "Enhanced cash flow: {} supplementary items merged into CF statements",
3462                    merge_count
3463                );
3464            }
3465        }
3466
3467        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
3468        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
3469        self.generate_notes_to_financial_statements(
3470            &mut financial_reporting,
3471            &accounting_standards,
3472            &tax,
3473            &hr,
3474            &audit,
3475            &treasury,
3476        );
3477
3478        // Phase 20b: Supplement segment reports from real JEs (v2.4)
3479        // When we have 2+ companies, derive segment data from actual journal entries
3480        // to complement or replace the FS-generator-based segments.
3481        if self.config.companies.len() >= 2 && !entries.is_empty() {
3482            let companies: Vec<(String, String)> = self
3483                .config
3484                .companies
3485                .iter()
3486                .map(|c| (c.code.clone(), c.name.clone()))
3487                .collect();
3488            let ic_elim: rust_decimal::Decimal =
3489                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3490            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3491                .unwrap_or(NaiveDate::MIN);
3492            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3493            let period_label = format!(
3494                "{}-{:02}",
3495                end_date.year(),
3496                (end_date - chrono::Days::new(1)).month()
3497            );
3498
3499            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3500            let (je_segments, je_recon) =
3501                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3502            if !je_segments.is_empty() {
3503                info!(
3504                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3505                    je_segments.len(),
3506                    ic_elim,
3507                );
3508                // Replace if existing segment_reports were empty; otherwise supplement
3509                if financial_reporting.segment_reports.is_empty() {
3510                    financial_reporting.segment_reports = je_segments;
3511                    financial_reporting.segment_reconciliations = vec![je_recon];
3512                } else {
3513                    financial_reporting.segment_reports.extend(je_segments);
3514                    financial_reporting.segment_reconciliations.push(je_recon);
3515                }
3516            }
3517        }
3518
3519        // Phase 21: ESG Data Generation
3520        let esg_snap =
3521            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3522
3523        // Phase 23: Project Accounting Data Generation
3524        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3525
3526        // Phase 24: Process Evolution + Organizational Events
3527        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3528
3529        // Phase 24b: Disruption Events
3530        let disruption_events = self.phase_disruption_events(&mut stats)?;
3531
3532        // Phase 27: Bi-Temporal Vendor Version Chains
3533        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3534
3535        // Phase 28: Entity Relationship Graph + Cross-Process Links
3536        let (entity_relationship_graph, cross_process_links) =
3537            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3538
3539        // Phase 29: Industry-specific GL accounts
3540        let industry_output = self.phase_industry_data(&mut stats);
3541
3542        // Phase: Compliance regulations (must run before hypergraph so it can be included)
3543        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3544
3545        // Phase: Neural enhancement (config-acknowledged-only in v4.0).
3546        //
3547        // The neural / hybrid diffusion path was a documented L2 stub
3548        // in v3.x; actual neural-network training requires ML
3549        // infrastructure (PyTorch / candle bindings, GPU access,
3550        // training loops) that was never wired through the
3551        // orchestrator. Rather than keep a silently-no-op block that
3552        // misleads users into thinking neural training happens, v4.0
3553        // acknowledges the config — exposing stats so downstream
3554        // tooling can see the request — but emits a clear warning
3555        // when a non-statistical backend is requested. The statistical
3556        // diffusion backend continues to run via
3557        // `phase_diffusion_enhancement`.
3558        //
3559        // Users who need real neural diffusion: track the roadmap item
3560        // in the v4.x backlog and consider contributing the backend
3561        // (the `DiffusionBackend` trait is the integration point).
3562        if self.config.diffusion.enabled
3563            && (self.config.diffusion.backend == "neural"
3564                || self.config.diffusion.backend == "hybrid")
3565        {
3566            let neural = &self.config.diffusion.neural;
3567            let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3568            stats.neural_hybrid_weight = Some(weight);
3569            stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3570            stats.neural_routed_column_count = Some(neural.neural_columns.len());
3571            warn!(
3572                "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3573                 the neural/hybrid training path is not yet shipped. Config \
3574                 is captured in stats (weight={weight:.2}, strategy={}, \
3575                 columns={}) but no neural training runs. Statistical \
3576                 diffusion (backend='statistical') continues to work.",
3577                self.config.diffusion.backend,
3578                neural.hybrid_strategy,
3579                neural.neural_columns.len(),
3580            );
3581        }
3582
3583        // Phase 19b: Hypergraph Export (after all data is available)
3584        self.phase_hypergraph_export(
3585            &coa,
3586            &entries,
3587            &document_flows,
3588            &sourcing,
3589            &hr,
3590            &manufacturing_snap,
3591            &banking,
3592            &audit,
3593            &financial_reporting,
3594            &ocpm,
3595            &compliance_regulations,
3596            &mut stats,
3597        )?;
3598
3599        // Phase 10c: Additional graph builders (approval, entity, banking)
3600        // These run after all data is available since they need banking/IC data.
3601        if self.phase_config.generate_graph_export {
3602            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3603        }
3604
3605        // Log informational messages for config sections not yet fully wired
3606        if self.config.streaming.enabled {
3607            info!("Note: streaming config is enabled but batch mode does not use it");
3608        }
3609        if self.config.vendor_network.enabled {
3610            debug!("Vendor network config available; relationship graph generation is partial");
3611        }
3612        if self.config.customer_segmentation.enabled {
3613            debug!("Customer segmentation config available; segment-aware generation is partial");
3614        }
3615
3616        // Log final resource statistics
3617        let resource_stats = self.resource_guard.stats();
3618        info!(
3619            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3620            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3621            resource_stats.disk.estimated_bytes_written,
3622            resource_stats.degradation_level
3623        );
3624
3625        // Flush any remaining stream sink data
3626        if let Some(ref sink) = self.phase_sink {
3627            if let Err(e) = sink.flush() {
3628                warn!("Stream sink flush failed: {e}");
3629            }
3630        }
3631
3632        // Build data lineage graph
3633        let lineage = self.build_lineage_graph();
3634
3635        // Evaluate quality gates if enabled in config
3636        let gate_result = if self.config.quality_gates.enabled {
3637            let profile_name = &self.config.quality_gates.profile;
3638            match datasynth_eval::gates::get_profile(profile_name) {
3639                Some(profile) => {
3640                    // Build an evaluation populated with actual generation metrics.
3641                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3642
3643                    // Populate balance sheet evaluation from balance validation results
3644                    if balance_validation.validated {
3645                        eval.coherence.balance =
3646                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3647                                equation_balanced: balance_validation.is_balanced,
3648                                max_imbalance: (balance_validation.total_debits
3649                                    - balance_validation.total_credits)
3650                                    .abs(),
3651                                periods_evaluated: 1,
3652                                periods_imbalanced: if balance_validation.is_balanced {
3653                                    0
3654                                } else {
3655                                    1
3656                                },
3657                                period_results: Vec::new(),
3658                                companies_evaluated: self.config.companies.len(),
3659                            });
3660                    }
3661
3662                    // Set coherence passes based on balance validation
3663                    eval.coherence.passes = balance_validation.is_balanced;
3664                    if !balance_validation.is_balanced {
3665                        eval.coherence
3666                            .failures
3667                            .push("Balance sheet equation not satisfied".to_string());
3668                    }
3669
3670                    // Set statistical score based on entry count (basic sanity)
3671                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3672                    eval.statistical.passes = !entries.is_empty();
3673
3674                    // Set quality score from data quality stats
3675                    eval.quality.overall_score = 0.9; // Default high for generated data
3676                    eval.quality.passes = true;
3677
3678                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3679                    info!(
3680                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3681                        profile_name, result.gates_passed, result.gates_total, result.summary
3682                    );
3683                    Some(result)
3684                }
3685                None => {
3686                    warn!(
3687                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3688                        profile_name
3689                    );
3690                    None
3691                }
3692            }
3693        } else {
3694            None
3695        };
3696
3697        // Generate internal controls if enabled
3698        let internal_controls = if self.config.internal_controls.enabled {
3699            InternalControl::standard_controls()
3700        } else {
3701            Vec::new()
3702        };
3703
3704        // v3.3.0: analytics-metadata phase. Runs AFTER all JE-adding
3705        // phases (including fraud-bias sweep at Phase 20b) so derived
3706        // outputs reflect final data.
3707        let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3708
3709        // v3.5.1: statistical validation over the final amount
3710        // distribution. Runs *after* all JE-adding phases so the report
3711        // reflects everything the user will see in the output. Returns
3712        // `None` unless `distributions.validation.enabled = true`.
3713        let statistical_validation = self.phase_statistical_validation(&entries)?;
3714
3715        // v4.1.3+: interconnectivity snapshot — tier assignments,
3716        // value-segment labels, industry-specific metadata. Runs after
3717        // master data is settled so it can index stable IDs.
3718        let interconnectivity = self.phase_interconnectivity();
3719
3720        Ok(EnhancedGenerationResult {
3721            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3722            master_data: std::mem::take(&mut self.master_data),
3723            document_flows,
3724            subledger,
3725            ocpm,
3726            audit,
3727            banking,
3728            graph_export,
3729            sourcing,
3730            financial_reporting,
3731            hr,
3732            accounting_standards,
3733            manufacturing: manufacturing_snap,
3734            sales_kpi_budgets,
3735            tax,
3736            esg: esg_snap,
3737            treasury,
3738            project_accounting,
3739            process_evolution,
3740            organizational_events,
3741            disruption_events,
3742            intercompany,
3743            journal_entries: entries,
3744            anomaly_labels,
3745            balance_validation,
3746            data_quality_stats,
3747            quality_issues,
3748            statistics: stats,
3749            lineage: Some(lineage),
3750            gate_result,
3751            internal_controls,
3752            sod_violations,
3753            opening_balances,
3754            subledger_reconciliation,
3755            counterfactual_pairs,
3756            red_flags,
3757            collusion_rings,
3758            temporal_vendor_chains,
3759            entity_relationship_graph,
3760            cross_process_links,
3761            industry_output,
3762            compliance_regulations,
3763            analytics_metadata,
3764            statistical_validation,
3765            interconnectivity,
3766        })
3767    }
3768
3769    /// v4.1.3+: populate the interconnectivity snapshot from
3770    /// previously-inert schema sections. Empty when all sections are
3771    /// disabled.
3772    fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3773        use rand::{RngExt, SeedableRng};
3774        use rand_chacha::ChaCha8Rng;
3775
3776        let mut snap = InterconnectivitySnapshot::default();
3777        let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3778
3779        // --- Vendor network ---
3780        let vn = &self.config.vendor_network;
3781        if vn.enabled {
3782            let total = self.master_data.vendors.len();
3783            if total > 0 {
3784                let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3785                let remaining_after_t1 = total.saturating_sub(tier1_count);
3786                let depth = vn.depth.clamp(1, 3);
3787                let tier2_count = if depth >= 2 {
3788                    let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3789                    (tier1_count * avg).min(remaining_after_t1)
3790                } else {
3791                    0
3792                };
3793                let tier3_count = total
3794                    .saturating_sub(tier1_count)
3795                    .saturating_sub(tier2_count);
3796
3797                for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3798                    let tier = if idx < tier1_count {
3799                        1
3800                    } else if idx < tier1_count + tier2_count {
3801                        2
3802                    } else {
3803                        3
3804                    };
3805                    snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3806
3807                    // Cluster assignment via configured ratios.
3808                    let cl = &vn.clusters;
3809                    let roll: f64 = rng.random();
3810                    let cluster = if roll < cl.reliable_strategic {
3811                        "reliable_strategic"
3812                    } else if roll < cl.reliable_strategic + cl.standard_operational {
3813                        "standard_operational"
3814                    } else if roll
3815                        < cl.reliable_strategic + cl.standard_operational + cl.transactional
3816                    {
3817                        "transactional"
3818                    } else {
3819                        "problematic"
3820                    };
3821                    snap.vendor_clusters
3822                        .push((vendor.vendor_id.clone(), cluster.to_string()));
3823                }
3824                let _ = tier3_count; // retained for clarity; tier 3 bucket is the remainder
3825            }
3826        }
3827
3828        // --- Customer segmentation ---
3829        let cs = &self.config.customer_segmentation;
3830        if cs.enabled {
3831            let seg = &cs.value_segments;
3832            for customer in &self.master_data.customers {
3833                let roll: f64 = rng.random();
3834                let value_segment = if roll < seg.enterprise.customer_share {
3835                    "enterprise"
3836                } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3837                    "mid_market"
3838                } else if roll
3839                    < seg.enterprise.customer_share
3840                        + seg.mid_market.customer_share
3841                        + seg.smb.customer_share
3842                {
3843                    "smb"
3844                } else {
3845                    "consumer"
3846                };
3847                snap.customer_value_segments
3848                    .push((customer.customer_id.clone(), value_segment.to_string()));
3849
3850                let roll2: f64 = rng.random();
3851                let life = &cs.lifecycle;
3852                let lifecycle = if roll2 < life.prospect_rate {
3853                    "prospect"
3854                } else if roll2 < life.prospect_rate + life.new_rate {
3855                    "new"
3856                } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3857                    "growth"
3858                } else if roll2
3859                    < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3860                {
3861                    "mature"
3862                } else if roll2
3863                    < life.prospect_rate
3864                        + life.new_rate
3865                        + life.growth_rate
3866                        + life.mature_rate
3867                        + life.at_risk_rate
3868                {
3869                    "at_risk"
3870                } else if roll2
3871                    < life.prospect_rate
3872                        + life.new_rate
3873                        + life.growth_rate
3874                        + life.mature_rate
3875                        + life.at_risk_rate
3876                        + life.churned_rate
3877                {
3878                    "churned"
3879                } else {
3880                    "won_back"
3881                };
3882                snap.customer_lifecycle_stages
3883                    .push((customer.customer_id.clone(), lifecycle.to_string()));
3884            }
3885        }
3886
3887        // --- Industry-specific metadata (minimal) ---
3888        let is = &self.config.industry_specific;
3889        if is.enabled {
3890            snap.industry_metadata.push(format!(
3891                "industry_specific.enabled=true (industry={:?})",
3892                self.config.global.industry
3893            ));
3894        }
3895
3896        snap
3897    }
3898
3899    // ========================================================================
3900    // Generation Phase Methods
3901    // ========================================================================
3902
3903    /// Phase 1: Generate Chart of Accounts and update statistics.
3904    fn phase_chart_of_accounts(
3905        &mut self,
3906        stats: &mut EnhancedGenerationStatistics,
3907    ) -> SynthResult<Arc<ChartOfAccounts>> {
3908        info!("Phase 1: Generating Chart of Accounts");
3909        let coa = self.generate_coa()?;
3910        stats.accounts_count = coa.account_count();
3911        info!(
3912            "Chart of Accounts generated: {} accounts",
3913            stats.accounts_count
3914        );
3915        self.check_resources_with_log("post-coa")?;
3916        Ok(coa)
3917    }
3918
3919    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
3920    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3921        if self.phase_config.generate_master_data {
3922            info!("Phase 2: Generating Master Data");
3923            self.generate_master_data()?;
3924            stats.vendor_count = self.master_data.vendors.len();
3925            stats.customer_count = self.master_data.customers.len();
3926            stats.material_count = self.master_data.materials.len();
3927            stats.asset_count = self.master_data.assets.len();
3928            stats.employee_count = self.master_data.employees.len();
3929            info!(
3930                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3931                stats.vendor_count, stats.customer_count, stats.material_count,
3932                stats.asset_count, stats.employee_count
3933            );
3934            self.check_resources_with_log("post-master-data")?;
3935        } else {
3936            debug!("Phase 2: Skipped (master data generation disabled)");
3937        }
3938        Ok(())
3939    }
3940
3941    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
3942    fn phase_document_flows(
3943        &mut self,
3944        stats: &mut EnhancedGenerationStatistics,
3945    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3946        let mut document_flows = DocumentFlowSnapshot::default();
3947        let mut subledger = SubledgerSnapshot::default();
3948        // Dunning JEs (interest + charges) accumulated here and merged into the
3949        // main FA-JE list below so they appear in the GL.
3950        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3951
3952        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3953            info!("Phase 3: Generating Document Flows");
3954            self.generate_document_flows(&mut document_flows)?;
3955            stats.p2p_chain_count = document_flows.p2p_chains.len();
3956            stats.o2c_chain_count = document_flows.o2c_chains.len();
3957            info!(
3958                "Document flows generated: {} P2P chains, {} O2C chains",
3959                stats.p2p_chain_count, stats.o2c_chain_count
3960            );
3961
3962            // Phase 3b: Link document flows to subledgers (for data coherence)
3963            debug!("Phase 3b: Linking document flows to subledgers");
3964            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3965            stats.ap_invoice_count = subledger.ap_invoices.len();
3966            stats.ar_invoice_count = subledger.ar_invoices.len();
3967            debug!(
3968                "Subledgers linked: {} AP invoices, {} AR invoices",
3969                stats.ap_invoice_count, stats.ar_invoice_count
3970            );
3971
3972            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
3973            // Without this step the subledger is systematically overstated because
3974            // amount_remaining is set at invoice creation and never reduced by
3975            // the payments that were generated in the document-flow phase.
3976            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3977            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3978            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3979            debug!("Payment settlements applied to AP and AR subledgers");
3980
3981            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
3982            // The as-of date is the last day of the configured period.
3983            if let Ok(start_date) =
3984                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3985            {
3986                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3987                    - chrono::Days::new(1);
3988                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3989                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
3990                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
3991                // derived from JE-level aggregation and will typically differ. This is a known
3992                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
3993                // generated independently. A future reconciliation phase should align them by
3994                // using subledger totals as the authoritative source for BS Receivables.
3995                for company in &self.config.companies {
3996                    let ar_report = ARAgingReport::from_invoices(
3997                        company.code.clone(),
3998                        &subledger.ar_invoices,
3999                        as_of_date,
4000                    );
4001                    subledger.ar_aging_reports.push(ar_report);
4002
4003                    let ap_report = APAgingReport::from_invoices(
4004                        company.code.clone(),
4005                        &subledger.ap_invoices,
4006                        as_of_date,
4007                    );
4008                    subledger.ap_aging_reports.push(ap_report);
4009                }
4010                debug!(
4011                    "AR/AP aging reports built: {} AR, {} AP",
4012                    subledger.ar_aging_reports.len(),
4013                    subledger.ap_aging_reports.len()
4014                );
4015
4016                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
4017                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4018                {
4019                    use datasynth_generators::DunningGenerator;
4020                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4021                    for company in &self.config.companies {
4022                        let currency = company.currency.as_str();
4023                        // Clone this company's AR invoices into a working copy; the dunning
4024                        // generator updates dunning_info on the copies, which is written back below.
4025                        let mut company_invoices: Vec<
4026                            datasynth_core::models::subledger::ar::ARInvoice,
4027                        > = subledger
4028                            .ar_invoices
4029                            .iter()
4030                            .filter(|inv| inv.company_code == company.code)
4031                            .cloned()
4032                            .collect();
4033
4034                        if company_invoices.is_empty() {
4035                            continue;
4036                        }
4037
4038                        let result = dunning_gen.execute_dunning_run(
4039                            &company.code,
4040                            as_of_date,
4041                            &mut company_invoices,
4042                            currency,
4043                        );
4044
4045                        // Write back updated dunning info to the main AR invoice list
4046                        for updated in &company_invoices {
4047                            if let Some(orig) = subledger
4048                                .ar_invoices
4049                                .iter_mut()
4050                                .find(|i| i.invoice_number == updated.invoice_number)
4051                            {
4052                                orig.dunning_info = updated.dunning_info.clone();
4053                            }
4054                        }
4055
4056                        subledger.dunning_runs.push(result.dunning_run);
4057                        subledger.dunning_letters.extend(result.letters);
4058                        // Dunning JEs (interest + charges) collected into local buffer.
4059                        dunning_journal_entries.extend(result.journal_entries);
4060                    }
4061                    debug!(
4062                        "Dunning runs complete: {} runs, {} letters",
4063                        subledger.dunning_runs.len(),
4064                        subledger.dunning_letters.len()
4065                    );
4066                }
4067            }
4068
4069            self.check_resources_with_log("post-document-flows")?;
4070        } else {
4071            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4072        }
4073
4074        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
4075        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4076        if !self.master_data.assets.is_empty() {
4077            debug!("Generating FA subledger records");
4078            let company_code = self
4079                .config
4080                .companies
4081                .first()
4082                .map(|c| c.code.as_str())
4083                .unwrap_or("1000");
4084            let currency = self
4085                .config
4086                .companies
4087                .first()
4088                .map(|c| c.currency.as_str())
4089                .unwrap_or("USD");
4090
4091            let mut fa_gen = datasynth_generators::FAGenerator::new(
4092                datasynth_generators::FAGeneratorConfig::default(),
4093                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4094            );
4095
4096            for asset in &self.master_data.assets {
4097                let (record, je) = fa_gen.generate_asset_acquisition(
4098                    company_code,
4099                    &format!("{:?}", asset.asset_class),
4100                    &asset.description,
4101                    asset.acquisition_date,
4102                    currency,
4103                    asset.cost_center.as_deref(),
4104                );
4105                subledger.fa_records.push(record);
4106                fa_journal_entries.push(je);
4107            }
4108
4109            stats.fa_subledger_count = subledger.fa_records.len();
4110            debug!(
4111                "FA subledger records generated: {} (with {} acquisition JEs)",
4112                stats.fa_subledger_count,
4113                fa_journal_entries.len()
4114            );
4115        }
4116
4117        // Generate Inventory subledger records from master data materials
4118        if !self.master_data.materials.is_empty() {
4119            debug!("Generating Inventory subledger records");
4120            let first_company = self.config.companies.first();
4121            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4122            let inv_currency = first_company
4123                .map(|c| c.currency.clone())
4124                .unwrap_or_else(|| "USD".to_string());
4125
4126            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4127                datasynth_generators::InventoryGeneratorConfig::default(),
4128                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4129                inv_currency.clone(),
4130            );
4131
4132            for (i, material) in self.master_data.materials.iter().enumerate() {
4133                let plant = format!("PLANT{:02}", (i % 3) + 1);
4134                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4135                let initial_qty = rust_decimal::Decimal::from(
4136                    material
4137                        .safety_stock
4138                        .to_string()
4139                        .parse::<i64>()
4140                        .unwrap_or(100),
4141                );
4142
4143                let position = inv_gen.generate_position(
4144                    company_code,
4145                    &plant,
4146                    &storage_loc,
4147                    &material.material_id,
4148                    &material.description,
4149                    initial_qty,
4150                    Some(material.standard_cost),
4151                    &inv_currency,
4152                );
4153                subledger.inventory_positions.push(position);
4154            }
4155
4156            stats.inventory_subledger_count = subledger.inventory_positions.len();
4157            debug!(
4158                "Inventory subledger records generated: {}",
4159                stats.inventory_subledger_count
4160            );
4161        }
4162
4163        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
4164        if !subledger.fa_records.is_empty() {
4165            if let Ok(start_date) =
4166                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4167            {
4168                let company_code = self
4169                    .config
4170                    .companies
4171                    .first()
4172                    .map(|c| c.code.as_str())
4173                    .unwrap_or("1000");
4174                let fiscal_year = start_date.year();
4175                let start_period = start_date.month();
4176                let end_period =
4177                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4178
4179                let depr_cfg = FaDepreciationScheduleConfig {
4180                    fiscal_year,
4181                    start_period,
4182                    end_period,
4183                    seed_offset: 800,
4184                };
4185                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4186                let runs = depr_gen.generate(company_code, &subledger.fa_records);
4187                let run_count = runs.len();
4188                subledger.depreciation_runs = runs;
4189                debug!(
4190                    "Depreciation runs generated: {} runs for {} periods",
4191                    run_count, self.config.global.period_months
4192                );
4193            }
4194        }
4195
4196        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
4197        if !subledger.inventory_positions.is_empty() {
4198            if let Ok(start_date) =
4199                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4200            {
4201                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4202                    - chrono::Days::new(1);
4203
4204                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4205                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4206
4207                for company in &self.config.companies {
4208                    let result = inv_val_gen.generate(
4209                        &company.code,
4210                        &subledger.inventory_positions,
4211                        as_of_date,
4212                    );
4213                    subledger.inventory_valuations.push(result);
4214                }
4215                debug!(
4216                    "Inventory valuations generated: {} company reports",
4217                    subledger.inventory_valuations.len()
4218                );
4219            }
4220        }
4221
4222        Ok((document_flows, subledger, fa_journal_entries))
4223    }
4224
4225    /// Phase 3c: Generate OCPM events from document flows.
4226    #[allow(clippy::too_many_arguments)]
4227    fn phase_ocpm_events(
4228        &mut self,
4229        document_flows: &DocumentFlowSnapshot,
4230        sourcing: &SourcingSnapshot,
4231        hr: &HrSnapshot,
4232        manufacturing: &ManufacturingSnapshot,
4233        banking: &BankingSnapshot,
4234        audit: &AuditSnapshot,
4235        financial_reporting: &FinancialReportingSnapshot,
4236        stats: &mut EnhancedGenerationStatistics,
4237    ) -> SynthResult<OcpmSnapshot> {
4238        let degradation = self.check_resources()?;
4239        if degradation >= DegradationLevel::Reduced {
4240            debug!(
4241                "Phase skipped due to resource pressure (degradation: {:?})",
4242                degradation
4243            );
4244            return Ok(OcpmSnapshot::default());
4245        }
4246        if self.phase_config.generate_ocpm_events {
4247            info!("Phase 3c: Generating OCPM Events");
4248            let ocpm_snapshot = self.generate_ocpm_events(
4249                document_flows,
4250                sourcing,
4251                hr,
4252                manufacturing,
4253                banking,
4254                audit,
4255                financial_reporting,
4256            )?;
4257            stats.ocpm_event_count = ocpm_snapshot.event_count;
4258            stats.ocpm_object_count = ocpm_snapshot.object_count;
4259            stats.ocpm_case_count = ocpm_snapshot.case_count;
4260            info!(
4261                "OCPM events generated: {} events, {} objects, {} cases",
4262                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4263            );
4264            self.check_resources_with_log("post-ocpm")?;
4265            Ok(ocpm_snapshot)
4266        } else {
4267            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4268            Ok(OcpmSnapshot::default())
4269        }
4270    }
4271
4272    /// Phase 4: Generate journal entries from document flows and standalone generation.
4273    fn phase_journal_entries(
4274        &mut self,
4275        coa: &Arc<ChartOfAccounts>,
4276        document_flows: &DocumentFlowSnapshot,
4277        _stats: &mut EnhancedGenerationStatistics,
4278    ) -> SynthResult<Vec<JournalEntry>> {
4279        let mut entries = Vec::new();
4280
4281        // Phase 4a: Generate JEs from document flows (for data coherence)
4282        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4283            debug!("Phase 4a: Generating JEs from document flows");
4284            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4285            debug!("Generated {} JEs from document flows", flow_entries.len());
4286            entries.extend(flow_entries);
4287        }
4288
4289        // Phase 4b: Generate standalone journal entries
4290        if self.phase_config.generate_journal_entries {
4291            info!("Phase 4: Generating Journal Entries");
4292            let je_entries = self.generate_journal_entries(coa)?;
4293            info!("Generated {} standalone journal entries", je_entries.len());
4294            entries.extend(je_entries);
4295        } else {
4296            debug!("Phase 4: Skipped (journal entry generation disabled)");
4297        }
4298
4299        // Phase 4c (shard mode): inject pre-built IC journal entries from
4300        // `ShardContext`. When running standalone (no group engine), this
4301        // is a no-op. See crate::shard_context::ShardContext for rationale.
4302        if let Some(ctx) = &self.shard_context {
4303            if !ctx.extra_journal_entries.is_empty() {
4304                debug!(
4305                    "Phase 4c: appending {} shard-mode IC journal entries",
4306                    ctx.extra_journal_entries.len()
4307                );
4308                entries.extend(ctx.extra_journal_entries.iter().cloned());
4309            }
4310        }
4311
4312        if !entries.is_empty() {
4313            // Note: stats.total_entries/total_line_items are set in generate()
4314            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
4315            self.check_resources_with_log("post-journal-entries")?;
4316        }
4317
4318        Ok(entries)
4319    }
4320
4321    /// Phase 5: Inject anomalies into journal entries.
4322    fn phase_anomaly_injection(
4323        &mut self,
4324        entries: &mut [JournalEntry],
4325        actions: &DegradationActions,
4326        stats: &mut EnhancedGenerationStatistics,
4327    ) -> SynthResult<AnomalyLabels> {
4328        if self.phase_config.inject_anomalies
4329            && !entries.is_empty()
4330            && !actions.skip_anomaly_injection
4331        {
4332            info!("Phase 5: Injecting Anomalies");
4333            let result = self.inject_anomalies(entries)?;
4334            stats.anomalies_injected = result.labels.len();
4335            info!("Injected {} anomalies", stats.anomalies_injected);
4336            self.check_resources_with_log("post-anomaly-injection")?;
4337            Ok(result)
4338        } else if actions.skip_anomaly_injection {
4339            warn!("Phase 5: Skipped due to resource degradation");
4340            Ok(AnomalyLabels::default())
4341        } else {
4342            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4343            Ok(AnomalyLabels::default())
4344        }
4345    }
4346
4347    /// Phase 6: Validate balance sheet equation on journal entries.
4348    fn phase_balance_validation(
4349        &mut self,
4350        entries: &[JournalEntry],
4351    ) -> SynthResult<BalanceValidationResult> {
4352        if self.phase_config.validate_balances && !entries.is_empty() {
4353            debug!("Phase 6: Validating Balances");
4354            let balance_validation = self.validate_journal_entries(entries)?;
4355            if balance_validation.is_balanced {
4356                debug!("Balance validation passed");
4357            } else {
4358                warn!(
4359                    "Balance validation found {} errors",
4360                    balance_validation.validation_errors.len()
4361                );
4362            }
4363            Ok(balance_validation)
4364        } else {
4365            Ok(BalanceValidationResult::default())
4366        }
4367    }
4368
4369    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
4370    fn phase_data_quality_injection(
4371        &mut self,
4372        entries: &mut [JournalEntry],
4373        actions: &DegradationActions,
4374        stats: &mut EnhancedGenerationStatistics,
4375    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4376        if self.phase_config.inject_data_quality
4377            && !entries.is_empty()
4378            && !actions.skip_data_quality
4379        {
4380            info!("Phase 7: Injecting Data Quality Variations");
4381            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4382            stats.data_quality_issues = dq_stats.records_with_issues;
4383            info!("Injected {} data quality issues", stats.data_quality_issues);
4384            self.check_resources_with_log("post-data-quality")?;
4385            Ok((dq_stats, quality_issues))
4386        } else if actions.skip_data_quality {
4387            warn!("Phase 7: Skipped due to resource degradation");
4388            // v4.4.1: report the denominator (entries seen) even when
4389            // injection is skipped, so downstream consumers can tell
4390            // "skipped, 0/N" apart from "ran but found nothing".
4391            Ok((stats_with_denominator(entries.len()), Vec::new()))
4392        } else {
4393            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4394            Ok((stats_with_denominator(entries.len()), Vec::new()))
4395        }
4396    }
4397
4398    /// Phase 10b: Generate period-close journal entries.
4399    ///
4400    /// Generates:
4401    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
4402    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
4403    ///    for the configured period.
4404    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4405    /// 3. Income statement closing JE per company: transfer net income after tax to retained
4406    ///    earnings via the Income Summary (3600) clearing account.
4407    fn phase_period_close(
4408        &mut self,
4409        entries: &mut Vec<JournalEntry>,
4410        subledger: &SubledgerSnapshot,
4411        stats: &mut EnhancedGenerationStatistics,
4412    ) -> SynthResult<()> {
4413        if !self.phase_config.generate_period_close || entries.is_empty() {
4414            debug!("Phase 10b: Skipped (period close disabled or no entries)");
4415            return Ok(());
4416        }
4417
4418        info!("Phase 10b: Generating period-close journal entries");
4419
4420        use datasynth_core::accounts::{
4421            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4422        };
4423        use rust_decimal::Decimal;
4424
4425        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4426            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4427        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4428        // Posting date for close entries is the last day of the period
4429        let close_date = end_date - chrono::Days::new(1);
4430
4431        // Statutory tax rate (21% — configurable rates come in later tiers)
4432        let tax_rate = Decimal::new(21, 2); // 0.21
4433
4434        // Collect company codes from config
4435        let company_codes: Vec<String> = self
4436            .config
4437            .companies
4438            .iter()
4439            .map(|c| c.code.clone())
4440            .collect();
4441
4442        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
4443        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4444        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4445
4446        // --- Depreciation JEs (per asset) ---
4447        // Compute period depreciation for each active fixed asset using straight-line method.
4448        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
4449        let period_months = self.config.global.period_months;
4450        for asset in &subledger.fa_records {
4451            // Skip assets that are inactive / fully depreciated / non-depreciable
4452            use datasynth_core::models::subledger::fa::AssetStatus;
4453            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4454                continue;
4455            }
4456            let useful_life_months = asset.useful_life_months();
4457            if useful_life_months == 0 {
4458                // Land or CIP — not depreciated
4459                continue;
4460            }
4461            let salvage_value = asset.salvage_value();
4462            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4463            if depreciable_base == Decimal::ZERO {
4464                continue;
4465            }
4466            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4467                * Decimal::from(period_months))
4468            .round_dp(2);
4469            if period_depr <= Decimal::ZERO {
4470                continue;
4471            }
4472
4473            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4474            depr_header.document_type = "CL".to_string();
4475            depr_header.header_text = Some(format!(
4476                "Depreciation - {} {}",
4477                asset.asset_number, asset.description
4478            ));
4479            depr_header.created_by = "CLOSE_ENGINE".to_string();
4480            depr_header.source = TransactionSource::Automated;
4481            depr_header.business_process = Some(BusinessProcess::R2R);
4482
4483            let doc_id = depr_header.document_id;
4484            let mut depr_je = JournalEntry::new(depr_header);
4485
4486            // DR Depreciation Expense (6000)
4487            depr_je.add_line(JournalEntryLine::debit(
4488                doc_id,
4489                1,
4490                expense_accounts::DEPRECIATION.to_string(),
4491                period_depr,
4492            ));
4493            // CR Accumulated Depreciation (1510)
4494            depr_je.add_line(JournalEntryLine::credit(
4495                doc_id,
4496                2,
4497                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4498                period_depr,
4499            ));
4500
4501            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4502            close_jes.push(depr_je);
4503        }
4504
4505        if !subledger.fa_records.is_empty() {
4506            debug!(
4507                "Generated {} depreciation JEs from {} FA records",
4508                close_jes.len(),
4509                subledger.fa_records.len()
4510            );
4511        }
4512
4513        // --- Accrual entries (standard period-end accruals per company) ---
4514        // Generate standard accrued expense entries (utilities, rent, interest) using
4515        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
4516        {
4517            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4518            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4519            // v3.4.3: snap reversal dates to business days. No-op when
4520            // temporal_patterns.business_days is disabled.
4521            if let Some(ctx) = &self.temporal_context {
4522                accrual_gen.set_temporal_context(Arc::clone(ctx));
4523            }
4524
4525            // Standard accrual items: (description, expense_acct, liability_acct); all share one revenue-based amount.
4526            let accrual_items: &[(&str, &str, &str)] = &[
4527                ("Accrued Utilities", "6200", "2100"),
4528                ("Accrued Rent", "6300", "2100"),
4529                ("Accrued Interest", "6100", "2150"),
4530            ];
4531
4532            for company_code in &company_codes {
4533                // Estimate company revenue from existing JEs
4534                let company_revenue: Decimal = entries
4535                    .iter()
4536                    .filter(|e| e.header.company_code == *company_code)
4537                    .flat_map(|e| e.lines.iter())
4538                    .filter(|l| l.gl_account.starts_with('4'))
4539                    .map(|l| l.credit_amount - l.debit_amount)
4540                    .fold(Decimal::ZERO, |acc, v| acc + v);
4541
4542                if company_revenue <= Decimal::ZERO {
4543                    continue;
4544                }
4545
4546                // Use 0.5% of period revenue per accrual item as a proxy
4547                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4548                if accrual_base <= Decimal::ZERO {
4549                    continue;
4550                }
4551
4552                for (description, expense_acct, liability_acct) in accrual_items {
4553                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4554                        company_code,
4555                        description,
4556                        accrual_base,
4557                        expense_acct,
4558                        liability_acct,
4559                        close_date,
4560                        None,
4561                    );
4562                    close_jes.push(accrual_je);
4563                    if let Some(rev_je) = reversal_je {
4564                        close_jes.push(rev_je);
4565                    }
4566                }
4567            }
4568
4569            debug!(
4570                "Generated accrual entries for {} companies",
4571                company_codes.len()
4572            );
4573        }
4574
4575        for company_code in &company_codes {
4576            // Calculate net income for this company from existing JEs:
4577            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
4578            // Revenue (4xxx): credit-normal, so net = credits - debits
4579            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
4580            let mut total_revenue = Decimal::ZERO;
4581            let mut total_expenses = Decimal::ZERO;
4582
4583            for entry in entries.iter() {
4584                if entry.header.company_code != *company_code {
4585                    continue;
4586                }
4587                for line in &entry.lines {
4588                    let category = AccountCategory::from_account(&line.gl_account);
4589                    match category {
4590                        AccountCategory::Revenue => {
4591                            // Revenue is credit-normal: net revenue = credits - debits
4592                            total_revenue += line.credit_amount - line.debit_amount;
4593                        }
4594                        AccountCategory::Cogs
4595                        | AccountCategory::OperatingExpense
4596                        | AccountCategory::OtherIncomeExpense
4597                        | AccountCategory::Tax => {
4598                            // Expenses are debit-normal: net expense = debits - credits
4599                            total_expenses += line.debit_amount - line.credit_amount;
4600                        }
4601                        _ => {}
4602                    }
4603                }
4604            }
4605
4606            let pre_tax_income = total_revenue - total_expenses;
4607
4608            // Skip if no income statement activity
4609            if pre_tax_income == Decimal::ZERO {
4610                debug!(
4611                    "Company {}: no pre-tax income, skipping period close",
4612                    company_code
4613                );
4614                continue;
4615            }
4616
4617            // --- Tax provision / DTA JE ---
4618            if pre_tax_income > Decimal::ZERO {
4619                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4620                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4621
4622                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4623                tax_header.document_type = "CL".to_string();
4624                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4625                tax_header.created_by = "CLOSE_ENGINE".to_string();
4626                tax_header.source = TransactionSource::Automated;
4627                tax_header.business_process = Some(BusinessProcess::R2R);
4628
4629                let doc_id = tax_header.document_id;
4630                let mut tax_je = JournalEntry::new(tax_header);
4631
4632                // DR Tax Expense (8000)
4633                tax_je.add_line(JournalEntryLine::debit(
4634                    doc_id,
4635                    1,
4636                    tax_accounts::TAX_EXPENSE.to_string(),
4637                    tax_amount,
4638                ));
4639                // CR Income Tax Payable (2130)
4640                tax_je.add_line(JournalEntryLine::credit(
4641                    doc_id,
4642                    2,
4643                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4644                    tax_amount,
4645                ));
4646
4647                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4648                close_jes.push(tax_je);
4649            } else {
4650                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
4651                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
4652                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4653                if dta_amount > Decimal::ZERO {
4654                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4655                    dta_header.document_type = "CL".to_string();
4656                    dta_header.header_text =
4657                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
4658                    dta_header.created_by = "CLOSE_ENGINE".to_string();
4659                    dta_header.source = TransactionSource::Automated;
4660                    dta_header.business_process = Some(BusinessProcess::R2R);
4661
4662                    let doc_id = dta_header.document_id;
4663                    let mut dta_je = JournalEntry::new(dta_header);
4664
4665                    // DR Deferred Tax Asset (1600)
4666                    dta_je.add_line(JournalEntryLine::debit(
4667                        doc_id,
4668                        1,
4669                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4670                        dta_amount,
4671                    ));
4672                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
4673                    // reflecting the benefit of the future deductible temporary difference.
4674                    dta_je.add_line(JournalEntryLine::credit(
4675                        doc_id,
4676                        2,
4677                        tax_accounts::TAX_EXPENSE.to_string(),
4678                        dta_amount,
4679                    ));
4680
4681                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4682                    close_jes.push(dta_je);
4683                    debug!(
4684                        "Company {}: loss year — recognised DTA of {}",
4685                        company_code, dta_amount
4686                    );
4687                }
4688            }
4689
4690            // --- Dividend JEs (v2.4) ---
4691            // If the entity is profitable after tax, declare a 10% dividend payout.
4692            // This runs AFTER tax provision so the dividend is based on post-tax income
4693            // but BEFORE the retained earnings close so the RE transfer reflects the
4694            // reduced balance.
4695            let tax_provision = if pre_tax_income > Decimal::ZERO {
4696                (pre_tax_income * tax_rate).round_dp(2)
4697            } else {
4698                Decimal::ZERO
4699            };
4700            let net_income = pre_tax_income - tax_provision;
4701
4702            if net_income > Decimal::ZERO {
4703                use datasynth_generators::DividendGenerator;
4704                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
4705                let mut div_gen = DividendGenerator::new(self.seed + 460);
4706                let currency_str = self
4707                    .config
4708                    .companies
4709                    .iter()
4710                    .find(|c| c.code == *company_code)
4711                    .map(|c| c.currency.as_str())
4712                    .unwrap_or("USD");
4713                let div_result = div_gen.generate(
4714                    company_code,
4715                    close_date,
4716                    Decimal::new(1, 0), // $1 per share placeholder
4717                    dividend_amount,
4718                    currency_str,
4719                );
4720                let div_je_count = div_result.journal_entries.len();
4721                close_jes.extend(div_result.journal_entries);
4722                debug!(
4723                    "Company {}: declared dividend of {} ({} JEs)",
4724                    company_code, dividend_amount, div_je_count
4725                );
4726            }
4727
4728            // --- Income statement closing JE ---
4729            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
4730            // For a loss year the DTA JE above already recognises the deferred benefit; here we
4731            // close the pre-tax loss into Retained Earnings as-is.
4732            if net_income != Decimal::ZERO {
4733                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4734                close_header.document_type = "CL".to_string();
4735                close_header.header_text =
4736                    Some(format!("Income statement close - {}", company_code));
4737                close_header.created_by = "CLOSE_ENGINE".to_string();
4738                close_header.source = TransactionSource::Automated;
4739                close_header.business_process = Some(BusinessProcess::R2R);
4740
4741                let doc_id = close_header.document_id;
4742                let mut close_je = JournalEntry::new(close_header);
4743
4744                let abs_net_income = net_income.abs();
4745
4746                if net_income > Decimal::ZERO {
4747                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
4748                    close_je.add_line(JournalEntryLine::debit(
4749                        doc_id,
4750                        1,
4751                        equity_accounts::INCOME_SUMMARY.to_string(),
4752                        abs_net_income,
4753                    ));
4754                    close_je.add_line(JournalEntryLine::credit(
4755                        doc_id,
4756                        2,
4757                        equity_accounts::RETAINED_EARNINGS.to_string(),
4758                        abs_net_income,
4759                    ));
4760                } else {
4761                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
4762                    close_je.add_line(JournalEntryLine::debit(
4763                        doc_id,
4764                        1,
4765                        equity_accounts::RETAINED_EARNINGS.to_string(),
4766                        abs_net_income,
4767                    ));
4768                    close_je.add_line(JournalEntryLine::credit(
4769                        doc_id,
4770                        2,
4771                        equity_accounts::INCOME_SUMMARY.to_string(),
4772                        abs_net_income,
4773                    ));
4774                }
4775
4776                debug_assert!(
4777                    close_je.is_balanced(),
4778                    "Income statement closing JE must be balanced"
4779                );
4780                close_jes.push(close_je);
4781            }
4782        }
4783
4784        let close_count = close_jes.len();
4785        if close_count > 0 {
4786            info!("Generated {} period-close journal entries", close_count);
4787            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4788            entries.extend(close_jes);
4789            stats.period_close_je_count = close_count;
4790
4791            // Update total entry/line-item stats
4792            stats.total_entries = entries.len() as u64;
4793            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4794        } else {
4795            debug!("No period-close entries generated (no income statement activity)");
4796        }
4797
4798        Ok(())
4799    }
4800
4801    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
4802    fn phase_audit_data(
4803        &mut self,
4804        entries: &[JournalEntry],
4805        stats: &mut EnhancedGenerationStatistics,
4806    ) -> SynthResult<AuditSnapshot> {
4807        if self.phase_config.generate_audit {
4808            info!("Phase 8: Generating Audit Data");
4809            let audit_snapshot = self.generate_audit_data(entries)?;
4810            stats.audit_engagement_count = audit_snapshot.engagements.len();
4811            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4812            stats.audit_evidence_count = audit_snapshot.evidence.len();
4813            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4814            stats.audit_finding_count = audit_snapshot.findings.len();
4815            stats.audit_judgment_count = audit_snapshot.judgments.len();
4816            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4817            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4818            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4819            stats.audit_sample_count = audit_snapshot.samples.len();
4820            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4821            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4822            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4823            stats.audit_related_party_count = audit_snapshot.related_parties.len();
4824            stats.audit_related_party_transaction_count =
4825                audit_snapshot.related_party_transactions.len();
4826            info!(
4827                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4828                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4829                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4830                 {} RP transactions",
4831                stats.audit_engagement_count,
4832                stats.audit_workpaper_count,
4833                stats.audit_evidence_count,
4834                stats.audit_risk_count,
4835                stats.audit_finding_count,
4836                stats.audit_judgment_count,
4837                stats.audit_confirmation_count,
4838                stats.audit_procedure_step_count,
4839                stats.audit_sample_count,
4840                stats.audit_analytical_result_count,
4841                stats.audit_ia_function_count,
4842                stats.audit_ia_report_count,
4843                stats.audit_related_party_count,
4844                stats.audit_related_party_transaction_count,
4845            );
4846            self.check_resources_with_log("post-audit")?;
4847            Ok(audit_snapshot)
4848        } else {
4849            debug!("Phase 8: Skipped (audit generation disabled)");
4850            Ok(AuditSnapshot::default())
4851        }
4852    }
4853
4854    /// Phase 9: Generate banking KYC/AML data.
4855    fn phase_banking_data(
4856        &mut self,
4857        stats: &mut EnhancedGenerationStatistics,
4858    ) -> SynthResult<BankingSnapshot> {
4859        if self.phase_config.generate_banking {
4860            info!("Phase 9: Generating Banking KYC/AML Data");
4861            let banking_snapshot = self.generate_banking_data()?;
4862            stats.banking_customer_count = banking_snapshot.customers.len();
4863            stats.banking_account_count = banking_snapshot.accounts.len();
4864            stats.banking_transaction_count = banking_snapshot.transactions.len();
4865            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4866            info!(
4867                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4868                stats.banking_customer_count, stats.banking_account_count,
4869                stats.banking_transaction_count, stats.banking_suspicious_count
4870            );
4871            self.check_resources_with_log("post-banking")?;
4872            Ok(banking_snapshot)
4873        } else {
4874            debug!("Phase 9: Skipped (banking generation disabled)");
4875            Ok(BankingSnapshot::default())
4876        }
4877    }
4878
4879    /// Phase 10: Export accounting network graphs for ML training.
4880    fn phase_graph_export(
4881        &mut self,
4882        entries: &[JournalEntry],
4883        coa: &Arc<ChartOfAccounts>,
4884        stats: &mut EnhancedGenerationStatistics,
4885    ) -> SynthResult<GraphExportSnapshot> {
4886        if self.phase_config.generate_graph_export && !entries.is_empty() {
4887            info!("Phase 10: Exporting Accounting Network Graphs");
4888            match self.export_graphs(entries, coa, stats) {
4889                Ok(snapshot) => {
4890                    info!(
4891                        "Graph export complete: {} graphs ({} nodes, {} edges)",
4892                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4893                    );
4894                    Ok(snapshot)
4895                }
4896                Err(e) => {
4897                    warn!("Phase 10: Graph export failed: {}", e);
4898                    Ok(GraphExportSnapshot::default())
4899                }
4900            }
4901        } else {
4902            debug!("Phase 10: Skipped (graph export disabled or no entries)");
4903            Ok(GraphExportSnapshot::default())
4904        }
4905    }
4906
4907    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
4908    #[allow(clippy::too_many_arguments)]
4909    fn phase_hypergraph_export(
4910        &self,
4911        coa: &Arc<ChartOfAccounts>,
4912        entries: &[JournalEntry],
4913        document_flows: &DocumentFlowSnapshot,
4914        sourcing: &SourcingSnapshot,
4915        hr: &HrSnapshot,
4916        manufacturing: &ManufacturingSnapshot,
4917        banking: &BankingSnapshot,
4918        audit: &AuditSnapshot,
4919        financial_reporting: &FinancialReportingSnapshot,
4920        ocpm: &OcpmSnapshot,
4921        compliance: &ComplianceRegulationsSnapshot,
4922        stats: &mut EnhancedGenerationStatistics,
4923    ) -> SynthResult<()> {
4924        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4925            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4926            match self.export_hypergraph(
4927                coa,
4928                entries,
4929                document_flows,
4930                sourcing,
4931                hr,
4932                manufacturing,
4933                banking,
4934                audit,
4935                financial_reporting,
4936                ocpm,
4937                compliance,
4938                stats,
4939            ) {
4940                Ok(info) => {
4941                    info!(
4942                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4943                        info.node_count, info.edge_count, info.hyperedge_count
4944                    );
4945                }
4946                Err(e) => {
4947                    warn!("Phase 10b: Hypergraph export failed: {}", e);
4948                }
4949            }
4950        } else {
4951            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4952        }
4953        Ok(())
4954    }
4955
4956    /// Phase 11: LLM Enrichment.
4957    ///
4958    /// Uses an LLM provider (mock by default) to enrich vendor names with
4959    /// realistic, context-aware names. This phase is non-blocking: failures
4960    /// log a warning but do not stop the generation pipeline.
4961    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4962        if !self.config.llm.enabled {
4963            debug!("Phase 11: Skipped (LLM enrichment disabled)");
4964            return;
4965        }
4966
4967        info!("Phase 11: Starting LLM Enrichment");
4968        let start = std::time::Instant::now();
4969
4970        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4971            // Select provider: use HttpLlmProvider when a non-mock provider is configured
4972            // and the corresponding API key environment variable is present.
4973            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4974                let schema_provider = &self.config.llm.provider;
4975                let api_key_env = match schema_provider.as_str() {
4976                    "openai" => Some("OPENAI_API_KEY"),
4977                    "anthropic" => Some("ANTHROPIC_API_KEY"),
4978                    "custom" => Some("LLM_API_KEY"),
4979                    _ => None,
4980                };
4981                if let Some(key_env) = api_key_env {
4982                    if std::env::var(key_env).is_ok() {
4983                        let llm_config = datasynth_core::llm::LlmConfig {
4984                            model: self.config.llm.model.clone(),
4985                            api_key_env: key_env.to_string(),
4986                            ..datasynth_core::llm::LlmConfig::default()
4987                        };
4988                        match HttpLlmProvider::new(llm_config) {
4989                            Ok(p) => Arc::new(p),
4990                            Err(e) => {
4991                                warn!(
4992                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
4993                                    e
4994                                );
4995                                Arc::new(MockLlmProvider::new(self.seed))
4996                            }
4997                        }
4998                    } else {
4999                        Arc::new(MockLlmProvider::new(self.seed))
5000                    }
5001                } else {
5002                    Arc::new(MockLlmProvider::new(self.seed))
5003                }
5004            };
5005            // v4.1.1+: multi-category enrichment. Vendors remain the
5006            // default path; customers and materials opt in via
5007            // `llm.enrich_customers` / `llm.enrich_materials` flags.
5008            let industry = format!("{:?}", self.config.global.industry);
5009
5010            let vendor_enricher =
5011                datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5012            let max_vendors = self
5013                .config
5014                .llm
5015                .max_vendor_enrichments
5016                .min(self.master_data.vendors.len());
5017            let mut vendors_enriched = 0usize;
5018            for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5019                match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5020                    Ok(name) => {
5021                        vendor.name = name;
5022                        vendors_enriched += 1;
5023                    }
5024                    Err(e) => warn!(
5025                        "LLM vendor enrichment failed for {}: {}",
5026                        vendor.vendor_id, e
5027                    ),
5028                }
5029            }
5030
5031            let mut customers_enriched = 0usize;
5032            if self.config.llm.enrich_customers {
5033                let customer_enricher =
5034                    datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5035                        &provider,
5036                    ));
5037                let max_customers = self
5038                    .config
5039                    .llm
5040                    .max_customer_enrichments
5041                    .min(self.master_data.customers.len());
5042                for customer in self.master_data.customers.iter_mut().take(max_customers) {
5043                    match customer_enricher.enrich_customer_name(
5044                        &industry,
5045                        "general",
5046                        &customer.country,
5047                    ) {
5048                        Ok(name) => {
5049                            customer.name = name;
5050                            customers_enriched += 1;
5051                        }
5052                        Err(e) => warn!(
5053                            "LLM customer enrichment failed for {}: {}",
5054                            customer.customer_id, e
5055                        ),
5056                    }
5057                }
5058            }
5059
5060            let mut materials_enriched = 0usize;
5061            if self.config.llm.enrich_materials {
5062                let material_enricher =
5063                    datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5064                        &provider,
5065                    ));
5066                let max_materials = self
5067                    .config
5068                    .llm
5069                    .max_material_enrichments
5070                    .min(self.master_data.materials.len());
5071                for material in self.master_data.materials.iter_mut().take(max_materials) {
5072                    let material_type = format!("{:?}", material.material_type);
5073                    match material_enricher.enrich_material_description(&material_type, &industry) {
5074                        Ok(desc) => {
5075                            material.description = desc;
5076                            materials_enriched += 1;
5077                        }
5078                        Err(e) => warn!(
5079                            "LLM material enrichment failed for {}: {}",
5080                            material.material_id, e
5081                        ),
5082                    }
5083                }
5084            }
5085
5086            (vendors_enriched, customers_enriched, materials_enriched)
5087        }));
5088
5089        match result {
5090            Ok((v, c, m)) => {
5091                stats.llm_vendors_enriched = v;
5092                stats.llm_customers_enriched = c;
5093                stats.llm_materials_enriched = m;
5094                let elapsed = start.elapsed();
5095                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5096                info!(
5097                    "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5098                    v, c, m, stats.llm_enrichment_ms
5099                );
5100            }
5101            Err(_) => {
5102                let elapsed = start.elapsed();
5103                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5104                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5105            }
5106        }
5107    }
5108
5109    /// Phase 12: Diffusion Enhancement.
5110    ///
5111    /// Generates a sample set matching distribution properties from the
5112    /// generated data. v4.4.0+ honours `config.diffusion.backend`:
5113    /// - `"statistical"` (default) — moment-matching backend, always fast.
5114    /// - `"neural"` / `"hybrid"` — candle-based score network. Requires
5115    ///   the `neural` Cargo feature; falls back to statistical when the
5116    ///   feature isn't compiled in, with a loud warning.
5117    ///
5118    /// This phase is non-blocking: failures log a warning but do not
5119    /// stop the pipeline.
5120    fn phase_diffusion_enhancement(
5121        &self,
5122        #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5123        stats: &mut EnhancedGenerationStatistics,
5124    ) {
5125        if !self.config.diffusion.enabled {
5126            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5127            return;
5128        }
5129
5130        info!("Phase 12: Starting Diffusion Enhancement");
5131        let start = std::time::Instant::now();
5132
5133        let backend_choice = self.config.diffusion.backend.as_str();
5134        let use_neural = matches!(backend_choice, "neural" | "hybrid");
5135
5136        if use_neural {
5137            #[cfg(feature = "neural")]
5138            {
5139                match self.run_neural_diffusion_phase(entries) {
5140                    Ok(sample_count) => {
5141                        stats.diffusion_samples_generated = sample_count;
5142                        let elapsed = start.elapsed();
5143                        stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5144                        info!(
5145                            "Phase 12 complete ({}): {} samples in {}ms",
5146                            backend_choice, sample_count, stats.diffusion_enhancement_ms
5147                        );
5148                        return;
5149                    }
5150                    Err(e) => {
5151                        warn!(
5152                            "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5153                        );
5154                        // Fall through to statistical path below.
5155                    }
5156                }
5157            }
5158            #[cfg(not(feature = "neural"))]
5159            {
5160                warn!(
5161                    "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5162                     not compiled in — falling back to statistical. Rebuild with \
5163                     `--features neural` (or `neural-cuda` for GPU) to enable.",
5164                    backend_choice
5165                );
5166            }
5167        } else if !matches!(backend_choice, "statistical" | "") {
5168            warn!(
5169                "Phase 12: unknown backend '{}', falling back to statistical",
5170                backend_choice
5171            );
5172        }
5173
5174        // Statistical path (default + fallback).
5175        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5176            let means = vec![5000.0, 3.0, 2.0];
5177            let stds = vec![2000.0, 1.5, 1.0];
5178
5179            let diffusion_config = DiffusionConfig {
5180                n_steps: self.config.diffusion.n_steps,
5181                seed: self.seed,
5182                ..Default::default()
5183            };
5184
5185            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5186            let n_samples = self.config.diffusion.sample_size;
5187            let n_features = 3;
5188            backend.generate(n_samples, n_features, self.seed).len()
5189        }));
5190
5191        match result {
5192            Ok(sample_count) => {
5193                stats.diffusion_samples_generated = sample_count;
5194                let elapsed = start.elapsed();
5195                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5196                info!(
5197                    "Phase 12 complete (statistical): {} samples in {}ms",
5198                    sample_count, stats.diffusion_enhancement_ms
5199                );
5200            }
5201            Err(_) => {
5202                let elapsed = start.elapsed();
5203                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5204                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5205            }
5206        }
5207    }
5208
5209    /// Neural-backend execution — either load a pre-trained checkpoint
5210    /// (when `config.diffusion.neural.checkpoint_path` is set) or train
5211    /// from the first batch of JE amounts. Returns the sample count
5212    /// produced; any error bubbles up to the statistical fallback.
5213    #[cfg(feature = "neural")]
5214    fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
5215        use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
5216
5217        if entries.is_empty() {
5218            return Err(SynthError::generation(
5219                "neural diffusion: no journal entries available as training data",
5220            ));
5221        }
5222
5223        let training_data: Vec<Vec<f64>> = entries
5224            .iter()
5225            .take(5000)
5226            .map(|je| {
5227                let total_amount: f64 = je
5228                    .lines
5229                    .iter()
5230                    .filter(|l| l.debit_amount > rust_decimal::Decimal::ZERO)
5231                    .map(|l| {
5232                        use rust_decimal::prelude::ToPrimitive;
5233                        l.debit_amount.to_f64().unwrap_or(0.0)
5234                    })
5235                    .sum();
5236                let line_count = je.lines.len() as f64;
5237                // Use the approval-workflow depth as the third feature
5238                // (proxy for complexity / risk). `None` → 1.
5239                let approval_level = je
5240                    .header
5241                    .approval_workflow
5242                    .as_ref()
5243                    .map(|w| w.required_levels as f64)
5244                    .unwrap_or(1.0);
5245                vec![total_amount, line_count, approval_level]
5246            })
5247            .collect();
5248
5249        let n_features = training_data.first().map(|r| r.len()).unwrap_or(3);
5250
5251        let cfg = &self.config.diffusion;
5252        let neural_cfg = &cfg.neural;
5253
5254        let backend: NeuralDiffusionBackend = if let Some(ckpt_path) =
5255            neural_cfg.checkpoint_path.as_ref()
5256        {
5257            let path = std::path::Path::new(ckpt_path);
5258            info!(
5259                "  Neural diffusion: loading checkpoint from {}",
5260                path.display()
5261            );
5262            NeuralDiffusionBackend::load(path)
5263                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
5264        } else {
5265            use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
5266            info!(
5267                "  Neural diffusion: training score network on {} rows × {} features, \
5268                     {} epochs, hidden_dims={:?}",
5269                training_data.len(),
5270                n_features,
5271                neural_cfg.training_epochs,
5272                neural_cfg.hidden_dims
5273            );
5274            let training_config = NeuralTrainingConfig {
5275                n_steps: cfg.n_steps,
5276                schedule: cfg.schedule.clone(),
5277                hidden_dims: neural_cfg.hidden_dims.clone(),
5278                timestep_embed_dim: neural_cfg.timestep_embed_dim,
5279                learning_rate: neural_cfg.learning_rate,
5280                epochs: neural_cfg.training_epochs,
5281                batch_size: neural_cfg.batch_size,
5282            };
5283            let (backend, report) =
5284                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
5285                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
5286            info!(
5287                "  Neural diffusion: training done — {} epochs, final_loss={:.4}",
5288                report.epochs_completed, report.final_loss
5289            );
5290            backend
5291        };
5292
5293        let samples = backend.generate(cfg.sample_size, n_features, self.seed);
5294        Ok(samples.len())
5295    }
5296
5297    /// Phase 13: Causal Overlay.
5298    ///
5299    /// Builds a structural causal model from a built-in template (e.g.,
5300    /// fraud_detection) and generates causal samples. Optionally validates
5301    /// that the output respects the causal structure. This phase is
5302    /// non-blocking: failures log a warning but do not stop the pipeline.
5303    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5304        if !self.config.causal.enabled {
5305            debug!("Phase 13: Skipped (causal generation disabled)");
5306            return;
5307        }
5308
5309        info!("Phase 13: Starting Causal Overlay");
5310        let start = std::time::Instant::now();
5311
5312        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5313            // Select template based on config
5314            let graph = match self.config.causal.template.as_str() {
5315                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5316                _ => CausalGraph::fraud_detection_template(),
5317            };
5318
5319            let scm = StructuralCausalModel::new(graph.clone())
5320                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5321
5322            let n_samples = self.config.causal.sample_size;
5323            let samples = scm
5324                .generate(n_samples, self.seed)
5325                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5326
5327            // Optionally validate causal structure
5328            let validation_passed = if self.config.causal.validate {
5329                let report = CausalValidator::validate_causal_structure(&samples, &graph);
5330                if report.valid {
5331                    info!(
5332                        "Causal validation passed: all {} checks OK",
5333                        report.checks.len()
5334                    );
5335                } else {
5336                    warn!(
5337                        "Causal validation: {} violations detected: {:?}",
5338                        report.violations.len(),
5339                        report.violations
5340                    );
5341                }
5342                Some(report.valid)
5343            } else {
5344                None
5345            };
5346
5347            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5348        }));
5349
5350        match result {
5351            Ok(Ok((sample_count, validation_passed))) => {
5352                stats.causal_samples_generated = sample_count;
5353                stats.causal_validation_passed = validation_passed;
5354                let elapsed = start.elapsed();
5355                stats.causal_generation_ms = elapsed.as_millis() as u64;
5356                info!(
5357                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5358                    sample_count, stats.causal_generation_ms, validation_passed,
5359                );
5360            }
5361            Ok(Err(e)) => {
5362                let elapsed = start.elapsed();
5363                stats.causal_generation_ms = elapsed.as_millis() as u64;
5364                warn!("Phase 13: Causal generation failed: {}", e);
5365            }
5366            Err(_) => {
5367                let elapsed = start.elapsed();
5368                stats.causal_generation_ms = elapsed.as_millis() as u64;
5369                warn!("Phase 13: Causal generation failed (panic caught), continuing");
5370            }
5371        }
5372    }
5373
5374    /// Phase 14: Generate S2C sourcing data.
5375    fn phase_sourcing_data(
5376        &mut self,
5377        stats: &mut EnhancedGenerationStatistics,
5378    ) -> SynthResult<SourcingSnapshot> {
5379        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5380            debug!("Phase 14: Skipped (sourcing generation disabled)");
5381            return Ok(SourcingSnapshot::default());
5382        }
5383        let degradation = self.check_resources()?;
5384        if degradation >= DegradationLevel::Reduced {
5385            debug!(
5386                "Phase skipped due to resource pressure (degradation: {:?})",
5387                degradation
5388            );
5389            return Ok(SourcingSnapshot::default());
5390        }
5391
5392        info!("Phase 14: Generating S2C Sourcing Data");
5393        let seed = self.seed;
5394
5395        // Gather vendor data from master data
5396        let vendor_ids: Vec<String> = self
5397            .master_data
5398            .vendors
5399            .iter()
5400            .map(|v| v.vendor_id.clone())
5401            .collect();
5402        if vendor_ids.is_empty() {
5403            debug!("Phase 14: Skipped (no vendors available)");
5404            return Ok(SourcingSnapshot::default());
5405        }
5406
5407        let categories: Vec<(String, String)> = vec![
5408            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5409            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5410            ("CAT-IT".to_string(), "IT Equipment".to_string()),
5411            ("CAT-SVC".to_string(), "Professional Services".to_string()),
5412            ("CAT-LOG".to_string(), "Logistics".to_string()),
5413        ];
5414        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5415            .iter()
5416            .map(|(id, name)| {
5417                (
5418                    id.clone(),
5419                    name.clone(),
5420                    rust_decimal::Decimal::from(100_000),
5421                )
5422            })
5423            .collect();
5424
5425        let company_code = self
5426            .config
5427            .companies
5428            .first()
5429            .map(|c| c.code.as_str())
5430            .unwrap_or("1000");
5431        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5432            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5433        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5434        let fiscal_year = start_date.year() as u16;
5435        let owner_ids: Vec<String> = self
5436            .master_data
5437            .employees
5438            .iter()
5439            .take(5)
5440            .map(|e| e.employee_id.clone())
5441            .collect();
5442        let owner_id = owner_ids
5443            .first()
5444            .map(std::string::String::as_str)
5445            .unwrap_or("BUYER-001");
5446
5447        // Step 1: Spend Analysis
5448        let mut spend_gen = SpendAnalysisGenerator::new(seed);
5449        let spend_analyses =
5450            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5451
5452        // Step 2: Sourcing Projects
5453        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5454        let sourcing_projects = if owner_ids.is_empty() {
5455            Vec::new()
5456        } else {
5457            project_gen.generate(
5458                company_code,
5459                &categories_with_spend,
5460                &owner_ids,
5461                start_date,
5462                self.config.global.period_months,
5463            )
5464        };
5465        stats.sourcing_project_count = sourcing_projects.len();
5466
5467        // Step 3: Qualifications
5468        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5469        let mut qual_gen = QualificationGenerator::new(seed + 2);
5470        let qualifications = qual_gen.generate(
5471            company_code,
5472            &qual_vendor_ids,
5473            sourcing_projects.first().map(|p| p.project_id.as_str()),
5474            owner_id,
5475            start_date,
5476        );
5477
5478        // Step 4: RFx Events
5479        let mut rfx_gen = RfxGenerator::new(seed + 3);
5480        let rfx_events: Vec<RfxEvent> = sourcing_projects
5481            .iter()
5482            .map(|proj| {
5483                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5484                rfx_gen.generate(
5485                    company_code,
5486                    &proj.project_id,
5487                    &proj.category_id,
5488                    &qualified_vids,
5489                    owner_id,
5490                    start_date,
5491                    50000.0,
5492                )
5493            })
5494            .collect();
5495        stats.rfx_event_count = rfx_events.len();
5496
5497        // Step 5: Bids
5498        let mut bid_gen = BidGenerator::new(seed + 4);
5499        let mut all_bids = Vec::new();
5500        for rfx in &rfx_events {
5501            let bidder_count = vendor_ids.len().clamp(2, 5);
5502            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5503            let bids = bid_gen.generate(rfx, &responding, start_date);
5504            all_bids.extend(bids);
5505        }
5506        stats.bid_count = all_bids.len();
5507
5508        // Step 6: Bid Evaluations
5509        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5510        let bid_evaluations: Vec<BidEvaluation> = rfx_events
5511            .iter()
5512            .map(|rfx| {
5513                let rfx_bids: Vec<SupplierBid> = all_bids
5514                    .iter()
5515                    .filter(|b| b.rfx_id == rfx.rfx_id)
5516                    .cloned()
5517                    .collect();
5518                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5519            })
5520            .collect();
5521
5522        // Step 7: Contracts from winning bids
5523        let mut contract_gen = ContractGenerator::new(seed + 6);
5524        let contracts: Vec<ProcurementContract> = bid_evaluations
5525            .iter()
5526            .zip(rfx_events.iter())
5527            .filter_map(|(eval, rfx)| {
5528                eval.ranked_bids.first().and_then(|winner| {
5529                    all_bids
5530                        .iter()
5531                        .find(|b| b.bid_id == winner.bid_id)
5532                        .map(|winning_bid| {
5533                            contract_gen.generate_from_bid(
5534                                winning_bid,
5535                                Some(&rfx.sourcing_project_id),
5536                                &rfx.category_id,
5537                                owner_id,
5538                                start_date,
5539                            )
5540                        })
5541                })
5542            })
5543            .collect();
5544        stats.contract_count = contracts.len();
5545
5546        // Step 8: Catalog Items
5547        let mut catalog_gen = CatalogGenerator::new(seed + 7);
5548        let catalog_items = catalog_gen.generate(&contracts);
5549        stats.catalog_item_count = catalog_items.len();
5550
5551        // Step 9: Scorecards
5552        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5553        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5554            .iter()
5555            .fold(
5556                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5557                |mut acc, c| {
5558                    acc.entry(c.vendor_id.clone()).or_default().push(c);
5559                    acc
5560                },
5561            )
5562            .into_iter()
5563            .collect();
5564        let scorecards = scorecard_gen.generate(
5565            company_code,
5566            &vendor_contracts,
5567            start_date,
5568            end_date,
5569            owner_id,
5570        );
5571        stats.scorecard_count = scorecards.len();
5572
5573        // Back-populate cross-references on sourcing projects (Task 35)
5574        // Link each project to its RFx events, contracts, and spend analyses
5575        let mut sourcing_projects = sourcing_projects;
5576        for project in &mut sourcing_projects {
5577            // Link RFx events generated for this project
5578            project.rfx_ids = rfx_events
5579                .iter()
5580                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5581                .map(|rfx| rfx.rfx_id.clone())
5582                .collect();
5583
5584            // Link contract awarded from this project's RFx
5585            project.contract_id = contracts
5586                .iter()
5587                .find(|c| {
5588                    c.sourcing_project_id
5589                        .as_deref()
5590                        .is_some_and(|sp| sp == project.project_id)
5591                })
5592                .map(|c| c.contract_id.clone());
5593
5594            // Link spend analysis for matching category (use category_id as the reference)
5595            project.spend_analysis_id = spend_analyses
5596                .iter()
5597                .find(|sa| sa.category_id == project.category_id)
5598                .map(|sa| sa.category_id.clone());
5599        }
5600
5601        info!(
5602            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5603            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5604            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5605        );
5606        self.check_resources_with_log("post-sourcing")?;
5607
5608        Ok(SourcingSnapshot {
5609            spend_analyses,
5610            sourcing_projects,
5611            qualifications,
5612            rfx_events,
5613            bids: all_bids,
5614            bid_evaluations,
5615            contracts,
5616            catalog_items,
5617            scorecards,
5618        })
5619    }
5620
5621    /// Build a [`GroupStructure`] from the current company configuration.
5622    ///
5623    /// The first company in the configuration is treated as the ultimate parent.
5624    /// All remaining companies become wholly-owned (100 %) subsidiaries with
5625    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
5626    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5627        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5628
5629        let parent_code = self
5630            .config
5631            .companies
5632            .first()
5633            .map(|c| c.code.clone())
5634            .unwrap_or_else(|| "PARENT".to_string());
5635
5636        let mut group = GroupStructure::new(parent_code);
5637
5638        for company in self.config.companies.iter().skip(1) {
5639            let sub =
5640                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5641            group.add_subsidiary(sub);
5642        }
5643
5644        group
5645    }
5646
5647    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
5648    fn phase_intercompany(
5649        &mut self,
5650        journal_entries: &[JournalEntry],
5651        stats: &mut EnhancedGenerationStatistics,
5652    ) -> SynthResult<IntercompanySnapshot> {
5653        // Skip if intercompany is disabled in config
5654        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5655            debug!("Phase 14b: Skipped (intercompany generation disabled)");
5656            return Ok(IntercompanySnapshot::default());
5657        }
5658
5659        // Intercompany requires at least 2 companies
5660        if self.config.companies.len() < 2 {
5661            debug!(
5662                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5663                self.config.companies.len()
5664            );
5665            return Ok(IntercompanySnapshot::default());
5666        }
5667
5668        info!("Phase 14b: Generating Intercompany Transactions");
5669
5670        // Build the group structure early — used by ISA 600 component auditor scope
5671        // and consolidated financial statement generators downstream.
5672        let group_structure = self.build_group_structure();
5673        debug!(
5674            "Group structure built: parent={}, subsidiaries={}",
5675            group_structure.parent_entity,
5676            group_structure.subsidiaries.len()
5677        );
5678
5679        let seed = self.seed;
5680        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5681            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5682        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5683
5684        // Build ownership structure from company configs
5685        // First company is treated as the parent, remaining are subsidiaries
5686        let parent_code = self.config.companies[0].code.clone();
5687        let mut ownership_structure =
5688            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5689
5690        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5691            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5692                format!("REL{:03}", i + 1),
5693                parent_code.clone(),
5694                company.code.clone(),
5695                rust_decimal::Decimal::from(100), // Default 100% ownership
5696                start_date,
5697            );
5698            ownership_structure.add_relationship(relationship);
5699        }
5700
5701        // Convert config transfer pricing method to core model enum
5702        let tp_method = match self.config.intercompany.transfer_pricing_method {
5703            datasynth_config::schema::TransferPricingMethod::CostPlus => {
5704                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5705            }
5706            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5707                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5708            }
5709            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5710                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5711            }
5712            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5713                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5714            }
5715            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5716                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5717            }
5718        };
5719
5720        // Build IC generator config from schema config
5721        let ic_currency = self
5722            .config
5723            .companies
5724            .first()
5725            .map(|c| c.currency.clone())
5726            .unwrap_or_else(|| "USD".to_string());
5727        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5728            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5729            transfer_pricing_method: tp_method,
5730            markup_percent: rust_decimal::Decimal::from_f64_retain(
5731                self.config.intercompany.markup_percent,
5732            )
5733            .unwrap_or(rust_decimal::Decimal::from(5)),
5734            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5735            default_currency: ic_currency,
5736            ..Default::default()
5737        };
5738
5739        // Create IC generator
5740        let mut ic_generator = datasynth_generators::ICGenerator::new(
5741            ic_gen_config,
5742            ownership_structure.clone(),
5743            seed + 50,
5744        );
5745
5746        // Generate IC transactions for the period
5747        // Use ~3 transactions per day as a reasonable default
5748        let transactions_per_day = 3;
5749        let matched_pairs = ic_generator.generate_transactions_for_period(
5750            start_date,
5751            end_date,
5752            transactions_per_day,
5753        );
5754
5755        // Generate IC source P2P/O2C documents
5756        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5757        debug!(
5758            "Generated {} IC seller invoices, {} IC buyer POs",
5759            ic_doc_chains.seller_invoices.len(),
5760            ic_doc_chains.buyer_orders.len()
5761        );
5762
5763        // Generate journal entries from matched pairs
5764        let mut seller_entries = Vec::new();
5765        let mut buyer_entries = Vec::new();
5766        let fiscal_year = start_date.year();
5767
5768        for pair in &matched_pairs {
5769            let fiscal_period = pair.posting_date.month();
5770            let (seller_je, buyer_je) =
5771                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5772            seller_entries.push(seller_je);
5773            buyer_entries.push(buyer_je);
5774        }
5775
5776        // Run matching engine
5777        let matching_config = datasynth_generators::ICMatchingConfig {
5778            base_currency: self
5779                .config
5780                .companies
5781                .first()
5782                .map(|c| c.currency.clone())
5783                .unwrap_or_else(|| "USD".to_string()),
5784            ..Default::default()
5785        };
5786        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5787        matching_engine.load_matched_pairs(&matched_pairs);
5788        let matching_result = matching_engine.run_matching(end_date);
5789
5790        // Generate elimination entries if configured
5791        let mut elimination_entries = Vec::new();
5792        if self.config.intercompany.generate_eliminations {
5793            let elim_config = datasynth_generators::EliminationConfig {
5794                consolidation_entity: "GROUP".to_string(),
5795                base_currency: self
5796                    .config
5797                    .companies
5798                    .first()
5799                    .map(|c| c.currency.clone())
5800                    .unwrap_or_else(|| "USD".to_string()),
5801                ..Default::default()
5802            };
5803
5804            let mut elim_generator =
5805                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5806
5807            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5808            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5809                matching_result
5810                    .matched_balances
5811                    .iter()
5812                    .chain(matching_result.unmatched_balances.iter())
5813                    .cloned()
5814                    .collect();
5815
5816            // Build investment and equity maps from the group structure so that the
5817            // elimination generator can produce equity-investment elimination entries
5818            // (parent's investment in subsidiary vs. subsidiary's equity capital).
5819            //
5820            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
5821            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
5822            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
5823            //
5824            // Net assets are derived from the journal entries using account-range heuristics:
5825            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
5826            // no JE data is available (IC phase runs early in the generation pipeline).
5827            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5828                std::collections::HashMap::new();
5829            let mut equity_amounts: std::collections::HashMap<
5830                String,
5831                std::collections::HashMap<String, rust_decimal::Decimal>,
5832            > = std::collections::HashMap::new();
5833            {
5834                use rust_decimal::Decimal;
5835                let hundred = Decimal::from(100u32);
5836                let ten_pct = Decimal::new(10, 2); // 0.10
5837                let thirty_pct = Decimal::new(30, 2); // 0.30
5838                let sixty_pct = Decimal::new(60, 2); // 0.60
5839                let parent_code = &group_structure.parent_entity;
5840                for sub in &group_structure.subsidiaries {
5841                    let net_assets = {
5842                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5843                        if na > Decimal::ZERO {
5844                            na
5845                        } else {
5846                            Decimal::from(1_000_000u64)
5847                        }
5848                    };
5849                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
5850                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5851                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5852
5853                    // Split subsidiary equity into conventional components:
5854                    // 10 % share capital / 30 % APIC / 60 % retained earnings
5855                    let mut eq_map = std::collections::HashMap::new();
5856                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5857                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5858                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5859                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
5860                }
5861            }
5862
5863            let journal = elim_generator.generate_eliminations(
5864                &fiscal_period,
5865                end_date,
5866                &all_balances,
5867                &matched_pairs,
5868                &investment_amounts,
5869                &equity_amounts,
5870            );
5871
5872            elimination_entries = journal.entries.clone();
5873        }
5874
5875        let matched_pair_count = matched_pairs.len();
5876        let elimination_entry_count = elimination_entries.len();
5877        let match_rate = matching_result.match_rate;
5878
5879        stats.ic_matched_pair_count = matched_pair_count;
5880        stats.ic_elimination_count = elimination_entry_count;
5881        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5882
5883        info!(
5884            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5885            matched_pair_count,
5886            stats.ic_transaction_count,
5887            seller_entries.len(),
5888            buyer_entries.len(),
5889            elimination_entry_count,
5890            match_rate * 100.0
5891        );
5892        self.check_resources_with_log("post-intercompany")?;
5893
5894        // ----------------------------------------------------------------
5895        // NCI measurements: derive from group structure ownership percentages
5896        // ----------------------------------------------------------------
5897        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5898            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5899            use rust_decimal::Decimal;
5900
5901            let eight_pct = Decimal::new(8, 2); // 0.08
5902
5903            group_structure
5904                .subsidiaries
5905                .iter()
5906                .filter(|sub| {
5907                    sub.nci_percentage > Decimal::ZERO
5908                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5909                })
5910                .map(|sub| {
5911                    // Compute net assets from actual journal entries for this subsidiary.
5912                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
5913                    // IC phase runs before the main JE batch has been populated).
5914                    let net_assets_from_jes =
5915                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5916
5917                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
5918                        net_assets_from_jes.round_dp(2)
5919                    } else {
5920                        // Fallback: use a plausible base amount
5921                        Decimal::from(1_000_000u64)
5922                    };
5923
5924                    // Net income approximated as 8% of net assets
5925                    let net_income = (net_assets * eight_pct).round_dp(2);
5926
5927                    NciMeasurement::compute(
5928                        sub.entity_code.clone(),
5929                        sub.nci_percentage,
5930                        net_assets,
5931                        net_income,
5932                    )
5933                })
5934                .collect()
5935        };
5936
5937        if !nci_measurements.is_empty() {
5938            info!(
5939                "NCI measurements: {} subsidiaries with non-controlling interests",
5940                nci_measurements.len()
5941            );
5942        }
5943
5944        Ok(IntercompanySnapshot {
5945            group_structure: Some(group_structure),
5946            matched_pairs,
5947            seller_journal_entries: seller_entries,
5948            buyer_journal_entries: buyer_entries,
5949            elimination_entries,
5950            nci_measurements,
5951            ic_document_chains: Some(ic_doc_chains),
5952            matched_pair_count,
5953            elimination_entry_count,
5954            match_rate,
5955        })
5956    }
5957
5958    /// Phase 15: Generate bank reconciliations and financial statements.
5959    fn phase_financial_reporting(
5960        &mut self,
5961        document_flows: &DocumentFlowSnapshot,
5962        journal_entries: &[JournalEntry],
5963        coa: &Arc<ChartOfAccounts>,
5964        _hr: &HrSnapshot,
5965        _audit: &AuditSnapshot,
5966        stats: &mut EnhancedGenerationStatistics,
5967    ) -> SynthResult<FinancialReportingSnapshot> {
5968        let fs_enabled = self.phase_config.generate_financial_statements
5969            || self.config.financial_reporting.enabled;
5970        let br_enabled = self.phase_config.generate_bank_reconciliation;
5971
5972        if !fs_enabled && !br_enabled {
5973            debug!("Phase 15: Skipped (financial reporting disabled)");
5974            return Ok(FinancialReportingSnapshot::default());
5975        }
5976
5977        info!("Phase 15: Generating Financial Reporting Data");
5978
5979        let seed = self.seed;
5980        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5981            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5982
5983        let mut financial_statements = Vec::new();
5984        let mut bank_reconciliations = Vec::new();
5985        let mut trial_balances = Vec::new();
5986        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
5987        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
5988            Vec::new();
5989        // Standalone statements keyed by entity code
5990        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5991            std::collections::HashMap::new();
5992        // Consolidated statements (one per period)
5993        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5994        // Consolidation schedules (one per period)
5995        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5996
5997        // Generate financial statements from JE-derived trial balances.
5998        //
5999        // When journal entries are available, we use cumulative trial balances for
6000        // balance sheet accounts and current-period trial balances for income
6001        // statement accounts. We also track prior-period trial balances so the
6002        // generator can produce comparative amounts, and we build a proper
6003        // cash flow statement from working capital changes rather than random data.
6004        if fs_enabled {
6005            let has_journal_entries = !journal_entries.is_empty();
6006
6007            // Use FinancialStatementGenerator for balance sheet and income statement,
6008            // but build cash flow ourselves from TB data when JEs are available.
6009            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6010            // Separate generator for consolidated statements (different seed offset)
6011            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6012
6013            // Collect elimination JEs once (reused across periods)
6014            let elimination_entries: Vec<&JournalEntry> = journal_entries
6015                .iter()
6016                .filter(|je| je.header.is_elimination)
6017                .collect();
6018
6019            // Generate one set of statements per period, per entity
6020            for period in 0..self.config.global.period_months {
6021                let period_start = start_date + chrono::Months::new(period);
6022                let period_end =
6023                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6024                let fiscal_year = period_end.year() as u16;
6025                let fiscal_period = period_end.month() as u8;
6026                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6027
6028                // Build per-entity trial balances for this period (non-elimination JEs)
6029                // We accumulate them for the consolidation step.
6030                let mut entity_tb_map: std::collections::HashMap<
6031                    String,
6032                    std::collections::HashMap<String, rust_decimal::Decimal>,
6033                > = std::collections::HashMap::new();
6034
6035                // --- Standalone: one set of statements per company ---
6036                for (company_idx, company) in self.config.companies.iter().enumerate() {
6037                    let company_code = company.code.as_str();
6038                    let currency = company.currency.as_str();
6039                    // Use a unique seed offset per company to keep statements deterministic
6040                    // and distinct across companies
6041                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6042                    let mut company_fs_gen =
6043                        FinancialStatementGenerator::new(seed + company_seed_offset);
6044
6045                    if has_journal_entries {
6046                        let tb_entries = Self::build_cumulative_trial_balance(
6047                            journal_entries,
6048                            coa,
6049                            company_code,
6050                            start_date,
6051                            period_end,
6052                            fiscal_year,
6053                            fiscal_period,
6054                        );
6055
6056                        // Accumulate per-entity category balances for consolidation
6057                        let entity_cat_map =
6058                            entity_tb_map.entry(company_code.to_string()).or_default();
6059                        for tb_entry in &tb_entries {
6060                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
6061                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6062                        }
6063
6064                        let stmts = company_fs_gen.generate(
6065                            company_code,
6066                            currency,
6067                            &tb_entries,
6068                            period_start,
6069                            period_end,
6070                            fiscal_year,
6071                            fiscal_period,
6072                            None,
6073                            "SYS-AUTOCLOSE",
6074                        );
6075
6076                        let mut entity_stmts = Vec::new();
6077                        for stmt in stmts {
6078                            if stmt.statement_type == StatementType::CashFlowStatement {
6079                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6080                                let cf_items = Self::build_cash_flow_from_trial_balances(
6081                                    &tb_entries,
6082                                    None,
6083                                    net_income,
6084                                );
6085                                entity_stmts.push(FinancialStatement {
6086                                    cash_flow_items: cf_items,
6087                                    ..stmt
6088                                });
6089                            } else {
6090                                entity_stmts.push(stmt);
6091                            }
6092                        }
6093
6094                        // Add to the flat financial_statements list (used by KPI/budget)
6095                        financial_statements.extend(entity_stmts.clone());
6096
6097                        // Store standalone per-entity
6098                        standalone_statements
6099                            .entry(company_code.to_string())
6100                            .or_default()
6101                            .extend(entity_stmts);
6102
6103                        // Only store trial balance for the first company in the period
6104                        // to avoid duplicates in the trial_balances list
6105                        if company_idx == 0 {
6106                            trial_balances.push(PeriodTrialBalance {
6107                                fiscal_year,
6108                                fiscal_period,
6109                                period_start,
6110                                period_end,
6111                                entries: tb_entries,
6112                            });
6113                        }
6114                    } else {
6115                        // Fallback: no JEs available
6116                        let tb_entries = Self::build_trial_balance_from_entries(
6117                            journal_entries,
6118                            coa,
6119                            company_code,
6120                            fiscal_year,
6121                            fiscal_period,
6122                        );
6123
6124                        let stmts = company_fs_gen.generate(
6125                            company_code,
6126                            currency,
6127                            &tb_entries,
6128                            period_start,
6129                            period_end,
6130                            fiscal_year,
6131                            fiscal_period,
6132                            None,
6133                            "SYS-AUTOCLOSE",
6134                        );
6135                        financial_statements.extend(stmts.clone());
6136                        standalone_statements
6137                            .entry(company_code.to_string())
6138                            .or_default()
6139                            .extend(stmts);
6140
6141                        if company_idx == 0 && !tb_entries.is_empty() {
6142                            trial_balances.push(PeriodTrialBalance {
6143                                fiscal_year,
6144                                fiscal_period,
6145                                period_start,
6146                                period_end,
6147                                entries: tb_entries,
6148                            });
6149                        }
6150                    }
6151                }
6152
6153                // --- Consolidated: aggregate all entities + apply eliminations ---
6154                // Use the primary (first) company's currency for the consolidated statement
6155                let group_currency = self
6156                    .config
6157                    .companies
6158                    .first()
6159                    .map(|c| c.currency.as_str())
6160                    .unwrap_or("USD");
6161
6162                // Build owned elimination entries for this period
6163                let period_eliminations: Vec<JournalEntry> = elimination_entries
6164                    .iter()
6165                    .filter(|je| {
6166                        je.header.fiscal_year == fiscal_year
6167                            && je.header.fiscal_period == fiscal_period
6168                    })
6169                    .map(|je| (*je).clone())
6170                    .collect();
6171
6172                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6173                    &entity_tb_map,
6174                    &period_eliminations,
6175                    &period_label,
6176                );
6177
6178                // Build a pseudo trial balance from consolidated line items for the
6179                // FinancialStatementGenerator to use (only for cash flow direction).
6180                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6181                    .line_items
6182                    .iter()
6183                    .map(|li| {
6184                        let net = li.post_elimination_total;
6185                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6186                            (net, rust_decimal::Decimal::ZERO)
6187                        } else {
6188                            (rust_decimal::Decimal::ZERO, -net)
6189                        };
6190                        datasynth_generators::TrialBalanceEntry {
6191                            account_code: li.account_category.clone(),
6192                            account_name: li.account_category.clone(),
6193                            category: li.account_category.clone(),
6194                            debit_balance: debit,
6195                            credit_balance: credit,
6196                        }
6197                    })
6198                    .collect();
6199
6200                let mut cons_stmts = cons_gen.generate(
6201                    "GROUP",
6202                    group_currency,
6203                    &cons_tb,
6204                    period_start,
6205                    period_end,
6206                    fiscal_year,
6207                    fiscal_period,
6208                    None,
6209                    "SYS-AUTOCLOSE",
6210                );
6211
6212                // Split consolidated line items by statement type.
6213                // The consolidation generator returns BS items first, then IS items,
6214                // identified by their CONS- prefix and category.
6215                let bs_categories: &[&str] = &[
6216                    "CASH",
6217                    "RECEIVABLES",
6218                    "INVENTORY",
6219                    "FIXEDASSETS",
6220                    "PAYABLES",
6221                    "ACCRUEDLIABILITIES",
6222                    "LONGTERMDEBT",
6223                    "EQUITY",
6224                ];
6225                let (bs_items, is_items): (Vec<_>, Vec<_>) =
6226                    cons_line_items.into_iter().partition(|li| {
6227                        let upper = li.label.to_uppercase();
6228                        bs_categories.iter().any(|c| upper == *c)
6229                    });
6230
6231                for stmt in &mut cons_stmts {
6232                    stmt.is_consolidated = true;
6233                    match stmt.statement_type {
6234                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6235                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6236                        _ => {} // CF and equity change statements keep generator output
6237                    }
6238                }
6239
6240                consolidated_statements.extend(cons_stmts);
6241                consolidation_schedules.push(schedule);
6242            }
6243
6244            // Backward compat: if only 1 company, use existing code path logic
6245            // (prior_cumulative_tb for comparative amounts). Already handled above;
6246            // the prior_ref is omitted to keep this change minimal.
6247            let _ = &mut fs_gen; // suppress unused warning
6248
6249            stats.financial_statement_count = financial_statements.len();
6250            info!(
6251                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6252                stats.financial_statement_count,
6253                consolidated_statements.len(),
6254                has_journal_entries
6255            );
6256
6257            // ----------------------------------------------------------------
6258            // IFRS 8 / ASC 280: Operating Segment Reporting
6259            // ----------------------------------------------------------------
6260            // Build entity seeds from the company configuration.
6261            let entity_seeds: Vec<SegmentSeed> = self
6262                .config
6263                .companies
6264                .iter()
6265                .map(|c| SegmentSeed {
6266                    code: c.code.clone(),
6267                    name: c.name.clone(),
6268                    currency: c.currency.clone(),
6269                })
6270                .collect();
6271
6272            let mut seg_gen = SegmentGenerator::new(seed + 30);
6273
6274            // Generate one set of segment reports per period.
6275            // We extract consolidated revenue / profit / assets from the consolidated
6276            // financial statements produced above, falling back to simple sums when
6277            // no consolidated statements were generated (single-entity path).
6278            for period in 0..self.config.global.period_months {
6279                let period_end =
6280                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6281                let fiscal_year = period_end.year() as u16;
6282                let fiscal_period = period_end.month() as u8;
6283                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6284
6285                use datasynth_core::models::StatementType;
6286
6287                // Try to find consolidated income statement for this period
6288                let cons_is = consolidated_statements.iter().find(|s| {
6289                    s.fiscal_year == fiscal_year
6290                        && s.fiscal_period == fiscal_period
6291                        && s.statement_type == StatementType::IncomeStatement
6292                });
6293                let cons_bs = consolidated_statements.iter().find(|s| {
6294                    s.fiscal_year == fiscal_year
6295                        && s.fiscal_period == fiscal_period
6296                        && s.statement_type == StatementType::BalanceSheet
6297                });
6298
6299                // If consolidated statements not available fall back to the flat list
6300                let is_stmt = cons_is.or_else(|| {
6301                    financial_statements.iter().find(|s| {
6302                        s.fiscal_year == fiscal_year
6303                            && s.fiscal_period == fiscal_period
6304                            && s.statement_type == StatementType::IncomeStatement
6305                    })
6306                });
6307                let bs_stmt = cons_bs.or_else(|| {
6308                    financial_statements.iter().find(|s| {
6309                        s.fiscal_year == fiscal_year
6310                            && s.fiscal_period == fiscal_period
6311                            && s.statement_type == StatementType::BalanceSheet
6312                    })
6313                });
6314
6315                let consolidated_revenue = is_stmt
6316                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6317                    .map(|li| -li.amount) // revenue is stored as negative in IS
6318                    .unwrap_or(rust_decimal::Decimal::ZERO);
6319
6320                let consolidated_profit = is_stmt
6321                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6322                    .map(|li| li.amount)
6323                    .unwrap_or(rust_decimal::Decimal::ZERO);
6324
6325                let consolidated_assets = bs_stmt
6326                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6327                    .map(|li| li.amount)
6328                    .unwrap_or(rust_decimal::Decimal::ZERO);
6329
6330                // Skip periods where we have no financial data
6331                if consolidated_revenue == rust_decimal::Decimal::ZERO
6332                    && consolidated_assets == rust_decimal::Decimal::ZERO
6333                {
6334                    continue;
6335                }
6336
6337                let group_code = self
6338                    .config
6339                    .companies
6340                    .first()
6341                    .map(|c| c.code.as_str())
6342                    .unwrap_or("GROUP");
6343
6344                // Depreciation expense (GL 6000*) from document type "CL" JEs (phase_period_close).
6345                // NOTE(review): summed over all JEs with no fiscal-period filter — confirm intent.
6346                let total_depr: rust_decimal::Decimal = journal_entries
6347                    .iter()
6348                    .filter(|je| je.header.document_type == "CL")
6349                    .flat_map(|je| je.lines.iter())
6350                    .filter(|l| l.gl_account.starts_with("6000"))
6351                    .map(|l| l.debit_amount)
6352                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6353                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6354                    Some(total_depr)
6355                } else {
6356                    None
6357                };
6358
6359                let (segs, recon) = seg_gen.generate(
6360                    group_code,
6361                    &period_label,
6362                    consolidated_revenue,
6363                    consolidated_profit,
6364                    consolidated_assets,
6365                    &entity_seeds,
6366                    depr_param,
6367                );
6368                segment_reports.extend(segs);
6369                segment_reconciliations.push(recon);
6370            }
6371
6372            info!(
6373                "Segment reports generated: {} segments, {} reconciliations",
6374                segment_reports.len(),
6375                segment_reconciliations.len()
6376            );
6377        }
6378
6379        // Generate bank reconciliations from payment data
6380        if br_enabled && !document_flows.payments.is_empty() {
6381            let employee_ids: Vec<String> = self
6382                .master_data
6383                .employees
6384                .iter()
6385                .map(|e| e.employee_id.clone())
6386                .collect();
6387            let mut br_gen =
6388                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6389
6390            // Group payments by company code and period
6391            for company in &self.config.companies {
6392                let company_payments: Vec<PaymentReference> = document_flows
6393                    .payments
6394                    .iter()
6395                    .filter(|p| p.header.company_code == company.code)
6396                    .map(|p| PaymentReference {
6397                        id: p.header.document_id.clone(),
6398                        amount: if p.is_vendor { p.amount } else { -p.amount },
6399                        date: p.header.document_date,
6400                        reference: p
6401                            .check_number
6402                            .clone()
6403                            .or_else(|| p.wire_reference.clone())
6404                            .unwrap_or_else(|| p.header.document_id.clone()),
6405                    })
6406                    .collect();
6407
6408                if company_payments.is_empty() {
6409                    continue;
6410                }
6411
6412                let bank_account_id = format!("{}-MAIN", company.code);
6413
6414                // Generate one reconciliation per period
6415                for period in 0..self.config.global.period_months {
6416                    let period_start = start_date + chrono::Months::new(period);
6417                    let period_end =
6418                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6419
6420                    let period_payments: Vec<PaymentReference> = company_payments
6421                        .iter()
6422                        .filter(|p| p.date >= period_start && p.date <= period_end)
6423                        .cloned()
6424                        .collect();
6425
6426                    let recon = br_gen.generate(
6427                        &company.code,
6428                        &bank_account_id,
6429                        period_start,
6430                        period_end,
6431                        &company.currency,
6432                        &period_payments,
6433                    );
6434                    bank_reconciliations.push(recon);
6435                }
6436            }
6437            info!(
6438                "Bank reconciliations generated: {} reconciliations",
6439                bank_reconciliations.len()
6440            );
6441        }
6442
6443        stats.bank_reconciliation_count = bank_reconciliations.len();
6444        self.check_resources_with_log("post-financial-reporting")?;
6445
6446        if !trial_balances.is_empty() {
6447            info!(
6448                "Period-close trial balances captured: {} periods",
6449                trial_balances.len()
6450            );
6451        }
6452
6453        // Notes to financial statements are generated in a separate post-processing step
6454        // (generate_notes_to_financial_statements) called after accounting_standards and tax
6455        // phases have completed, so that deferred tax and provision data can be wired in.
6456        let notes_to_financial_statements = Vec::new();
6457
6458        Ok(FinancialReportingSnapshot {
6459            financial_statements,
6460            standalone_statements,
6461            consolidated_statements,
6462            consolidation_schedules,
6463            bank_reconciliations,
6464            trial_balances,
6465            segment_reports,
6466            segment_reconciliations,
6467            notes_to_financial_statements,
6468        })
6469    }
6470
6471    /// Populate notes to financial statements using fully resolved snapshots.
6472    ///
6473    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
6474    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
6475    /// can be wired into the notes context, together with HR pension and audit data. The
6476    /// method mutates `financial_reporting.notes_to_financial_statements` in place.
6477    fn generate_notes_to_financial_statements(
6478        &self,
6479        financial_reporting: &mut FinancialReportingSnapshot,
6480        accounting_standards: &AccountingStandardsSnapshot,
6481        tax: &TaxSnapshot,
6482        hr: &HrSnapshot,
6483        audit: &AuditSnapshot,
6484        treasury: &TreasurySnapshot,
6485    ) {
6486        use datasynth_config::schema::AccountingFrameworkConfig;
6487        use datasynth_core::models::StatementType;
6488        use datasynth_generators::period_close::notes_generator::{
6489            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6490        };
6491
6492        let seed = self.seed;
6493        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6494        {
6495            Ok(d) => d,
6496            Err(_) => return,
6497        };
6498
6499        let mut notes_gen = NotesGenerator::new(seed + 4235);
6500
6501        for company in &self.config.companies {
6502            let last_period_end = start_date
6503                + chrono::Months::new(self.config.global.period_months)
6504                - chrono::Days::new(1);
6505            let fiscal_year = last_period_end.year() as u16;
6506
6507            // Extract relevant amounts from the already-generated financial statements
6508            let entity_is = financial_reporting
6509                .standalone_statements
6510                .get(&company.code)
6511                .and_then(|stmts| {
6512                    stmts.iter().find(|s| {
6513                        s.fiscal_year == fiscal_year
6514                            && s.statement_type == StatementType::IncomeStatement
6515                    })
6516                });
6517            let entity_bs = financial_reporting
6518                .standalone_statements
6519                .get(&company.code)
6520                .and_then(|stmts| {
6521                    stmts.iter().find(|s| {
6522                        s.fiscal_year == fiscal_year
6523                            && s.statement_type == StatementType::BalanceSheet
6524                    })
6525                });
6526
6527            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
6528            let revenue_amount = entity_is
6529                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6530                .map(|li| li.amount);
6531            let ppe_gross = entity_bs
6532                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6533                .map(|li| li.amount);
6534
6535            let framework = match self
6536                .config
6537                .accounting_standards
6538                .framework
6539                .unwrap_or_default()
6540            {
6541                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6542                    "IFRS".to_string()
6543                }
6544                _ => "US GAAP".to_string(),
6545            };
6546
6547            // ---- Deferred tax (IAS 12 / ASC 740) ----
6548            // Sum closing DTA and DTL from rollforward entries for this entity.
6549            let (entity_dta, entity_dtl) = {
6550                let mut dta = rust_decimal::Decimal::ZERO;
6551                let mut dtl = rust_decimal::Decimal::ZERO;
6552                for rf in &tax.deferred_tax.rollforwards {
6553                    if rf.entity_code == company.code {
6554                        dta += rf.closing_dta;
6555                        dtl += rf.closing_dtl;
6556                    }
6557                }
6558                (
6559                    if dta > rust_decimal::Decimal::ZERO {
6560                        Some(dta)
6561                    } else {
6562                        None
6563                    },
6564                    if dtl > rust_decimal::Decimal::ZERO {
6565                        Some(dtl)
6566                    } else {
6567                        None
6568                    },
6569                )
6570            };
6571
6572            // ---- Provisions (IAS 37 / ASC 450) ----
6573            // Filter provisions to this entity; sum best_estimate amounts.
6574            let entity_provisions: Vec<_> = accounting_standards
6575                .provisions
6576                .iter()
6577                .filter(|p| p.entity_code == company.code)
6578                .collect();
6579            let provision_count = entity_provisions.len();
6580            let total_provisions = if provision_count > 0 {
6581                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6582            } else {
6583                None
6584            };
6585
6586            // ---- Pension data from HR snapshot ----
6587            let entity_pension_plan_count = hr
6588                .pension_plans
6589                .iter()
6590                .filter(|p| p.entity_code == company.code)
6591                .count();
6592            let entity_total_dbo: Option<rust_decimal::Decimal> = {
6593                let sum: rust_decimal::Decimal = hr
6594                    .pension_disclosures
6595                    .iter()
6596                    .filter(|d| {
6597                        hr.pension_plans
6598                            .iter()
6599                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6600                    })
6601                    .map(|d| d.net_pension_liability)
6602                    .sum();
6603                let plan_assets_sum: rust_decimal::Decimal = hr
6604                    .pension_plan_assets
6605                    .iter()
6606                    .filter(|a| {
6607                        hr.pension_plans
6608                            .iter()
6609                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6610                    })
6611                    .map(|a| a.fair_value_closing)
6612                    .sum();
6613                if entity_pension_plan_count > 0 {
6614                    Some(sum + plan_assets_sum)
6615                } else {
6616                    None
6617                }
6618            };
6619            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6620                let sum: rust_decimal::Decimal = hr
6621                    .pension_plan_assets
6622                    .iter()
6623                    .filter(|a| {
6624                        hr.pension_plans
6625                            .iter()
6626                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6627                    })
6628                    .map(|a| a.fair_value_closing)
6629                    .sum();
6630                if entity_pension_plan_count > 0 {
6631                    Some(sum)
6632                } else {
6633                    None
6634                }
6635            };
6636
6637            // ---- Audit data: related parties + subsequent events ----
6638            // Audit snapshot covers all entities; use total counts (common case = single entity).
6639            let rp_count = audit.related_party_transactions.len();
6640            let se_count = audit.subsequent_events.len();
6641            let adjusting_count = audit
6642                .subsequent_events
6643                .iter()
6644                .filter(|e| {
6645                    matches!(
6646                        e.classification,
6647                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6648                    )
6649                })
6650                .count();
6651
6652            let ctx = NotesGeneratorContext {
6653                entity_code: company.code.clone(),
6654                framework,
6655                period: format!("FY{}", fiscal_year),
6656                period_end: last_period_end,
6657                currency: company.currency.clone(),
6658                revenue_amount,
6659                total_ppe_gross: ppe_gross,
6660                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6661                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
6662                deferred_tax_asset: entity_dta,
6663                deferred_tax_liability: entity_dtl,
6664                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
6665                provision_count,
6666                total_provisions,
6667                // Pension data from HR snapshot
6668                pension_plan_count: entity_pension_plan_count,
6669                total_dbo: entity_total_dbo,
6670                total_plan_assets: entity_total_plan_assets,
6671                // Audit data
6672                related_party_transaction_count: rp_count,
6673                subsequent_event_count: se_count,
6674                adjusting_event_count: adjusting_count,
6675                ..NotesGeneratorContext::default()
6676            };
6677
6678            let entity_notes = notes_gen.generate(&ctx);
6679            let standard_note_count = entity_notes.len() as u32;
6680            info!(
6681                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6682                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6683            );
6684            financial_reporting
6685                .notes_to_financial_statements
6686                .extend(entity_notes);
6687
6688            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
6689            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6690                .debt_instruments
6691                .iter()
6692                .filter(|d| d.entity_id == company.code)
6693                .map(|d| {
6694                    (
6695                        format!("{:?}", d.instrument_type),
6696                        d.principal,
6697                        d.maturity_date.to_string(),
6698                    )
6699                })
6700                .collect();
6701
6702            let hedge_count = treasury.hedge_relationships.len();
6703            let effective_hedges = treasury
6704                .hedge_relationships
6705                .iter()
6706                .filter(|h| h.is_effective)
6707                .count();
6708            let total_notional: rust_decimal::Decimal = treasury
6709                .hedging_instruments
6710                .iter()
6711                .map(|h| h.notional_amount)
6712                .sum();
6713            let total_fair_value: rust_decimal::Decimal = treasury
6714                .hedging_instruments
6715                .iter()
6716                .map(|h| h.fair_value)
6717                .sum();
6718
6719            // Join provision_movements with provisions to get entity/type info
6720            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6721                .provisions
6722                .iter()
6723                .filter(|p| p.entity_code == company.code)
6724                .map(|p| p.id.as_str())
6725                .collect();
6726            let provision_movements: Vec<(
6727                String,
6728                rust_decimal::Decimal,
6729                rust_decimal::Decimal,
6730                rust_decimal::Decimal,
6731            )> = accounting_standards
6732                .provision_movements
6733                .iter()
6734                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6735                .map(|m| {
6736                    let prov_type = accounting_standards
6737                        .provisions
6738                        .iter()
6739                        .find(|p| p.id == m.provision_id)
6740                        .map(|p| format!("{:?}", p.provision_type))
6741                        .unwrap_or_else(|| "Unknown".to_string());
6742                    (prov_type, m.opening, m.additions, m.closing)
6743                })
6744                .collect();
6745
6746            let enhanced_ctx = EnhancedNotesContext {
6747                entity_code: company.code.clone(),
6748                period: format!("FY{}", fiscal_year),
6749                currency: company.currency.clone(),
6750                // Inventory breakdown: best-effort using zero (would need balance tracker)
6751                finished_goods_value: rust_decimal::Decimal::ZERO,
6752                wip_value: rust_decimal::Decimal::ZERO,
6753                raw_materials_value: rust_decimal::Decimal::ZERO,
6754                debt_instruments,
6755                hedge_count,
6756                effective_hedges,
6757                total_notional,
6758                total_fair_value,
6759                provision_movements,
6760            };
6761
6762            let enhanced_notes =
6763                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6764            if !enhanced_notes.is_empty() {
6765                info!(
6766                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6767                    company.code,
6768                    enhanced_notes.len(),
6769                    enhanced_ctx.debt_instruments.len(),
6770                    hedge_count,
6771                    enhanced_ctx.provision_movements.len(),
6772                );
6773                financial_reporting
6774                    .notes_to_financial_statements
6775                    .extend(enhanced_notes);
6776            }
6777        }
6778    }
6779
6780    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
6781    ///
6782    /// This ensures the trial balance is coherent with the JEs: every debit and credit
6783    /// posted in the journal entries flows through to the trial balance, using the real
6784    /// GL account numbers from the CoA.
6785    fn build_trial_balance_from_entries(
6786        journal_entries: &[JournalEntry],
6787        coa: &ChartOfAccounts,
6788        company_code: &str,
6789        fiscal_year: u16,
6790        fiscal_period: u8,
6791    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6792        use rust_decimal::Decimal;
6793
6794        // Accumulate total debits and credits per GL account
6795        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6796        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6797
6798        for je in journal_entries {
6799            // Filter to matching company, fiscal year, and period
6800            if je.header.company_code != company_code
6801                || je.header.fiscal_year != fiscal_year
6802                || je.header.fiscal_period != fiscal_period
6803            {
6804                continue;
6805            }
6806
6807            for line in &je.lines {
6808                let acct = &line.gl_account;
6809                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6810                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6811            }
6812        }
6813
6814        // Build a TrialBalanceEntry for each account that had activity
6815        let mut all_accounts: Vec<&String> = account_debits
6816            .keys()
6817            .chain(account_credits.keys())
6818            .collect::<std::collections::HashSet<_>>()
6819            .into_iter()
6820            .collect();
6821        all_accounts.sort();
6822
6823        let mut entries = Vec::new();
6824
6825        for acct_number in all_accounts {
6826            let debit = account_debits
6827                .get(acct_number)
6828                .copied()
6829                .unwrap_or(Decimal::ZERO);
6830            let credit = account_credits
6831                .get(acct_number)
6832                .copied()
6833                .unwrap_or(Decimal::ZERO);
6834
6835            if debit.is_zero() && credit.is_zero() {
6836                continue;
6837            }
6838
6839            // Look up account name from CoA, fall back to "Account {code}"
6840            let account_name = coa
6841                .get_account(acct_number)
6842                .map(|gl| gl.short_description.clone())
6843                .unwrap_or_else(|| format!("Account {acct_number}"));
6844
6845            // Map account code prefix to the category strings expected by
6846            // FinancialStatementGenerator (Cash, Receivables, Inventory,
6847            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
6848            // OperatingExpenses).
6849            let category = Self::category_from_account_code(acct_number);
6850
6851            entries.push(datasynth_generators::TrialBalanceEntry {
6852                account_code: acct_number.clone(),
6853                account_name,
6854                category,
6855                debit_balance: debit,
6856                credit_balance: credit,
6857            });
6858        }
6859
6860        entries
6861    }
6862
6863    /// Build a cumulative trial balance by aggregating all JEs from the start up to
6864    /// (and including) the given period end date.
6865    ///
6866    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
6867    /// while income statement accounts (revenue, expenses) show only the current period.
6868    /// The two are merged into a single Vec for the FinancialStatementGenerator.
6869    fn build_cumulative_trial_balance(
6870        journal_entries: &[JournalEntry],
6871        coa: &ChartOfAccounts,
6872        company_code: &str,
6873        start_date: NaiveDate,
6874        period_end: NaiveDate,
6875        fiscal_year: u16,
6876        fiscal_period: u8,
6877    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6878        use rust_decimal::Decimal;
6879
6880        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
6881        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6882        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6883
6884        // Accumulate debits/credits for income statement accounts (current period only)
6885        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6886        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6887
6888        for je in journal_entries {
6889            if je.header.company_code != company_code {
6890                continue;
6891            }
6892
6893            for line in &je.lines {
6894                let acct = &line.gl_account;
6895                let category = Self::category_from_account_code(acct);
6896                let is_bs_account = matches!(
6897                    category.as_str(),
6898                    "Cash"
6899                        | "Receivables"
6900                        | "Inventory"
6901                        | "FixedAssets"
6902                        | "Payables"
6903                        | "AccruedLiabilities"
6904                        | "LongTermDebt"
6905                        | "Equity"
6906                );
6907
6908                if is_bs_account {
6909                    // Balance sheet: accumulate from start through period_end
6910                    if je.header.document_date <= period_end
6911                        && je.header.document_date >= start_date
6912                    {
6913                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6914                            line.debit_amount;
6915                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6916                            line.credit_amount;
6917                    }
6918                } else {
6919                    // Income statement: current period only
6920                    if je.header.fiscal_year == fiscal_year
6921                        && je.header.fiscal_period == fiscal_period
6922                    {
6923                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6924                            line.debit_amount;
6925                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6926                            line.credit_amount;
6927                    }
6928                }
6929            }
6930        }
6931
6932        // Merge all accounts
6933        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6934        all_accounts.extend(bs_debits.keys().cloned());
6935        all_accounts.extend(bs_credits.keys().cloned());
6936        all_accounts.extend(is_debits.keys().cloned());
6937        all_accounts.extend(is_credits.keys().cloned());
6938
6939        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6940        sorted_accounts.sort();
6941
6942        let mut entries = Vec::new();
6943
6944        for acct_number in &sorted_accounts {
6945            let category = Self::category_from_account_code(acct_number);
6946            let is_bs_account = matches!(
6947                category.as_str(),
6948                "Cash"
6949                    | "Receivables"
6950                    | "Inventory"
6951                    | "FixedAssets"
6952                    | "Payables"
6953                    | "AccruedLiabilities"
6954                    | "LongTermDebt"
6955                    | "Equity"
6956            );
6957
6958            let (debit, credit) = if is_bs_account {
6959                (
6960                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6961                    bs_credits
6962                        .get(acct_number)
6963                        .copied()
6964                        .unwrap_or(Decimal::ZERO),
6965                )
6966            } else {
6967                (
6968                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6969                    is_credits
6970                        .get(acct_number)
6971                        .copied()
6972                        .unwrap_or(Decimal::ZERO),
6973                )
6974            };
6975
6976            if debit.is_zero() && credit.is_zero() {
6977                continue;
6978            }
6979
6980            let account_name = coa
6981                .get_account(acct_number)
6982                .map(|gl| gl.short_description.clone())
6983                .unwrap_or_else(|| format!("Account {acct_number}"));
6984
6985            entries.push(datasynth_generators::TrialBalanceEntry {
6986                account_code: acct_number.clone(),
6987                account_name,
6988                category,
6989                debit_balance: debit,
6990                credit_balance: credit,
6991            });
6992        }
6993
6994        entries
6995    }
6996
6997    /// Build a JE-derived cash flow statement using the indirect method.
6998    ///
6999    /// Compares current and prior cumulative trial balances to derive working capital
7000    /// changes, producing a coherent cash flow statement tied to actual journal entries.
7001    fn build_cash_flow_from_trial_balances(
7002        current_tb: &[datasynth_generators::TrialBalanceEntry],
7003        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7004        net_income: rust_decimal::Decimal,
7005    ) -> Vec<CashFlowItem> {
7006        use rust_decimal::Decimal;
7007
7008        // Helper: aggregate a TB by category and return net (debit - credit)
7009        let aggregate =
7010            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7011                let mut map: HashMap<String, Decimal> = HashMap::new();
7012                for entry in tb {
7013                    let net = entry.debit_balance - entry.credit_balance;
7014                    *map.entry(entry.category.clone()).or_default() += net;
7015                }
7016                map
7017            };
7018
7019        let current = aggregate(current_tb);
7020        let prior = prior_tb.map(aggregate);
7021
7022        // Get balance for a category, defaulting to zero
7023        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7024            *map.get(key).unwrap_or(&Decimal::ZERO)
7025        };
7026
7027        // Compute change: current - prior (or current if no prior)
7028        let change = |key: &str| -> Decimal {
7029            let curr = get(&current, key);
7030            match &prior {
7031                Some(p) => curr - get(p, key),
7032                None => curr,
7033            }
7034        };
7035
7036        // Operating activities (indirect method)
7037        // Depreciation add-back: approximate from FixedAssets decrease
7038        let fixed_asset_change = change("FixedAssets");
7039        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7040            -fixed_asset_change
7041        } else {
7042            Decimal::ZERO
7043        };
7044
7045        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
7046        let ar_change = change("Receivables");
7047        let inventory_change = change("Inventory");
7048        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
7049        let ap_change = change("Payables");
7050        let accrued_change = change("AccruedLiabilities");
7051
7052        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7053            + (-ap_change)
7054            + (-accrued_change);
7055
7056        // Investing activities
7057        let capex = if fixed_asset_change > Decimal::ZERO {
7058            -fixed_asset_change
7059        } else {
7060            Decimal::ZERO
7061        };
7062        let investing_cf = capex;
7063
7064        // Financing activities
7065        let debt_change = -change("LongTermDebt");
7066        let equity_change = -change("Equity");
7067        let financing_cf = debt_change + equity_change;
7068
7069        let net_change = operating_cf + investing_cf + financing_cf;
7070
7071        vec![
7072            CashFlowItem {
7073                item_code: "CF-NI".to_string(),
7074                label: "Net Income".to_string(),
7075                category: CashFlowCategory::Operating,
7076                amount: net_income,
7077                amount_prior: None,
7078                sort_order: 1,
7079                is_total: false,
7080            },
7081            CashFlowItem {
7082                item_code: "CF-DEP".to_string(),
7083                label: "Depreciation & Amortization".to_string(),
7084                category: CashFlowCategory::Operating,
7085                amount: depreciation_addback,
7086                amount_prior: None,
7087                sort_order: 2,
7088                is_total: false,
7089            },
7090            CashFlowItem {
7091                item_code: "CF-AR".to_string(),
7092                label: "Change in Accounts Receivable".to_string(),
7093                category: CashFlowCategory::Operating,
7094                amount: -ar_change,
7095                amount_prior: None,
7096                sort_order: 3,
7097                is_total: false,
7098            },
7099            CashFlowItem {
7100                item_code: "CF-AP".to_string(),
7101                label: "Change in Accounts Payable".to_string(),
7102                category: CashFlowCategory::Operating,
7103                amount: -ap_change,
7104                amount_prior: None,
7105                sort_order: 4,
7106                is_total: false,
7107            },
7108            CashFlowItem {
7109                item_code: "CF-INV".to_string(),
7110                label: "Change in Inventory".to_string(),
7111                category: CashFlowCategory::Operating,
7112                amount: -inventory_change,
7113                amount_prior: None,
7114                sort_order: 5,
7115                is_total: false,
7116            },
7117            CashFlowItem {
7118                item_code: "CF-OP".to_string(),
7119                label: "Net Cash from Operating Activities".to_string(),
7120                category: CashFlowCategory::Operating,
7121                amount: operating_cf,
7122                amount_prior: None,
7123                sort_order: 6,
7124                is_total: true,
7125            },
7126            CashFlowItem {
7127                item_code: "CF-CAPEX".to_string(),
7128                label: "Capital Expenditures".to_string(),
7129                category: CashFlowCategory::Investing,
7130                amount: capex,
7131                amount_prior: None,
7132                sort_order: 7,
7133                is_total: false,
7134            },
7135            CashFlowItem {
7136                item_code: "CF-INV-T".to_string(),
7137                label: "Net Cash from Investing Activities".to_string(),
7138                category: CashFlowCategory::Investing,
7139                amount: investing_cf,
7140                amount_prior: None,
7141                sort_order: 8,
7142                is_total: true,
7143            },
7144            CashFlowItem {
7145                item_code: "CF-DEBT".to_string(),
7146                label: "Net Borrowings / (Repayments)".to_string(),
7147                category: CashFlowCategory::Financing,
7148                amount: debt_change,
7149                amount_prior: None,
7150                sort_order: 9,
7151                is_total: false,
7152            },
7153            CashFlowItem {
7154                item_code: "CF-EQ".to_string(),
7155                label: "Equity Changes".to_string(),
7156                category: CashFlowCategory::Financing,
7157                amount: equity_change,
7158                amount_prior: None,
7159                sort_order: 10,
7160                is_total: false,
7161            },
7162            CashFlowItem {
7163                item_code: "CF-FIN-T".to_string(),
7164                label: "Net Cash from Financing Activities".to_string(),
7165                category: CashFlowCategory::Financing,
7166                amount: financing_cf,
7167                amount_prior: None,
7168                sort_order: 11,
7169                is_total: true,
7170            },
7171            CashFlowItem {
7172                item_code: "CF-NET".to_string(),
7173                label: "Net Change in Cash".to_string(),
7174                category: CashFlowCategory::Operating,
7175                amount: net_change,
7176                amount_prior: None,
7177                sort_order: 12,
7178                is_total: true,
7179            },
7180        ]
7181    }
7182
7183    /// Calculate net income from a set of trial balance entries.
7184    ///
7185    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
7186    fn calculate_net_income_from_tb(
7187        tb: &[datasynth_generators::TrialBalanceEntry],
7188    ) -> rust_decimal::Decimal {
7189        use rust_decimal::Decimal;
7190
7191        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7192        for entry in tb {
7193            let net = entry.debit_balance - entry.credit_balance;
7194            *aggregated.entry(entry.category.clone()).or_default() += net;
7195        }
7196
7197        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7198        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7199        let opex = *aggregated
7200            .get("OperatingExpenses")
7201            .unwrap_or(&Decimal::ZERO);
7202        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7203        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7204
7205        // revenue is negative (credit-normal), expenses are positive (debit-normal)
7206        // other_income is typically negative (credit), other_expenses is typically positive
7207        let operating_income = revenue - cogs - opex - other_expenses - other_income;
7208        let tax_rate = Decimal::new(25, 2); // 0.25
7209        let tax = operating_income * tax_rate;
7210        operating_income - tax
7211    }
7212
7213    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
7214    ///
7215    /// Uses the first two digits of the account code to classify into the categories
7216    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
7217    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
7218    /// OperatingExpenses, OtherIncome, OtherExpenses.
7219    fn category_from_account_code(code: &str) -> String {
7220        let prefix: String = code.chars().take(2).collect();
7221        match prefix.as_str() {
7222            "10" => "Cash",
7223            "11" => "Receivables",
7224            "12" | "13" | "14" => "Inventory",
7225            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
7226            "20" => "Payables",
7227            "21" | "22" | "23" | "24" => "AccruedLiabilities",
7228            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
7229            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
7230            "40" | "41" | "42" | "43" | "44" => "Revenue",
7231            "50" | "51" | "52" => "CostOfSales",
7232            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
7233                "OperatingExpenses"
7234            }
7235            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
7236            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
7237            _ => "OperatingExpenses",
7238        }
7239        .to_string()
7240    }
7241
7242    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
7243    fn phase_hr_data(
7244        &mut self,
7245        stats: &mut EnhancedGenerationStatistics,
7246    ) -> SynthResult<HrSnapshot> {
7247        if !self.phase_config.generate_hr {
7248            debug!("Phase 16: Skipped (HR generation disabled)");
7249            return Ok(HrSnapshot::default());
7250        }
7251
7252        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7253
7254        let seed = self.seed;
7255        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7256            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7257        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7258        let company_code = self
7259            .config
7260            .companies
7261            .first()
7262            .map(|c| c.code.as_str())
7263            .unwrap_or("1000");
7264        let currency = self
7265            .config
7266            .companies
7267            .first()
7268            .map(|c| c.currency.as_str())
7269            .unwrap_or("USD");
7270
7271        let employee_ids: Vec<String> = self
7272            .master_data
7273            .employees
7274            .iter()
7275            .map(|e| e.employee_id.clone())
7276            .collect();
7277
7278        if employee_ids.is_empty() {
7279            debug!("Phase 16: Skipped (no employees available)");
7280            return Ok(HrSnapshot::default());
7281        }
7282
7283        // Extract cost-center pool from master data employees for cross-reference
7284        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
7285        let cost_center_ids: Vec<String> = self
7286            .master_data
7287            .employees
7288            .iter()
7289            .filter_map(|e| e.cost_center.clone())
7290            .collect::<std::collections::HashSet<_>>()
7291            .into_iter()
7292            .collect();
7293
7294        let mut snapshot = HrSnapshot::default();
7295
7296        // Generate payroll runs (one per month)
7297        if self.config.hr.payroll.enabled {
7298            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7299                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7300
7301            // Look up country pack for payroll deductions and labels
7302            let payroll_pack = self.primary_pack();
7303
7304            // Store the pack on the generator so generate() resolves
7305            // localized deduction rates and labels from it.
7306            payroll_gen.set_country_pack(payroll_pack.clone());
7307
7308            let employees_with_salary: Vec<(
7309                String,
7310                rust_decimal::Decimal,
7311                Option<String>,
7312                Option<String>,
7313            )> = self
7314                .master_data
7315                .employees
7316                .iter()
7317                .map(|e| {
7318                    // Use the employee's actual annual base salary.
7319                    // Fall back to $60,000 / yr if somehow zero.
7320                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7321                        e.base_salary
7322                    } else {
7323                        rust_decimal::Decimal::from(60_000)
7324                    };
7325                    (
7326                        e.employee_id.clone(),
7327                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
7328                        e.cost_center.clone(),
7329                        e.department_id.clone(),
7330                    )
7331                })
7332                .collect();
7333
7334            // Use generate_with_changes when employee change history is available
7335            // so that salary adjustments, transfers, etc. are reflected in payroll.
7336            let change_history = &self.master_data.employee_change_history;
7337            let has_changes = !change_history.is_empty();
7338            if has_changes {
7339                debug!(
7340                    "Payroll will incorporate {} employee change events",
7341                    change_history.len()
7342                );
7343            }
7344
7345            for month in 0..self.config.global.period_months {
7346                let period_start = start_date + chrono::Months::new(month);
7347                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7348                let (run, items) = if has_changes {
7349                    payroll_gen.generate_with_changes(
7350                        company_code,
7351                        &employees_with_salary,
7352                        period_start,
7353                        period_end,
7354                        currency,
7355                        change_history,
7356                    )
7357                } else {
7358                    payroll_gen.generate(
7359                        company_code,
7360                        &employees_with_salary,
7361                        period_start,
7362                        period_end,
7363                        currency,
7364                    )
7365                };
7366                snapshot.payroll_runs.push(run);
7367                snapshot.payroll_run_count += 1;
7368                snapshot.payroll_line_item_count += items.len();
7369                snapshot.payroll_line_items.extend(items);
7370            }
7371        }
7372
7373        // Generate time entries
7374        if self.config.hr.time_attendance.enabled {
7375            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7376                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7377            // v3.4.2: when a temporal context is configured, time entries
7378            // respect holidays (not just weekends) and submitted_at lag
7379            // snaps to business days.
7380            if let Some(ctx) = &self.temporal_context {
7381                time_gen.set_temporal_context(Arc::clone(ctx));
7382            }
7383            let entries = time_gen.generate(
7384                &employee_ids,
7385                start_date,
7386                end_date,
7387                &self.config.hr.time_attendance,
7388            );
7389            snapshot.time_entry_count = entries.len();
7390            snapshot.time_entries = entries;
7391        }
7392
7393        // Generate expense reports
7394        if self.config.hr.expenses.enabled {
7395            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7396                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7397            expense_gen.set_country_pack(self.primary_pack().clone());
7398            // v3.4.2: snap submission / approval / paid / line-item dates
7399            // to business days when temporal_context is present.
7400            if let Some(ctx) = &self.temporal_context {
7401                expense_gen.set_temporal_context(Arc::clone(ctx));
7402            }
7403            let company_currency = self
7404                .config
7405                .companies
7406                .first()
7407                .map(|c| c.currency.as_str())
7408                .unwrap_or("USD");
7409            let reports = expense_gen.generate_with_currency(
7410                &employee_ids,
7411                start_date,
7412                end_date,
7413                &self.config.hr.expenses,
7414                company_currency,
7415            );
7416            snapshot.expense_report_count = reports.len();
7417            snapshot.expense_reports = reports;
7418        }
7419
7420        // Generate benefit enrollments (gated on payroll, since benefits require employees)
7421        if self.config.hr.payroll.enabled {
7422            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7423            let employee_pairs: Vec<(String, String)> = self
7424                .master_data
7425                .employees
7426                .iter()
7427                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7428                .collect();
7429            let enrollments =
7430                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7431            snapshot.benefit_enrollment_count = enrollments.len();
7432            snapshot.benefit_enrollments = enrollments;
7433        }
7434
7435        // Generate defined benefit pension plans (IAS 19 / ASC 715)
7436        if self.phase_config.generate_hr {
7437            let entity_name = self
7438                .config
7439                .companies
7440                .first()
7441                .map(|c| c.name.as_str())
7442                .unwrap_or("Entity");
7443            let period_months = self.config.global.period_months;
7444            let period_label = {
7445                let y = start_date.year();
7446                let m = start_date.month();
7447                if period_months >= 12 {
7448                    format!("FY{y}")
7449                } else {
7450                    format!("{y}-{m:02}")
7451                }
7452            };
7453            let reporting_date =
7454                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7455
7456            // Compute average annual salary from actual payroll data when available.
7457            // PayrollRun.total_gross covers all employees for one pay period; we sum
7458            // across all runs and divide by employee_count to get per-employee total,
7459            // then annualise for sub-annual periods.
7460            let avg_salary: Option<rust_decimal::Decimal> = {
7461                let employee_count = employee_ids.len();
7462                if self.config.hr.payroll.enabled
7463                    && employee_count > 0
7464                    && !snapshot.payroll_runs.is_empty()
7465                {
7466                    // Sum total gross pay across all payroll runs for this company
7467                    let total_gross: rust_decimal::Decimal = snapshot
7468                        .payroll_runs
7469                        .iter()
7470                        .filter(|r| r.company_code == company_code)
7471                        .map(|r| r.total_gross)
7472                        .sum();
7473                    if total_gross > rust_decimal::Decimal::ZERO {
7474                        // Annualise: total_gross covers `period_months` months of pay
7475                        let annual_total = if period_months > 0 && period_months < 12 {
7476                            total_gross * rust_decimal::Decimal::from(12u32)
7477                                / rust_decimal::Decimal::from(period_months)
7478                        } else {
7479                            total_gross
7480                        };
7481                        Some(
7482                            (annual_total / rust_decimal::Decimal::from(employee_count))
7483                                .round_dp(2),
7484                        )
7485                    } else {
7486                        None
7487                    }
7488                } else {
7489                    None
7490                }
7491            };
7492
7493            let mut pension_gen =
7494                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7495            let pension_snap = pension_gen.generate(
7496                company_code,
7497                entity_name,
7498                &period_label,
7499                reporting_date,
7500                employee_ids.len(),
7501                currency,
7502                avg_salary,
7503                period_months,
7504            );
7505            snapshot.pension_plan_count = pension_snap.plans.len();
7506            snapshot.pension_plans = pension_snap.plans;
7507            snapshot.pension_obligations = pension_snap.obligations;
7508            snapshot.pension_plan_assets = pension_snap.plan_assets;
7509            snapshot.pension_disclosures = pension_snap.disclosures;
7510            // Pension JEs are returned here so they can be added to entries
7511            // in the caller (stored temporarily on snapshot for transfer).
7512            // We embed them in the hr snapshot for simplicity; the orchestrator
7513            // will extract and extend `entries`.
7514            snapshot.pension_journal_entries = pension_snap.journal_entries;
7515        }
7516
7517        // Generate stock-based compensation (ASC 718 / IFRS 2)
7518        if self.phase_config.generate_hr && !employee_ids.is_empty() {
7519            let period_months = self.config.global.period_months;
7520            let period_label = {
7521                let y = start_date.year();
7522                let m = start_date.month();
7523                if period_months >= 12 {
7524                    format!("FY{y}")
7525                } else {
7526                    format!("{y}-{m:02}")
7527                }
7528            };
7529            let reporting_date =
7530                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7531
7532            let mut stock_comp_gen =
7533                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7534            let stock_snap = stock_comp_gen.generate(
7535                company_code,
7536                &employee_ids,
7537                start_date,
7538                &period_label,
7539                reporting_date,
7540                currency,
7541            );
7542            snapshot.stock_grant_count = stock_snap.grants.len();
7543            snapshot.stock_grants = stock_snap.grants;
7544            snapshot.stock_comp_expenses = stock_snap.expenses;
7545            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7546        }
7547
7548        stats.payroll_run_count = snapshot.payroll_run_count;
7549        stats.time_entry_count = snapshot.time_entry_count;
7550        stats.expense_report_count = snapshot.expense_report_count;
7551        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7552        stats.pension_plan_count = snapshot.pension_plan_count;
7553        stats.stock_grant_count = snapshot.stock_grant_count;
7554
7555        info!(
7556            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7557            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7558            snapshot.time_entry_count, snapshot.expense_report_count,
7559            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7560            snapshot.stock_grant_count
7561        );
7562        self.check_resources_with_log("post-hr")?;
7563
7564        Ok(snapshot)
7565    }
7566
7567    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
7568    fn phase_accounting_standards(
7569        &mut self,
7570        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7571        journal_entries: &[JournalEntry],
7572        stats: &mut EnhancedGenerationStatistics,
7573    ) -> SynthResult<AccountingStandardsSnapshot> {
7574        if !self.phase_config.generate_accounting_standards {
7575            debug!("Phase 17: Skipped (accounting standards generation disabled)");
7576            return Ok(AccountingStandardsSnapshot::default());
7577        }
7578        info!("Phase 17: Generating Accounting Standards Data");
7579
7580        let seed = self.seed;
7581        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7582            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7583        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7584        let company_code = self
7585            .config
7586            .companies
7587            .first()
7588            .map(|c| c.code.as_str())
7589            .unwrap_or("1000");
7590        let currency = self
7591            .config
7592            .companies
7593            .first()
7594            .map(|c| c.currency.as_str())
7595            .unwrap_or("USD");
7596
7597        // Convert config framework to standards framework.
7598        // If the user explicitly set a framework in the YAML config, use that.
7599        // Otherwise, fall back to the country pack's accounting.framework field,
7600        // and if that is also absent or unrecognised, default to US GAAP.
7601        let framework = match self.config.accounting_standards.framework {
7602            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7603                datasynth_standards::framework::AccountingFramework::UsGaap
7604            }
7605            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7606                datasynth_standards::framework::AccountingFramework::Ifrs
7607            }
7608            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7609                datasynth_standards::framework::AccountingFramework::DualReporting
7610            }
7611            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7612                datasynth_standards::framework::AccountingFramework::FrenchGaap
7613            }
7614            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7615                datasynth_standards::framework::AccountingFramework::GermanGaap
7616            }
7617            None => {
7618                // Derive framework from the primary company's country pack
7619                let pack = self.primary_pack();
7620                let pack_fw = pack.accounting.framework.as_str();
7621                match pack_fw {
7622                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7623                    "dual_reporting" => {
7624                        datasynth_standards::framework::AccountingFramework::DualReporting
7625                    }
7626                    "french_gaap" => {
7627                        datasynth_standards::framework::AccountingFramework::FrenchGaap
7628                    }
7629                    "german_gaap" | "hgb" => {
7630                        datasynth_standards::framework::AccountingFramework::GermanGaap
7631                    }
7632                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
7633                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7634                }
7635            }
7636        };
7637
7638        let mut snapshot = AccountingStandardsSnapshot::default();
7639
7640        // Revenue recognition
7641        if self.config.accounting_standards.revenue_recognition.enabled {
7642            let customer_ids: Vec<String> = self
7643                .master_data
7644                .customers
7645                .iter()
7646                .map(|c| c.customer_id.clone())
7647                .collect();
7648
7649            if !customer_ids.is_empty() {
7650                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7651                let contracts = rev_gen.generate(
7652                    company_code,
7653                    &customer_ids,
7654                    start_date,
7655                    end_date,
7656                    currency,
7657                    &self.config.accounting_standards.revenue_recognition,
7658                    framework,
7659                );
7660                snapshot.revenue_contract_count = contracts.len();
7661                snapshot.contracts = contracts;
7662            }
7663        }
7664
7665        // Impairment testing
7666        if self.config.accounting_standards.impairment.enabled {
7667            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7668                .master_data
7669                .assets
7670                .iter()
7671                .map(|a| {
7672                    (
7673                        a.asset_id.clone(),
7674                        a.description.clone(),
7675                        a.acquisition_cost,
7676                    )
7677                })
7678                .collect();
7679
7680            if !asset_data.is_empty() {
7681                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7682                let tests = imp_gen.generate(
7683                    company_code,
7684                    &asset_data,
7685                    end_date,
7686                    &self.config.accounting_standards.impairment,
7687                    framework,
7688                );
7689                snapshot.impairment_test_count = tests.len();
7690                snapshot.impairment_tests = tests;
7691            }
7692        }
7693
7694        // Business combinations (IFRS 3 / ASC 805)
7695        if self
7696            .config
7697            .accounting_standards
7698            .business_combinations
7699            .enabled
7700        {
7701            let bc_config = &self.config.accounting_standards.business_combinations;
7702            let framework_str = match framework {
7703                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7704                _ => "US_GAAP",
7705            };
7706            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7707            let bc_snap = bc_gen.generate(
7708                company_code,
7709                currency,
7710                start_date,
7711                end_date,
7712                bc_config.acquisition_count,
7713                framework_str,
7714            );
7715            snapshot.business_combination_count = bc_snap.combinations.len();
7716            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7717            snapshot.business_combinations = bc_snap.combinations;
7718        }
7719
7720        // Expected Credit Loss (IFRS 9 / ASC 326)
7721        if self
7722            .config
7723            .accounting_standards
7724            .expected_credit_loss
7725            .enabled
7726        {
7727            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7728            let framework_str = match framework {
7729                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7730                _ => "ASC_326",
7731            };
7732
7733            // Use AR aging data from the subledger snapshot if available;
7734            // otherwise generate synthetic bucket exposures.
7735            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7736
7737            let mut ecl_gen = EclGenerator::new(seed + 43);
7738
7739            // Collect combined bucket totals across all company AR aging reports.
7740            let bucket_exposures: Vec<(
7741                datasynth_core::models::subledger::ar::AgingBucket,
7742                rust_decimal::Decimal,
7743            )> = if ar_aging_reports.is_empty() {
7744                // No AR aging data — synthesise plausible bucket exposures.
7745                use datasynth_core::models::subledger::ar::AgingBucket;
7746                vec![
7747                    (
7748                        AgingBucket::Current,
7749                        rust_decimal::Decimal::from(500_000_u32),
7750                    ),
7751                    (
7752                        AgingBucket::Days1To30,
7753                        rust_decimal::Decimal::from(120_000_u32),
7754                    ),
7755                    (
7756                        AgingBucket::Days31To60,
7757                        rust_decimal::Decimal::from(45_000_u32),
7758                    ),
7759                    (
7760                        AgingBucket::Days61To90,
7761                        rust_decimal::Decimal::from(15_000_u32),
7762                    ),
7763                    (
7764                        AgingBucket::Over90Days,
7765                        rust_decimal::Decimal::from(8_000_u32),
7766                    ),
7767                ]
7768            } else {
7769                use datasynth_core::models::subledger::ar::AgingBucket;
7770                // Sum bucket totals from all reports.
7771                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7772                    std::collections::HashMap::new();
7773                for report in ar_aging_reports {
7774                    for (bucket, amount) in &report.bucket_totals {
7775                        *totals.entry(*bucket).or_default() += amount;
7776                    }
7777                }
7778                AgingBucket::all()
7779                    .into_iter()
7780                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7781                    .collect()
7782            };
7783
7784            let ecl_snap = ecl_gen.generate(
7785                company_code,
7786                end_date,
7787                &bucket_exposures,
7788                ecl_config,
7789                &period_label,
7790                framework_str,
7791            );
7792
7793            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7794            snapshot.ecl_models = ecl_snap.ecl_models;
7795            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7796            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7797        }
7798
7799        // Provisions and contingencies (IAS 37 / ASC 450)
7800        {
7801            let framework_str = match framework {
7802                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7803                _ => "US_GAAP",
7804            };
7805
7806            // Compute actual revenue from the journal entries generated so far.
7807            // The `journal_entries` slice passed to this phase contains all GL entries
7808            // up to and including Period Close. Fall back to a minimum of 100_000 to
7809            // avoid degenerate zero-based provision amounts on first-period datasets.
7810            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7811                .max(rust_decimal::Decimal::from(100_000_u32));
7812
7813            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7814
7815            let mut prov_gen = ProvisionGenerator::new(seed + 44);
7816            let prov_snap = prov_gen.generate(
7817                company_code,
7818                currency,
7819                revenue_proxy,
7820                end_date,
7821                &period_label,
7822                framework_str,
7823                None, // prior_opening: no carry-forward data in single-period runs
7824            );
7825
7826            snapshot.provision_count = prov_snap.provisions.len();
7827            snapshot.provisions = prov_snap.provisions;
7828            snapshot.provision_movements = prov_snap.movements;
7829            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7830            snapshot.provision_journal_entries = prov_snap.journal_entries;
7831        }
7832
7833        // IAS 21 Functional Currency Translation
7834        // For each company whose functional currency differs from the presentation
7835        // currency, generate a CurrencyTranslationResult with CTA (OCI).
7836        {
7837            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7838
7839            let presentation_currency = self
7840                .config
7841                .global
7842                .presentation_currency
7843                .clone()
7844                .unwrap_or_else(|| self.config.global.group_currency.clone());
7845
7846            // Build a minimal rate table populated with approximate rates from
7847            // the FX model base rates (USD-based) so we can do the translation.
7848            let mut rate_table = FxRateTable::new(&presentation_currency);
7849
7850            // Populate with base rates against USD; if presentation_currency is
7851            // not USD we do a best-effort two-step conversion using the table's
7852            // triangulation support.
7853            let base_rates = base_rates_usd();
7854            for (ccy, rate) in &base_rates {
7855                rate_table.add_rate(FxRate::new(
7856                    ccy,
7857                    "USD",
7858                    RateType::Closing,
7859                    end_date,
7860                    *rate,
7861                    "SYNTHETIC",
7862                ));
7863                // Average rate = 98% of closing (approximation).
7864                // 0.98 = 98/100 = Decimal::new(98, 2)
7865                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7866                rate_table.add_rate(FxRate::new(
7867                    ccy,
7868                    "USD",
7869                    RateType::Average,
7870                    end_date,
7871                    avg,
7872                    "SYNTHETIC",
7873                ));
7874            }
7875
7876            let mut translation_results = Vec::new();
7877            for company in &self.config.companies {
7878                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
7879                // to ensure the translation produces non-trivial CTA amounts.
7880                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7881                    .max(rust_decimal::Decimal::from(100_000_u32));
7882
7883                let func_ccy = company
7884                    .functional_currency
7885                    .clone()
7886                    .unwrap_or_else(|| company.currency.clone());
7887
7888                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7889                    &company.code,
7890                    &func_ccy,
7891                    &presentation_currency,
7892                    &ias21_period_label,
7893                    end_date,
7894                    company_revenue,
7895                    &rate_table,
7896                );
7897                translation_results.push(result);
7898            }
7899
7900            snapshot.currency_translation_count = translation_results.len();
7901            snapshot.currency_translation_results = translation_results;
7902        }
7903
7904        stats.revenue_contract_count = snapshot.revenue_contract_count;
7905        stats.impairment_test_count = snapshot.impairment_test_count;
7906        stats.business_combination_count = snapshot.business_combination_count;
7907        stats.ecl_model_count = snapshot.ecl_model_count;
7908        stats.provision_count = snapshot.provision_count;
7909
7910        // ------------------------------------------------------------
7911        // v3.3.1: Lease accounting (IFRS 16 / ASC 842)
7912        // ------------------------------------------------------------
7913        if self.config.accounting_standards.leases.enabled {
7914            use datasynth_generators::standards::LeaseGenerator;
7915            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7916                .unwrap_or_else(|_| {
7917                    NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
7918                });
7919            let framework =
7920                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7921            let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
7922            for company in &self.config.companies {
7923                let leases = lease_gen.generate(
7924                    &company.code,
7925                    start_date,
7926                    &self.config.accounting_standards.leases,
7927                    framework,
7928                );
7929                snapshot.lease_count += leases.len();
7930                snapshot.leases.extend(leases);
7931            }
7932            info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
7933        }
7934
7935        // ------------------------------------------------------------
7936        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
7937        // ------------------------------------------------------------
7938        if self.config.accounting_standards.fair_value.enabled {
7939            use datasynth_generators::standards::FairValueGenerator;
7940            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7941                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7942                + chrono::Months::new(self.config.global.period_months);
7943            let framework =
7944                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7945            let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
7946            for company in &self.config.companies {
7947                let measurements = fv_gen.generate(
7948                    &company.code,
7949                    end_date,
7950                    &company.currency,
7951                    &self.config.accounting_standards.fair_value,
7952                    framework,
7953                );
7954                snapshot.fair_value_measurement_count += measurements.len();
7955                snapshot.fair_value_measurements.extend(measurements);
7956            }
7957            info!(
7958                "v3.3.1 fair value measurements: {}",
7959                snapshot.fair_value_measurement_count
7960            );
7961        }
7962
7963        // ------------------------------------------------------------
7964        // v3.3.1: Framework reconciliation (dual reporting only)
7965        // ------------------------------------------------------------
7966        if self.config.accounting_standards.generate_differences
7967            && matches!(
7968                self.config.accounting_standards.framework,
7969                Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
7970            )
7971        {
7972            use datasynth_generators::standards::FrameworkReconciliationGenerator;
7973            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7974                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7975                + chrono::Months::new(self.config.global.period_months);
7976            let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
7977            for company in &self.config.companies {
7978                let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
7979                snapshot.framework_difference_count += records.len();
7980                snapshot.framework_differences.extend(records);
7981                snapshot.framework_reconciliations.push(reconciliation);
7982            }
7983            info!(
7984                "v3.3.1 framework reconciliation: {} differences across {} entities",
7985                snapshot.framework_difference_count,
7986                snapshot.framework_reconciliations.len()
7987            );
7988        }
7989
7990        info!(
7991            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
7992            snapshot.revenue_contract_count,
7993            snapshot.impairment_test_count,
7994            snapshot.business_combination_count,
7995            snapshot.ecl_model_count,
7996            snapshot.provision_count,
7997            snapshot.currency_translation_count,
7998            snapshot.lease_count,
7999            snapshot.fair_value_measurement_count,
8000            snapshot.framework_difference_count,
8001        );
8002        self.check_resources_with_log("post-accounting-standards")?;
8003
8004        Ok(snapshot)
8005    }
8006
8007    /// v3.3.1: helper to resolve the accounting-standards framework enum
8008    /// from config into the `datasynth_standards::framework::AccountingFramework`
8009    /// type expected by standards generators. Falls back to US GAAP.
8010    fn resolve_accounting_framework(
8011        cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8012    ) -> datasynth_standards::framework::AccountingFramework {
8013        use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8014        use datasynth_standards::framework::AccountingFramework as Fw;
8015        match cfg {
8016            Some(Cfg::Ifrs) => Fw::Ifrs,
8017            Some(Cfg::DualReporting) => Fw::DualReporting,
8018            Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8019            Some(Cfg::GermanGaap) => Fw::GermanGaap,
8020            _ => Fw::UsGaap,
8021        }
8022    }
8023
8024    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
8025    fn phase_manufacturing(
8026        &mut self,
8027        stats: &mut EnhancedGenerationStatistics,
8028    ) -> SynthResult<ManufacturingSnapshot> {
8029        if !self.phase_config.generate_manufacturing {
8030            debug!("Phase 18: Skipped (manufacturing generation disabled)");
8031            return Ok(ManufacturingSnapshot::default());
8032        }
8033        info!("Phase 18: Generating Manufacturing Data");
8034
8035        let seed = self.seed;
8036        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8037            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8038        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8039        let company_code = self
8040            .config
8041            .companies
8042            .first()
8043            .map(|c| c.code.as_str())
8044            .unwrap_or("1000");
8045
8046        let material_data: Vec<(String, String)> = self
8047            .master_data
8048            .materials
8049            .iter()
8050            .map(|m| (m.material_id.clone(), m.description.clone()))
8051            .collect();
8052
8053        if material_data.is_empty() {
8054            debug!("Phase 18: Skipped (no materials available)");
8055            return Ok(ManufacturingSnapshot::default());
8056        }
8057
8058        let mut snapshot = ManufacturingSnapshot::default();
8059
8060        // Generate production orders
8061        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8062        // v3.4.3: snap planned / actual / operation dates to business days.
8063        if let Some(ctx) = &self.temporal_context {
8064            prod_gen.set_temporal_context(Arc::clone(ctx));
8065        }
8066        let production_orders = prod_gen.generate(
8067            company_code,
8068            &material_data,
8069            start_date,
8070            end_date,
8071            &self.config.manufacturing.production_orders,
8072            &self.config.manufacturing.costing,
8073            &self.config.manufacturing.routing,
8074        );
8075        snapshot.production_order_count = production_orders.len();
8076
8077        // Generate quality inspections from production orders
8078        let inspection_data: Vec<(String, String, String)> = production_orders
8079            .iter()
8080            .map(|po| {
8081                (
8082                    po.order_id.clone(),
8083                    po.material_id.clone(),
8084                    po.material_description.clone(),
8085                )
8086            })
8087            .collect();
8088
8089        snapshot.production_orders = production_orders;
8090
8091        if !inspection_data.is_empty() {
8092            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8093            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8094            snapshot.quality_inspection_count = inspections.len();
8095            snapshot.quality_inspections = inspections;
8096        }
8097
8098        // Generate cycle counts (one per month)
8099        let storage_locations: Vec<(String, String)> = material_data
8100            .iter()
8101            .enumerate()
8102            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8103            .collect();
8104
8105        let employee_ids: Vec<String> = self
8106            .master_data
8107            .employees
8108            .iter()
8109            .map(|e| e.employee_id.clone())
8110            .collect();
8111        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8112            .with_employee_pool(employee_ids);
8113        let mut cycle_count_total = 0usize;
8114        for month in 0..self.config.global.period_months {
8115            let count_date = start_date + chrono::Months::new(month);
8116            let items_per_count = storage_locations.len().clamp(10, 50);
8117            let cc = cc_gen.generate(
8118                company_code,
8119                &storage_locations,
8120                count_date,
8121                items_per_count,
8122            );
8123            snapshot.cycle_counts.push(cc);
8124            cycle_count_total += 1;
8125        }
8126        snapshot.cycle_count_count = cycle_count_total;
8127
8128        // Generate BOM components
8129        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8130        let bom_components = bom_gen.generate(company_code, &material_data);
8131        snapshot.bom_component_count = bom_components.len();
8132        snapshot.bom_components = bom_components;
8133
8134        // Generate inventory movements — link GoodsIssue movements to real production order IDs
8135        let currency = self
8136            .config
8137            .companies
8138            .first()
8139            .map(|c| c.currency.as_str())
8140            .unwrap_or("USD");
8141        let production_order_ids: Vec<String> = snapshot
8142            .production_orders
8143            .iter()
8144            .map(|po| po.order_id.clone())
8145            .collect();
8146        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8147        let inventory_movements = inv_mov_gen.generate_with_production_orders(
8148            company_code,
8149            &material_data,
8150            start_date,
8151            end_date,
8152            2,
8153            currency,
8154            &production_order_ids,
8155        );
8156        snapshot.inventory_movement_count = inventory_movements.len();
8157        snapshot.inventory_movements = inventory_movements;
8158
8159        stats.production_order_count = snapshot.production_order_count;
8160        stats.quality_inspection_count = snapshot.quality_inspection_count;
8161        stats.cycle_count_count = snapshot.cycle_count_count;
8162        stats.bom_component_count = snapshot.bom_component_count;
8163        stats.inventory_movement_count = snapshot.inventory_movement_count;
8164
8165        info!(
8166            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8167            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8168            snapshot.bom_component_count, snapshot.inventory_movement_count
8169        );
8170        self.check_resources_with_log("post-manufacturing")?;
8171
8172        Ok(snapshot)
8173    }
8174
8175    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
8176    fn phase_sales_kpi_budgets(
8177        &mut self,
8178        coa: &Arc<ChartOfAccounts>,
8179        financial_reporting: &FinancialReportingSnapshot,
8180        stats: &mut EnhancedGenerationStatistics,
8181    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8182        if !self.phase_config.generate_sales_kpi_budgets {
8183            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8184            return Ok(SalesKpiBudgetsSnapshot::default());
8185        }
8186        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8187
8188        let seed = self.seed;
8189        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8190            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8191        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8192        let company_code = self
8193            .config
8194            .companies
8195            .first()
8196            .map(|c| c.code.as_str())
8197            .unwrap_or("1000");
8198
8199        let mut snapshot = SalesKpiBudgetsSnapshot::default();
8200
8201        // Sales Quotes
8202        if self.config.sales_quotes.enabled {
8203            let customer_data: Vec<(String, String)> = self
8204                .master_data
8205                .customers
8206                .iter()
8207                .map(|c| (c.customer_id.clone(), c.name.clone()))
8208                .collect();
8209            let material_data: Vec<(String, String)> = self
8210                .master_data
8211                .materials
8212                .iter()
8213                .map(|m| (m.material_id.clone(), m.description.clone()))
8214                .collect();
8215
8216            if !customer_data.is_empty() && !material_data.is_empty() {
8217                let employee_ids: Vec<String> = self
8218                    .master_data
8219                    .employees
8220                    .iter()
8221                    .map(|e| e.employee_id.clone())
8222                    .collect();
8223                let customer_ids: Vec<String> = self
8224                    .master_data
8225                    .customers
8226                    .iter()
8227                    .map(|c| c.customer_id.clone())
8228                    .collect();
8229                let company_currency = self
8230                    .config
8231                    .companies
8232                    .first()
8233                    .map(|c| c.currency.as_str())
8234                    .unwrap_or("USD");
8235
8236                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8237                    .with_pools(employee_ids, customer_ids);
8238                let quotes = quote_gen.generate_with_currency(
8239                    company_code,
8240                    &customer_data,
8241                    &material_data,
8242                    start_date,
8243                    end_date,
8244                    &self.config.sales_quotes,
8245                    company_currency,
8246                );
8247                snapshot.sales_quote_count = quotes.len();
8248                snapshot.sales_quotes = quotes;
8249            }
8250        }
8251
8252        // Management KPIs
8253        if self.config.financial_reporting.management_kpis.enabled {
8254            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8255            let mut kpis = kpi_gen.generate(
8256                company_code,
8257                start_date,
8258                end_date,
8259                &self.config.financial_reporting.management_kpis,
8260            );
8261
8262            // Override financial KPIs with actual data from financial statements
8263            {
8264                use rust_decimal::Decimal;
8265
8266                if let Some(income_stmt) =
8267                    financial_reporting.financial_statements.iter().find(|fs| {
8268                        fs.statement_type == StatementType::IncomeStatement
8269                            && fs.company_code == company_code
8270                    })
8271                {
8272                    // Extract revenue and COGS from income statement line items
8273                    let total_revenue: Decimal = income_stmt
8274                        .line_items
8275                        .iter()
8276                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
8277                        .map(|li| li.amount)
8278                        .sum();
8279                    let total_cogs: Decimal = income_stmt
8280                        .line_items
8281                        .iter()
8282                        .filter(|li| {
8283                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8284                                && !li.is_total
8285                        })
8286                        .map(|li| li.amount.abs())
8287                        .sum();
8288                    let total_opex: Decimal = income_stmt
8289                        .line_items
8290                        .iter()
8291                        .filter(|li| {
8292                            li.section.contains("Expense")
8293                                && !li.is_total
8294                                && !li.section.contains("Cost")
8295                        })
8296                        .map(|li| li.amount.abs())
8297                        .sum();
8298
8299                    if total_revenue > Decimal::ZERO {
8300                        let hundred = Decimal::from(100);
8301                        let gross_margin_pct =
8302                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8303                        let operating_income = total_revenue - total_cogs - total_opex;
8304                        let op_margin_pct =
8305                            (operating_income * hundred / total_revenue).round_dp(2);
8306
8307                        // Override gross margin and operating margin KPIs
8308                        for kpi in &mut kpis {
8309                            if kpi.name == "Gross Margin" {
8310                                kpi.value = gross_margin_pct;
8311                            } else if kpi.name == "Operating Margin" {
8312                                kpi.value = op_margin_pct;
8313                            }
8314                        }
8315                    }
8316                }
8317
8318                // Override Current Ratio from balance sheet
8319                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8320                    fs.statement_type == StatementType::BalanceSheet
8321                        && fs.company_code == company_code
8322                }) {
8323                    let current_assets: Decimal = bs
8324                        .line_items
8325                        .iter()
8326                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8327                        .map(|li| li.amount)
8328                        .sum();
8329                    let current_liabilities: Decimal = bs
8330                        .line_items
8331                        .iter()
8332                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8333                        .map(|li| li.amount.abs())
8334                        .sum();
8335
8336                    if current_liabilities > Decimal::ZERO {
8337                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
8338                        for kpi in &mut kpis {
8339                            if kpi.name == "Current Ratio" {
8340                                kpi.value = current_ratio;
8341                            }
8342                        }
8343                    }
8344                }
8345            }
8346
8347            snapshot.kpi_count = kpis.len();
8348            snapshot.kpis = kpis;
8349        }
8350
8351        // Budgets
8352        if self.config.financial_reporting.budgets.enabled {
8353            let account_data: Vec<(String, String)> = coa
8354                .accounts
8355                .iter()
8356                .map(|a| (a.account_number.clone(), a.short_description.clone()))
8357                .collect();
8358
8359            if !account_data.is_empty() {
8360                let fiscal_year = start_date.year() as u32;
8361                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8362                let budget = budget_gen.generate(
8363                    company_code,
8364                    fiscal_year,
8365                    &account_data,
8366                    &self.config.financial_reporting.budgets,
8367                );
8368                snapshot.budget_line_count = budget.line_items.len();
8369                snapshot.budgets.push(budget);
8370            }
8371        }
8372
8373        stats.sales_quote_count = snapshot.sales_quote_count;
8374        stats.kpi_count = snapshot.kpi_count;
8375        stats.budget_line_count = snapshot.budget_line_count;
8376
8377        info!(
8378            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8379            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8380        );
8381        self.check_resources_with_log("post-sales-kpi-budgets")?;
8382
8383        Ok(snapshot)
8384    }
8385
8386    /// Compute pre-tax income for a single company from actual journal entries.
8387    ///
8388    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
8389    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
8390    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
8391    /// and the period-close engine so that all three use a consistent definition.
8392    fn compute_pre_tax_income(
8393        company_code: &str,
8394        journal_entries: &[JournalEntry],
8395    ) -> rust_decimal::Decimal {
8396        use datasynth_core::accounts::AccountCategory;
8397        use rust_decimal::Decimal;
8398
8399        let mut total_revenue = Decimal::ZERO;
8400        let mut total_expenses = Decimal::ZERO;
8401
8402        for je in journal_entries {
8403            if je.header.company_code != company_code {
8404                continue;
8405            }
8406            for line in &je.lines {
8407                let cat = AccountCategory::from_account(&line.gl_account);
8408                match cat {
8409                    AccountCategory::Revenue => {
8410                        total_revenue += line.credit_amount - line.debit_amount;
8411                    }
8412                    AccountCategory::Cogs
8413                    | AccountCategory::OperatingExpense
8414                    | AccountCategory::OtherIncomeExpense => {
8415                        total_expenses += line.debit_amount - line.credit_amount;
8416                    }
8417                    _ => {}
8418                }
8419            }
8420        }
8421
8422        let pti = (total_revenue - total_expenses).round_dp(2);
8423        if pti == rust_decimal::Decimal::ZERO {
8424            // No income statement activity yet — fall back to a synthetic value so the
8425            // tax provision generator can still produce meaningful output.
8426            rust_decimal::Decimal::from(1_000_000u32)
8427        } else {
8428            pti
8429        }
8430    }
8431
8432    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
8433    fn phase_tax_generation(
8434        &mut self,
8435        document_flows: &DocumentFlowSnapshot,
8436        journal_entries: &[JournalEntry],
8437        stats: &mut EnhancedGenerationStatistics,
8438    ) -> SynthResult<TaxSnapshot> {
8439        if !self.phase_config.generate_tax {
8440            debug!("Phase 20: Skipped (tax generation disabled)");
8441            return Ok(TaxSnapshot::default());
8442        }
8443        info!("Phase 20: Generating Tax Data");
8444
8445        let seed = self.seed;
8446        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8447            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8448        let fiscal_year = start_date.year();
8449        let company_code = self
8450            .config
8451            .companies
8452            .first()
8453            .map(|c| c.code.as_str())
8454            .unwrap_or("1000");
8455
8456        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8457            seed + 370,
8458            self.config.tax.clone(),
8459        );
8460
8461        let pack = self.primary_pack().clone();
8462        let (jurisdictions, codes) =
8463            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8464
8465        // Generate tax provisions for each company
8466        let mut provisions = Vec::new();
8467        if self.config.tax.provisions.enabled {
8468            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8469            for company in &self.config.companies {
8470                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8471                let statutory_rate = rust_decimal::Decimal::new(
8472                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8473                    2,
8474                );
8475                let provision = provision_gen.generate(
8476                    &company.code,
8477                    start_date,
8478                    pre_tax_income,
8479                    statutory_rate,
8480                );
8481                provisions.push(provision);
8482            }
8483        }
8484
8485        // Generate tax lines from document invoices
8486        let mut tax_lines = Vec::new();
8487        if !codes.is_empty() {
8488            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8489                datasynth_generators::TaxLineGeneratorConfig::default(),
8490                codes.clone(),
8491                seed + 372,
8492            );
8493
8494            // Tax lines from vendor invoices (input tax)
8495            // Use the first company's country as buyer country
8496            let buyer_country = self
8497                .config
8498                .companies
8499                .first()
8500                .map(|c| c.country.as_str())
8501                .unwrap_or("US");
8502            for vi in &document_flows.vendor_invoices {
8503                let lines = tax_line_gen.generate_for_document(
8504                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
8505                    &vi.header.document_id,
8506                    buyer_country, // seller approx same country
8507                    buyer_country,
8508                    vi.payable_amount,
8509                    vi.header.document_date,
8510                    None,
8511                );
8512                tax_lines.extend(lines);
8513            }
8514
8515            // Tax lines from customer invoices (output tax)
8516            for ci in &document_flows.customer_invoices {
8517                let lines = tax_line_gen.generate_for_document(
8518                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8519                    &ci.header.document_id,
8520                    buyer_country, // seller is the company
8521                    buyer_country,
8522                    ci.total_gross_amount,
8523                    ci.header.document_date,
8524                    None,
8525                );
8526                tax_lines.extend(lines);
8527            }
8528        }
8529
8530        // Generate deferred tax data (IAS 12 / ASC 740) for each company
8531        let deferred_tax = {
8532            let companies: Vec<(&str, &str)> = self
8533                .config
8534                .companies
8535                .iter()
8536                .map(|c| (c.code.as_str(), c.country.as_str()))
8537                .collect();
8538            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8539            deferred_gen.generate(&companies, start_date, journal_entries)
8540        };
8541
8542        // Build a document_id → posting_date map so each tax JE uses its
8543        // source document's date rather than a blanket period-end date.
8544        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8545            std::collections::HashMap::new();
8546        for vi in &document_flows.vendor_invoices {
8547            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8548        }
8549        for ci in &document_flows.customer_invoices {
8550            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8551        }
8552
8553        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
8554        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8555        let tax_posting_journal_entries = if !tax_lines.is_empty() {
8556            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8557                &tax_lines,
8558                company_code,
8559                &doc_dates,
8560                end_date,
8561            );
8562            debug!("Generated {} tax posting JEs", jes.len());
8563            jes
8564        } else {
8565            Vec::new()
8566        };
8567
8568        let snapshot = TaxSnapshot {
8569            jurisdiction_count: jurisdictions.len(),
8570            code_count: codes.len(),
8571            jurisdictions,
8572            codes,
8573            tax_provisions: provisions,
8574            tax_lines,
8575            tax_returns: Vec::new(),
8576            withholding_records: Vec::new(),
8577            tax_anomaly_labels: Vec::new(),
8578            deferred_tax,
8579            tax_posting_journal_entries,
8580        };
8581
8582        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8583        stats.tax_code_count = snapshot.code_count;
8584        stats.tax_provision_count = snapshot.tax_provisions.len();
8585        stats.tax_line_count = snapshot.tax_lines.len();
8586
8587        info!(
8588            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8589            snapshot.jurisdiction_count,
8590            snapshot.code_count,
8591            snapshot.tax_provisions.len(),
8592            snapshot.deferred_tax.temporary_differences.len(),
8593            snapshot.deferred_tax.journal_entries.len(),
8594            snapshot.tax_posting_journal_entries.len(),
8595        );
8596        self.check_resources_with_log("post-tax")?;
8597
8598        Ok(snapshot)
8599    }
8600
8601    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
8602    fn phase_esg_generation(
8603        &mut self,
8604        document_flows: &DocumentFlowSnapshot,
8605        manufacturing: &ManufacturingSnapshot,
8606        stats: &mut EnhancedGenerationStatistics,
8607    ) -> SynthResult<EsgSnapshot> {
8608        if !self.phase_config.generate_esg {
8609            debug!("Phase 21: Skipped (ESG generation disabled)");
8610            return Ok(EsgSnapshot::default());
8611        }
8612        let degradation = self.check_resources()?;
8613        if degradation >= DegradationLevel::Reduced {
8614            debug!(
8615                "Phase skipped due to resource pressure (degradation: {:?})",
8616                degradation
8617            );
8618            return Ok(EsgSnapshot::default());
8619        }
8620        info!("Phase 21: Generating ESG Data");
8621
8622        let seed = self.seed;
8623        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8624            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8625        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8626        let entity_id = self
8627            .config
8628            .companies
8629            .first()
8630            .map(|c| c.code.as_str())
8631            .unwrap_or("1000");
8632
8633        let esg_cfg = &self.config.esg;
8634        let mut snapshot = EsgSnapshot::default();
8635
8636        // Energy consumption (feeds into scope 1 & 2 emissions)
8637        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8638            esg_cfg.environmental.energy.clone(),
8639            seed + 80,
8640        );
8641        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8642
8643        // Water usage
8644        let facility_count = esg_cfg.environmental.energy.facility_count;
8645        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8646        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8647
8648        // Waste
8649        let mut waste_gen = datasynth_generators::WasteGenerator::new(
8650            seed + 82,
8651            esg_cfg.environmental.waste.diversion_target,
8652            facility_count,
8653        );
8654        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8655
8656        // Emissions (scope 1, 2, 3)
8657        let mut emission_gen =
8658            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8659
8660        // Build EnergyInput from energy_records
8661        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8662            .iter()
8663            .map(|e| datasynth_generators::EnergyInput {
8664                facility_id: e.facility_id.clone(),
8665                energy_type: match e.energy_source {
8666                    EnergySourceType::NaturalGas => {
8667                        datasynth_generators::EnergyInputType::NaturalGas
8668                    }
8669                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8670                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8671                    _ => datasynth_generators::EnergyInputType::Electricity,
8672                },
8673                consumption_kwh: e.consumption_kwh,
8674                period: e.period,
8675            })
8676            .collect();
8677
8678        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
8679        if !manufacturing.production_orders.is_empty() {
8680            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8681                &manufacturing.production_orders,
8682                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
8683                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
8684            );
8685            if !mfg_energy.is_empty() {
8686                info!(
8687                    "ESG: {} energy inputs derived from {} production orders",
8688                    mfg_energy.len(),
8689                    manufacturing.production_orders.len(),
8690                );
8691                energy_inputs.extend(mfg_energy);
8692            }
8693        }
8694
8695        let mut emissions = Vec::new();
8696        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8697        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8698
8699        // Scope 3: use vendor spend data from actual payments
8700        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8701            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8702            for payment in &document_flows.payments {
8703                if payment.is_vendor {
8704                    *totals
8705                        .entry(payment.business_partner_id.clone())
8706                        .or_default() += payment.amount;
8707                }
8708            }
8709            totals
8710        };
8711        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8712            .master_data
8713            .vendors
8714            .iter()
8715            .map(|v| {
8716                let spend = vendor_payment_totals
8717                    .get(&v.vendor_id)
8718                    .copied()
8719                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8720                datasynth_generators::VendorSpendInput {
8721                    vendor_id: v.vendor_id.clone(),
8722                    category: format!("{:?}", v.vendor_type).to_lowercase(),
8723                    spend,
8724                    country: v.country.clone(),
8725                }
8726            })
8727            .collect();
8728        if !vendor_spend.is_empty() {
8729            emissions.extend(emission_gen.generate_scope3_purchased_goods(
8730                entity_id,
8731                &vendor_spend,
8732                start_date,
8733                end_date,
8734            ));
8735        }
8736
8737        // Business travel & commuting (scope 3)
8738        let headcount = self.master_data.employees.len() as u32;
8739        if headcount > 0 {
8740            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8741            emissions.extend(emission_gen.generate_scope3_business_travel(
8742                entity_id,
8743                travel_spend,
8744                start_date,
8745            ));
8746            emissions
8747                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8748        }
8749
8750        snapshot.emission_count = emissions.len();
8751        snapshot.emissions = emissions;
8752        snapshot.energy = energy_records;
8753
8754        // Social: Workforce diversity, pay equity, safety
8755        let mut workforce_gen =
8756            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8757        let total_headcount = headcount.max(100);
8758        snapshot.diversity =
8759            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8760        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8761
8762        // v2.4: Derive additional workforce diversity metrics from actual employee data
8763        if !self.master_data.employees.is_empty() {
8764            let hr_diversity = workforce_gen.generate_diversity_from_employees(
8765                entity_id,
8766                &self.master_data.employees,
8767                end_date,
8768            );
8769            if !hr_diversity.is_empty() {
8770                info!(
8771                    "ESG: {} diversity metrics derived from {} actual employees",
8772                    hr_diversity.len(),
8773                    self.master_data.employees.len(),
8774                );
8775                snapshot.diversity.extend(hr_diversity);
8776            }
8777        }
8778
8779        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8780            entity_id,
8781            facility_count,
8782            start_date,
8783            end_date,
8784        );
8785
8786        // Compute safety metrics
8787        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
8788        let safety_metric = workforce_gen.compute_safety_metrics(
8789            entity_id,
8790            &snapshot.safety_incidents,
8791            total_hours,
8792            start_date,
8793        );
8794        snapshot.safety_metrics = vec![safety_metric];
8795
8796        // Governance
8797        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
8798            seed + 85,
8799            esg_cfg.governance.board_size,
8800            esg_cfg.governance.independence_target,
8801        );
8802        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
8803
8804        // Supplier ESG assessments
8805        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
8806            esg_cfg.supply_chain_esg.clone(),
8807            seed + 86,
8808        );
8809        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
8810            .master_data
8811            .vendors
8812            .iter()
8813            .map(|v| datasynth_generators::VendorInput {
8814                vendor_id: v.vendor_id.clone(),
8815                country: v.country.clone(),
8816                industry: format!("{:?}", v.vendor_type).to_lowercase(),
8817                quality_score: None,
8818            })
8819            .collect();
8820        snapshot.supplier_assessments =
8821            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
8822
8823        // Disclosures
8824        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
8825            seed + 87,
8826            esg_cfg.reporting.clone(),
8827            esg_cfg.climate_scenarios.clone(),
8828        );
8829        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
8830        snapshot.disclosures = disclosure_gen.generate_disclosures(
8831            entity_id,
8832            &snapshot.materiality,
8833            start_date,
8834            end_date,
8835        );
8836        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
8837        snapshot.disclosure_count = snapshot.disclosures.len();
8838
8839        // Anomaly injection
8840        if esg_cfg.anomaly_rate > 0.0 {
8841            let mut anomaly_injector =
8842                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
8843            let mut labels = Vec::new();
8844            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
8845            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
8846            labels.extend(
8847                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
8848            );
8849            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
8850            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
8851            snapshot.anomaly_labels = labels;
8852        }
8853
8854        stats.esg_emission_count = snapshot.emission_count;
8855        stats.esg_disclosure_count = snapshot.disclosure_count;
8856
8857        info!(
8858            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8859            snapshot.emission_count,
8860            snapshot.disclosure_count,
8861            snapshot.supplier_assessments.len()
8862        );
8863        self.check_resources_with_log("post-esg")?;
8864
8865        Ok(snapshot)
8866    }
8867
8868    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
8869    fn phase_treasury_data(
8870        &mut self,
8871        document_flows: &DocumentFlowSnapshot,
8872        subledger: &SubledgerSnapshot,
8873        intercompany: &IntercompanySnapshot,
8874        stats: &mut EnhancedGenerationStatistics,
8875    ) -> SynthResult<TreasurySnapshot> {
8876        if !self.phase_config.generate_treasury {
8877            debug!("Phase 22: Skipped (treasury generation disabled)");
8878            return Ok(TreasurySnapshot::default());
8879        }
8880        let degradation = self.check_resources()?;
8881        if degradation >= DegradationLevel::Reduced {
8882            debug!(
8883                "Phase skipped due to resource pressure (degradation: {:?})",
8884                degradation
8885            );
8886            return Ok(TreasurySnapshot::default());
8887        }
8888        info!("Phase 22: Generating Treasury Data");
8889
8890        let seed = self.seed;
8891        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8892            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8893        let currency = self
8894            .config
8895            .companies
8896            .first()
8897            .map(|c| c.currency.as_str())
8898            .unwrap_or("USD");
8899        let entity_id = self
8900            .config
8901            .companies
8902            .first()
8903            .map(|c| c.code.as_str())
8904            .unwrap_or("1000");
8905
8906        let mut snapshot = TreasurySnapshot::default();
8907
8908        // Generate debt instruments
8909        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8910            self.config.treasury.debt.clone(),
8911            seed + 90,
8912        );
8913        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8914
8915        // Generate hedging instruments (IR swaps for floating-rate debt)
8916        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8917            self.config.treasury.hedging.clone(),
8918            seed + 91,
8919        );
8920        for debt in &snapshot.debt_instruments {
8921            if debt.rate_type == InterestRateType::Variable {
8922                let swap = hedge_gen.generate_ir_swap(
8923                    currency,
8924                    debt.principal,
8925                    debt.origination_date,
8926                    debt.maturity_date,
8927                );
8928                snapshot.hedging_instruments.push(swap);
8929            }
8930        }
8931
8932        // Build FX exposures from foreign-currency payments and generate
8933        // FX forwards + hedge relationship designations via generate() API.
8934        {
8935            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8936            for payment in &document_flows.payments {
8937                if payment.currency != currency {
8938                    let entry = fx_map
8939                        .entry(payment.currency.clone())
8940                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8941                    entry.0 += payment.amount;
8942                    // Use the latest settlement date among grouped payments
8943                    if payment.header.document_date > entry.1 {
8944                        entry.1 = payment.header.document_date;
8945                    }
8946                }
8947            }
8948            if !fx_map.is_empty() {
8949                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
8950                    .into_iter()
8951                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
8952                        datasynth_generators::treasury::FxExposure {
8953                            currency_pair: format!("{foreign_ccy}/{currency}"),
8954                            foreign_currency: foreign_ccy,
8955                            net_amount,
8956                            settlement_date,
8957                            description: "AP payment FX exposure".to_string(),
8958                        }
8959                    })
8960                    .collect();
8961                let (fx_instruments, fx_relationships) =
8962                    hedge_gen.generate(start_date, &fx_exposures);
8963                snapshot.hedging_instruments.extend(fx_instruments);
8964                snapshot.hedge_relationships.extend(fx_relationships);
8965            }
8966        }
8967
8968        // Inject anomalies if configured
8969        if self.config.treasury.anomaly_rate > 0.0 {
8970            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
8971                seed + 92,
8972                self.config.treasury.anomaly_rate,
8973            );
8974            let mut labels = Vec::new();
8975            labels.extend(
8976                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
8977            );
8978            snapshot.treasury_anomaly_labels = labels;
8979        }
8980
8981        // Generate cash positions from payment flows
8982        if self.config.treasury.cash_positioning.enabled {
8983            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
8984
8985            // AP payments as outflows
8986            for payment in &document_flows.payments {
8987                cash_flows.push(datasynth_generators::treasury::CashFlow {
8988                    date: payment.header.document_date,
8989                    account_id: format!("{entity_id}-MAIN"),
8990                    amount: payment.amount,
8991                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
8992                });
8993            }
8994
8995            // Customer receipts (from O2C chains) as inflows
8996            for chain in &document_flows.o2c_chains {
8997                if let Some(ref receipt) = chain.customer_receipt {
8998                    cash_flows.push(datasynth_generators::treasury::CashFlow {
8999                        date: receipt.header.document_date,
9000                        account_id: format!("{entity_id}-MAIN"),
9001                        amount: receipt.amount,
9002                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9003                    });
9004                }
9005                // Remainder receipts (follow-up to partial payments)
9006                for receipt in &chain.remainder_receipts {
9007                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9008                        date: receipt.header.document_date,
9009                        account_id: format!("{entity_id}-MAIN"),
9010                        amount: receipt.amount,
9011                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9012                    });
9013                }
9014            }
9015
9016            if !cash_flows.is_empty() {
9017                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9018                    self.config.treasury.cash_positioning.clone(),
9019                    seed + 93,
9020                );
9021                let account_id = format!("{entity_id}-MAIN");
9022                snapshot.cash_positions = cash_gen.generate(
9023                    entity_id,
9024                    &account_id,
9025                    currency,
9026                    &cash_flows,
9027                    start_date,
9028                    start_date + chrono::Months::new(self.config.global.period_months),
9029                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
9030                );
9031            }
9032        }
9033
9034        // Generate cash forecasts from AR/AP aging
9035        if self.config.treasury.cash_forecasting.enabled {
9036            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9037
9038            // Build AR aging items from subledger AR invoices
9039            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9040                .ar_invoices
9041                .iter()
9042                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9043                .map(|inv| {
9044                    let days_past_due = if inv.due_date < end_date {
9045                        (end_date - inv.due_date).num_days().max(0) as u32
9046                    } else {
9047                        0
9048                    };
9049                    datasynth_generators::treasury::ArAgingItem {
9050                        expected_date: inv.due_date,
9051                        amount: inv.amount_remaining,
9052                        days_past_due,
9053                        document_id: inv.invoice_number.clone(),
9054                    }
9055                })
9056                .collect();
9057
9058            // Build AP aging items from subledger AP invoices
9059            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9060                .ap_invoices
9061                .iter()
9062                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9063                .map(|inv| datasynth_generators::treasury::ApAgingItem {
9064                    payment_date: inv.due_date,
9065                    amount: inv.amount_remaining,
9066                    document_id: inv.invoice_number.clone(),
9067                })
9068                .collect();
9069
9070            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9071                self.config.treasury.cash_forecasting.clone(),
9072                seed + 94,
9073            );
9074            let forecast = forecast_gen.generate(
9075                entity_id,
9076                currency,
9077                end_date,
9078                &ar_items,
9079                &ap_items,
9080                &[], // scheduled disbursements - empty for now
9081            );
9082            snapshot.cash_forecasts.push(forecast);
9083        }
9084
9085        // Generate cash pools and sweeps
9086        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9087            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9088            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9089                self.config.treasury.cash_pooling.clone(),
9090                seed + 95,
9091            );
9092
9093            // Create a pool from available accounts
9094            let account_ids: Vec<String> = snapshot
9095                .cash_positions
9096                .iter()
9097                .map(|cp| cp.bank_account_id.clone())
9098                .collect::<std::collections::HashSet<_>>()
9099                .into_iter()
9100                .collect();
9101
9102            if let Some(pool) =
9103                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9104            {
9105                // Generate sweeps - build participant balances from last cash position per account
9106                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9107                for cp in &snapshot.cash_positions {
9108                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9109                }
9110
9111                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9112                    latest_balances
9113                        .into_iter()
9114                        .filter(|(id, _)| pool.participant_accounts.contains(id))
9115                        .map(
9116                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
9117                                account_id: id,
9118                                balance,
9119                            },
9120                        )
9121                        .collect();
9122
9123                let sweeps =
9124                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9125                snapshot.cash_pool_sweeps = sweeps;
9126                snapshot.cash_pools.push(pool);
9127            }
9128        }
9129
9130        // Generate bank guarantees
9131        if self.config.treasury.bank_guarantees.enabled {
9132            let vendor_names: Vec<String> = self
9133                .master_data
9134                .vendors
9135                .iter()
9136                .map(|v| v.name.clone())
9137                .collect();
9138            if !vendor_names.is_empty() {
9139                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9140                    self.config.treasury.bank_guarantees.clone(),
9141                    seed + 96,
9142                );
9143                snapshot.bank_guarantees =
9144                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9145            }
9146        }
9147
9148        // Generate netting runs from intercompany matched pairs
9149        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9150            let entity_ids: Vec<String> = self
9151                .config
9152                .companies
9153                .iter()
9154                .map(|c| c.code.clone())
9155                .collect();
9156            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9157                .matched_pairs
9158                .iter()
9159                .map(|mp| {
9160                    (
9161                        mp.seller_company.clone(),
9162                        mp.buyer_company.clone(),
9163                        mp.amount,
9164                    )
9165                })
9166                .collect();
9167            if entity_ids.len() >= 2 {
9168                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9169                    self.config.treasury.netting.clone(),
9170                    seed + 97,
9171                );
9172                snapshot.netting_runs = netting_gen.generate(
9173                    &entity_ids,
9174                    currency,
9175                    start_date,
9176                    self.config.global.period_months,
9177                    &ic_amounts,
9178                );
9179            }
9180        }
9181
9182        // Generate treasury journal entries from the instruments we just created.
9183        {
9184            use datasynth_generators::treasury::TreasuryAccounting;
9185
9186            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9187            let mut treasury_jes = Vec::new();
9188
9189            // Debt interest accrual JEs
9190            if !snapshot.debt_instruments.is_empty() {
9191                let debt_jes =
9192                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9193                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9194                treasury_jes.extend(debt_jes);
9195            }
9196
9197            // Hedge mark-to-market JEs
9198            if !snapshot.hedging_instruments.is_empty() {
9199                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9200                    &snapshot.hedging_instruments,
9201                    &snapshot.hedge_relationships,
9202                    end_date,
9203                    entity_id,
9204                );
9205                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9206                treasury_jes.extend(hedge_jes);
9207            }
9208
9209            // Cash pool sweep JEs
9210            if !snapshot.cash_pool_sweeps.is_empty() {
9211                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9212                    &snapshot.cash_pool_sweeps,
9213                    entity_id,
9214                );
9215                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9216                treasury_jes.extend(sweep_jes);
9217            }
9218
9219            if !treasury_jes.is_empty() {
9220                debug!("Total treasury journal entries: {}", treasury_jes.len());
9221            }
9222            snapshot.journal_entries = treasury_jes;
9223        }
9224
9225        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9226        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9227        stats.cash_position_count = snapshot.cash_positions.len();
9228        stats.cash_forecast_count = snapshot.cash_forecasts.len();
9229        stats.cash_pool_count = snapshot.cash_pools.len();
9230
9231        info!(
9232            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9233            snapshot.debt_instruments.len(),
9234            snapshot.hedging_instruments.len(),
9235            snapshot.cash_positions.len(),
9236            snapshot.cash_forecasts.len(),
9237            snapshot.cash_pools.len(),
9238            snapshot.bank_guarantees.len(),
9239            snapshot.netting_runs.len(),
9240            snapshot.journal_entries.len(),
9241        );
9242        self.check_resources_with_log("post-treasury")?;
9243
9244        Ok(snapshot)
9245    }
9246
9247    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
9248    fn phase_project_accounting(
9249        &mut self,
9250        document_flows: &DocumentFlowSnapshot,
9251        hr: &HrSnapshot,
9252        stats: &mut EnhancedGenerationStatistics,
9253    ) -> SynthResult<ProjectAccountingSnapshot> {
9254        if !self.phase_config.generate_project_accounting {
9255            debug!("Phase 23: Skipped (project accounting disabled)");
9256            return Ok(ProjectAccountingSnapshot::default());
9257        }
9258        let degradation = self.check_resources()?;
9259        if degradation >= DegradationLevel::Reduced {
9260            debug!(
9261                "Phase skipped due to resource pressure (degradation: {:?})",
9262                degradation
9263            );
9264            return Ok(ProjectAccountingSnapshot::default());
9265        }
9266        info!("Phase 23: Generating Project Accounting Data");
9267
9268        let seed = self.seed;
9269        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9270            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9271        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9272        let company_code = self
9273            .config
9274            .companies
9275            .first()
9276            .map(|c| c.code.as_str())
9277            .unwrap_or("1000");
9278
9279        let mut snapshot = ProjectAccountingSnapshot::default();
9280
9281        // Generate projects with WBS hierarchies
9282        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9283            self.config.project_accounting.clone(),
9284            seed + 95,
9285        );
9286        let pool = project_gen.generate(company_code, start_date, end_date);
9287        snapshot.projects = pool.projects.clone();
9288
9289        // Link source documents to projects for cost allocation
9290        {
9291            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9292                Vec::new();
9293
9294            // Time entries
9295            for te in &hr.time_entries {
9296                let total_hours = te.hours_regular + te.hours_overtime;
9297                if total_hours > 0.0 {
9298                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9299                        id: te.entry_id.clone(),
9300                        entity_id: company_code.to_string(),
9301                        date: te.date,
9302                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9303                            .unwrap_or(rust_decimal::Decimal::ZERO),
9304                        source_type: CostSourceType::TimeEntry,
9305                        hours: Some(
9306                            rust_decimal::Decimal::from_f64_retain(total_hours)
9307                                .unwrap_or(rust_decimal::Decimal::ZERO),
9308                        ),
9309                    });
9310                }
9311            }
9312
9313            // Expense reports
9314            for er in &hr.expense_reports {
9315                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9316                    id: er.report_id.clone(),
9317                    entity_id: company_code.to_string(),
9318                    date: er.submission_date,
9319                    amount: er.total_amount,
9320                    source_type: CostSourceType::ExpenseReport,
9321                    hours: None,
9322                });
9323            }
9324
9325            // Purchase orders
9326            for po in &document_flows.purchase_orders {
9327                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9328                    id: po.header.document_id.clone(),
9329                    entity_id: company_code.to_string(),
9330                    date: po.header.document_date,
9331                    amount: po.total_net_amount,
9332                    source_type: CostSourceType::PurchaseOrder,
9333                    hours: None,
9334                });
9335            }
9336
9337            // Vendor invoices
9338            for vi in &document_flows.vendor_invoices {
9339                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9340                    id: vi.header.document_id.clone(),
9341                    entity_id: company_code.to_string(),
9342                    date: vi.header.document_date,
9343                    amount: vi.payable_amount,
9344                    source_type: CostSourceType::VendorInvoice,
9345                    hours: None,
9346                });
9347            }
9348
9349            if !source_docs.is_empty() && !pool.projects.is_empty() {
9350                let mut cost_gen =
9351                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
9352                        self.config.project_accounting.cost_allocation.clone(),
9353                        seed + 99,
9354                    );
9355                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9356            }
9357        }
9358
9359        // Generate change orders
9360        if self.config.project_accounting.change_orders.enabled {
9361            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9362                self.config.project_accounting.change_orders.clone(),
9363                seed + 96,
9364            );
9365            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9366        }
9367
9368        // Generate milestones
9369        if self.config.project_accounting.milestones.enabled {
9370            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9371                self.config.project_accounting.milestones.clone(),
9372                seed + 97,
9373            );
9374            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9375        }
9376
9377        // Generate earned value metrics (needs cost lines, so only if we have projects)
9378        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9379            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9380                self.config.project_accounting.earned_value.clone(),
9381                seed + 98,
9382            );
9383            snapshot.earned_value_metrics =
9384                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9385        }
9386
9387        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
9388        if self.config.project_accounting.revenue_recognition.enabled
9389            && !snapshot.projects.is_empty()
9390            && !snapshot.cost_lines.is_empty()
9391        {
9392            use datasynth_generators::project_accounting::RevenueGenerator;
9393            let rev_config = self.config.project_accounting.revenue_recognition.clone();
9394            let avg_contract_value =
9395                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9396                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9397
9398            // Build contract value tuples: only customer-type projects get revenue recognition.
9399            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
9400            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9401                snapshot
9402                    .projects
9403                    .iter()
9404                    .filter(|p| {
9405                        matches!(
9406                            p.project_type,
9407                            datasynth_core::models::ProjectType::Customer
9408                        )
9409                    })
9410                    .map(|p| {
9411                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
9412                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9413                        // budget × 1.25 → contract value
9414                        } else {
9415                            avg_contract_value
9416                        };
9417                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
9418                        (p.project_id.clone(), cv, etc)
9419                    })
9420                    .collect();
9421
9422            if !contract_values.is_empty() {
9423                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9424                snapshot.revenue_records = rev_gen.generate(
9425                    &snapshot.projects,
9426                    &snapshot.cost_lines,
9427                    &contract_values,
9428                    start_date,
9429                    end_date,
9430                );
9431                debug!(
9432                    "Generated {} revenue recognition records for {} customer projects",
9433                    snapshot.revenue_records.len(),
9434                    contract_values.len()
9435                );
9436            }
9437        }
9438
9439        stats.project_count = snapshot.projects.len();
9440        stats.project_change_order_count = snapshot.change_orders.len();
9441        stats.project_cost_line_count = snapshot.cost_lines.len();
9442
9443        info!(
9444            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9445            snapshot.projects.len(),
9446            snapshot.change_orders.len(),
9447            snapshot.milestones.len(),
9448            snapshot.earned_value_metrics.len()
9449        );
9450        self.check_resources_with_log("post-project-accounting")?;
9451
9452        Ok(snapshot)
9453    }
9454
9455    /// Phase 24: Generate process evolution and organizational events.
9456    fn phase_evolution_events(
9457        &mut self,
9458        stats: &mut EnhancedGenerationStatistics,
9459    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9460        if !self.phase_config.generate_evolution_events {
9461            debug!("Phase 24: Skipped (evolution events disabled)");
9462            return Ok((Vec::new(), Vec::new()));
9463        }
9464        info!("Phase 24: Generating Process Evolution + Organizational Events");
9465
9466        let seed = self.seed;
9467        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9468            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9469        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9470
9471        // Process evolution events
9472        let mut proc_gen =
9473            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9474                seed + 100,
9475            );
9476        let process_events = proc_gen.generate_events(start_date, end_date);
9477
9478        // Organizational events
9479        let company_codes: Vec<String> = self
9480            .config
9481            .companies
9482            .iter()
9483            .map(|c| c.code.clone())
9484            .collect();
9485        let mut org_gen =
9486            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9487                seed + 101,
9488            );
9489        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9490
9491        stats.process_evolution_event_count = process_events.len();
9492        stats.organizational_event_count = org_events.len();
9493
9494        info!(
9495            "Evolution events generated: {} process evolution, {} organizational",
9496            process_events.len(),
9497            org_events.len()
9498        );
9499        self.check_resources_with_log("post-evolution-events")?;
9500
9501        Ok((process_events, org_events))
9502    }
9503
9504    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
9505    /// data recovery, and regulatory changes).
9506    fn phase_disruption_events(
9507        &self,
9508        stats: &mut EnhancedGenerationStatistics,
9509    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9510        if !self.config.organizational_events.enabled {
9511            debug!("Phase 24b: Skipped (organizational events disabled)");
9512            return Ok(Vec::new());
9513        }
9514        info!("Phase 24b: Generating Disruption Events");
9515
9516        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9517            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9518        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9519
9520        let company_codes: Vec<String> = self
9521            .config
9522            .companies
9523            .iter()
9524            .map(|c| c.code.clone())
9525            .collect();
9526
9527        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9528        let events = gen.generate(start_date, end_date, &company_codes);
9529
9530        stats.disruption_event_count = events.len();
9531        info!("Disruption events generated: {} events", events.len());
9532        self.check_resources_with_log("post-disruption-events")?;
9533
9534        Ok(events)
9535    }
9536
9537    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
9538    ///
9539    /// Produces paired examples where each pair contains the original clean JE
9540    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
9541    /// split transaction). Useful for training anomaly detection models with
9542    /// known ground truth.
9543    fn phase_counterfactuals(
9544        &self,
9545        journal_entries: &[JournalEntry],
9546        stats: &mut EnhancedGenerationStatistics,
9547    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9548        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9549            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9550            return Ok(Vec::new());
9551        }
9552        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9553
9554        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9555
9556        let mut gen = CounterfactualGenerator::new(self.seed + 110);
9557
9558        // Rotating set of specs to produce diverse mutation types
9559        let specs = [
9560            CounterfactualSpec::ScaleAmount { factor: 2.5 },
9561            CounterfactualSpec::ShiftDate { days: -14 },
9562            CounterfactualSpec::SelfApprove,
9563            CounterfactualSpec::SplitTransaction { split_count: 3 },
9564        ];
9565
9566        let pairs: Vec<_> = journal_entries
9567            .iter()
9568            .enumerate()
9569            .map(|(i, je)| {
9570                let spec = &specs[i % specs.len()];
9571                gen.generate(je, spec)
9572            })
9573            .collect();
9574
9575        stats.counterfactual_pair_count = pairs.len();
9576        info!(
9577            "Counterfactual pairs generated: {} pairs from {} journal entries",
9578            pairs.len(),
9579            journal_entries.len()
9580        );
9581        self.check_resources_with_log("post-counterfactuals")?;
9582
9583        Ok(pairs)
9584    }
9585
9586    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
9587    ///
9588    /// Uses the anomaly labels (from Phase 8) to determine which documents are
9589    /// fraudulent, then generates probabilistic red flags on all chain documents.
9590    /// Non-fraud documents also receive red flags at a lower rate (false positives)
9591    /// to produce realistic ML training data.
9592    fn phase_red_flags(
9593        &self,
9594        anomaly_labels: &AnomalyLabels,
9595        document_flows: &DocumentFlowSnapshot,
9596        stats: &mut EnhancedGenerationStatistics,
9597    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9598        if !self.config.fraud.enabled {
9599            debug!("Phase 26: Skipped (fraud generation disabled)");
9600            return Ok(Vec::new());
9601        }
9602        info!("Phase 26: Generating Fraud Red-Flag Indicators");
9603
9604        use datasynth_generators::fraud::RedFlagGenerator;
9605
9606        let generator = RedFlagGenerator::new();
9607        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9608
9609        // Build a set of document IDs that are known-fraudulent from anomaly labels.
9610        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9611            .labels
9612            .iter()
9613            .filter(|label| label.anomaly_type.is_intentional())
9614            .map(|label| label.document_id.as_str())
9615            .collect();
9616
9617        let mut flags = Vec::new();
9618
9619        // Iterate P2P chains: use the purchase order document ID as the chain key.
9620        for chain in &document_flows.p2p_chains {
9621            let doc_id = &chain.purchase_order.header.document_id;
9622            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9623            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9624        }
9625
9626        // Iterate O2C chains: use the sales order document ID as the chain key.
9627        for chain in &document_flows.o2c_chains {
9628            let doc_id = &chain.sales_order.header.document_id;
9629            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9630            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9631        }
9632
9633        stats.red_flag_count = flags.len();
9634        info!(
9635            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9636            flags.len(),
9637            document_flows.p2p_chains.len(),
9638            document_flows.o2c_chains.len(),
9639            fraud_doc_ids.len()
9640        );
9641        self.check_resources_with_log("post-red-flags")?;
9642
9643        Ok(flags)
9644    }
9645
9646    /// Phase 26b: Generate collusion rings from employee/vendor pools.
9647    ///
9648    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
9649    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
9650    /// advance them over the simulation period.
9651    fn phase_collusion_rings(
9652        &mut self,
9653        stats: &mut EnhancedGenerationStatistics,
9654    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9655        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9656            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9657            return Ok(Vec::new());
9658        }
9659        info!("Phase 26b: Generating Collusion Rings");
9660
9661        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9662            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9663        let months = self.config.global.period_months;
9664
9665        let employee_ids: Vec<String> = self
9666            .master_data
9667            .employees
9668            .iter()
9669            .map(|e| e.employee_id.clone())
9670            .collect();
9671        let vendor_ids: Vec<String> = self
9672            .master_data
9673            .vendors
9674            .iter()
9675            .map(|v| v.vendor_id.clone())
9676            .collect();
9677
9678        let mut generator =
9679            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9680        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9681
9682        stats.collusion_ring_count = rings.len();
9683        info!(
9684            "Collusion rings generated: {} rings, total members: {}",
9685            rings.len(),
9686            rings
9687                .iter()
9688                .map(datasynth_generators::fraud::CollusionRing::size)
9689                .sum::<usize>()
9690        );
9691        self.check_resources_with_log("post-collusion-rings")?;
9692
9693        Ok(rings)
9694    }
9695
9696    /// Phase 27: Generate bi-temporal version chains for vendor entities.
9697    ///
9698    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
9699    /// master data changes over time, supporting bi-temporal audit queries.
9700    fn phase_temporal_attributes(
9701        &mut self,
9702        stats: &mut EnhancedGenerationStatistics,
9703    ) -> SynthResult<
9704        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9705    > {
9706        if !self.config.temporal_attributes.enabled {
9707            debug!("Phase 27: Skipped (temporal attributes disabled)");
9708            return Ok(Vec::new());
9709        }
9710        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9711
9712        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9713            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9714
9715        // Build a TemporalAttributeConfig from the user's config.
9716        // Since Phase 27 is already gated on temporal_attributes.enabled,
9717        // default to enabling version chains so users get actual mutations.
9718        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9719            || self.config.temporal_attributes.enabled;
9720        let temporal_config = {
9721            let ta = &self.config.temporal_attributes;
9722            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9723                .enabled(ta.enabled)
9724                .closed_probability(ta.valid_time.closed_probability)
9725                .avg_validity_days(ta.valid_time.avg_validity_days)
9726                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9727                .with_version_chains(if generate_version_chains {
9728                    ta.avg_versions_per_entity
9729                } else {
9730                    1.0
9731                })
9732                .build()
9733        };
9734        // Apply backdating settings if configured
9735        let temporal_config = if self
9736            .config
9737            .temporal_attributes
9738            .transaction_time
9739            .allow_backdating
9740        {
9741            let mut c = temporal_config;
9742            c.transaction_time.allow_backdating = true;
9743            c.transaction_time.backdating_probability = self
9744                .config
9745                .temporal_attributes
9746                .transaction_time
9747                .backdating_probability;
9748            c.transaction_time.max_backdate_days = self
9749                .config
9750                .temporal_attributes
9751                .transaction_time
9752                .max_backdate_days;
9753            c
9754        } else {
9755            temporal_config
9756        };
9757        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9758            temporal_config,
9759            self.seed + 130,
9760            start_date,
9761        );
9762
9763        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9764            self.seed + 130,
9765            datasynth_core::GeneratorType::Vendor,
9766        );
9767
9768        let chains: Vec<_> = self
9769            .master_data
9770            .vendors
9771            .iter()
9772            .map(|vendor| {
9773                let id = uuid_factory.next();
9774                gen.generate_version_chain(vendor.clone(), id)
9775            })
9776            .collect();
9777
9778        stats.temporal_version_chain_count = chains.len();
9779        info!("Temporal version chains generated: {} chains", chains.len());
9780        self.check_resources_with_log("post-temporal-attributes")?;
9781
9782        Ok(chains)
9783    }
9784
9785    /// Phase 28: Build entity relationship graph and cross-process links.
9786    ///
9787    /// Part 1 (gated on `relationship_strength.enabled`): builds an
9788    /// `EntityGraph` from master-data vendor/customer entities and
9789    /// journal-entry-derived transaction summaries.
9790    ///
9791    /// Part 2 (gated on `cross_process_links.enabled`): extracts
9792    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
9793    /// generates inventory-movement cross-process links.
9794    fn phase_entity_relationships(
9795        &self,
9796        journal_entries: &[JournalEntry],
9797        document_flows: &DocumentFlowSnapshot,
9798        stats: &mut EnhancedGenerationStatistics,
9799    ) -> SynthResult<(
9800        Option<datasynth_core::models::EntityGraph>,
9801        Vec<datasynth_core::models::CrossProcessLink>,
9802    )> {
9803        use datasynth_generators::relationships::{
9804            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
9805            TransactionSummary,
9806        };
9807
9808        let rs_enabled = self.config.relationship_strength.enabled;
9809        let cpl_enabled = self.config.cross_process_links.enabled
9810            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
9811
9812        if !rs_enabled && !cpl_enabled {
9813            debug!(
9814                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
9815            );
9816            return Ok((None, Vec::new()));
9817        }
9818
9819        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
9820
9821        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9822            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9823
9824        let company_code = self
9825            .config
9826            .companies
9827            .first()
9828            .map(|c| c.code.as_str())
9829            .unwrap_or("1000");
9830
9831        // Build the generator with matching config flags
9832        let gen_config = EntityGraphConfig {
9833            enabled: rs_enabled,
9834            cross_process: datasynth_generators::relationships::CrossProcessConfig {
9835                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
9836                enable_return_flows: false,
9837                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
9838                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
9839                // Use higher link rate for small datasets to avoid probabilistic empty results
9840                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
9841                    1.0
9842                } else {
9843                    0.30
9844                },
9845                ..Default::default()
9846            },
9847            strength_config: datasynth_generators::relationships::StrengthConfig {
9848                transaction_volume_weight: self
9849                    .config
9850                    .relationship_strength
9851                    .calculation
9852                    .transaction_volume_weight,
9853                transaction_count_weight: self
9854                    .config
9855                    .relationship_strength
9856                    .calculation
9857                    .transaction_count_weight,
9858                duration_weight: self
9859                    .config
9860                    .relationship_strength
9861                    .calculation
9862                    .relationship_duration_weight,
9863                recency_weight: self.config.relationship_strength.calculation.recency_weight,
9864                mutual_connections_weight: self
9865                    .config
9866                    .relationship_strength
9867                    .calculation
9868                    .mutual_connections_weight,
9869                recency_half_life_days: self
9870                    .config
9871                    .relationship_strength
9872                    .calculation
9873                    .recency_half_life_days,
9874            },
9875            ..Default::default()
9876        };
9877
9878        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9879
9880        // --- Part 1: Entity Relationship Graph ---
9881        let entity_graph = if rs_enabled {
9882            // Build EntitySummary lists from master data
9883            let vendor_summaries: Vec<EntitySummary> = self
9884                .master_data
9885                .vendors
9886                .iter()
9887                .map(|v| {
9888                    EntitySummary::new(
9889                        &v.vendor_id,
9890                        &v.name,
9891                        datasynth_core::models::GraphEntityType::Vendor,
9892                        start_date,
9893                    )
9894                })
9895                .collect();
9896
9897            let customer_summaries: Vec<EntitySummary> = self
9898                .master_data
9899                .customers
9900                .iter()
9901                .map(|c| {
9902                    EntitySummary::new(
9903                        &c.customer_id,
9904                        &c.name,
9905                        datasynth_core::models::GraphEntityType::Customer,
9906                        start_date,
9907                    )
9908                })
9909                .collect();
9910
9911            // Build transaction summaries from journal entries.
9912            // Key = (company_code, trading_partner) for entries that have a
9913            // trading partner.  This captures intercompany flows and any JE
9914            // whose line items carry a trading_partner reference.
9915            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9916                std::collections::HashMap::new();
9917
9918            for je in journal_entries {
9919                let cc = je.header.company_code.clone();
9920                let posting_date = je.header.posting_date;
9921                for line in &je.lines {
9922                    if let Some(ref tp) = line.trading_partner {
9923                        let amount = if line.debit_amount > line.credit_amount {
9924                            line.debit_amount
9925                        } else {
9926                            line.credit_amount
9927                        };
9928                        let entry = txn_summaries
9929                            .entry((cc.clone(), tp.clone()))
9930                            .or_insert_with(|| TransactionSummary {
9931                                total_volume: rust_decimal::Decimal::ZERO,
9932                                transaction_count: 0,
9933                                first_transaction_date: posting_date,
9934                                last_transaction_date: posting_date,
9935                                related_entities: std::collections::HashSet::new(),
9936                            });
9937                        entry.total_volume += amount;
9938                        entry.transaction_count += 1;
9939                        if posting_date < entry.first_transaction_date {
9940                            entry.first_transaction_date = posting_date;
9941                        }
9942                        if posting_date > entry.last_transaction_date {
9943                            entry.last_transaction_date = posting_date;
9944                        }
9945                        entry.related_entities.insert(cc.clone());
9946                    }
9947                }
9948            }
9949
9950            // Also extract transaction relationships from document flow chains.
9951            // P2P chains: Company → Vendor relationships
9952            for chain in &document_flows.p2p_chains {
9953                let cc = chain.purchase_order.header.company_code.clone();
9954                let vendor_id = chain.purchase_order.vendor_id.clone();
9955                let po_date = chain.purchase_order.header.document_date;
9956                let amount = chain.purchase_order.total_net_amount;
9957
9958                let entry = txn_summaries
9959                    .entry((cc.clone(), vendor_id))
9960                    .or_insert_with(|| TransactionSummary {
9961                        total_volume: rust_decimal::Decimal::ZERO,
9962                        transaction_count: 0,
9963                        first_transaction_date: po_date,
9964                        last_transaction_date: po_date,
9965                        related_entities: std::collections::HashSet::new(),
9966                    });
9967                entry.total_volume += amount;
9968                entry.transaction_count += 1;
9969                if po_date < entry.first_transaction_date {
9970                    entry.first_transaction_date = po_date;
9971                }
9972                if po_date > entry.last_transaction_date {
9973                    entry.last_transaction_date = po_date;
9974                }
9975                entry.related_entities.insert(cc);
9976            }
9977
9978            // O2C chains: Company → Customer relationships
9979            for chain in &document_flows.o2c_chains {
9980                let cc = chain.sales_order.header.company_code.clone();
9981                let customer_id = chain.sales_order.customer_id.clone();
9982                let so_date = chain.sales_order.header.document_date;
9983                let amount = chain.sales_order.total_net_amount;
9984
9985                let entry = txn_summaries
9986                    .entry((cc.clone(), customer_id))
9987                    .or_insert_with(|| TransactionSummary {
9988                        total_volume: rust_decimal::Decimal::ZERO,
9989                        transaction_count: 0,
9990                        first_transaction_date: so_date,
9991                        last_transaction_date: so_date,
9992                        related_entities: std::collections::HashSet::new(),
9993                    });
9994                entry.total_volume += amount;
9995                entry.transaction_count += 1;
9996                if so_date < entry.first_transaction_date {
9997                    entry.first_transaction_date = so_date;
9998                }
9999                if so_date > entry.last_transaction_date {
10000                    entry.last_transaction_date = so_date;
10001                }
10002                entry.related_entities.insert(cc);
10003            }
10004
10005            let as_of_date = journal_entries
10006                .last()
10007                .map(|je| je.header.posting_date)
10008                .unwrap_or(start_date);
10009
10010            let graph = gen.generate_entity_graph(
10011                company_code,
10012                as_of_date,
10013                &vendor_summaries,
10014                &customer_summaries,
10015                &txn_summaries,
10016            );
10017
10018            info!(
10019                "Entity relationship graph: {} nodes, {} edges",
10020                graph.nodes.len(),
10021                graph.edges.len()
10022            );
10023            stats.entity_relationship_node_count = graph.nodes.len();
10024            stats.entity_relationship_edge_count = graph.edges.len();
10025            Some(graph)
10026        } else {
10027            None
10028        };
10029
10030        // --- Part 2: Cross-Process Links ---
10031        let cross_process_links = if cpl_enabled {
10032            // Build GoodsReceiptRef from P2P chains
10033            let gr_refs: Vec<GoodsReceiptRef> = document_flows
10034                .p2p_chains
10035                .iter()
10036                .flat_map(|chain| {
10037                    let vendor_id = chain.purchase_order.vendor_id.clone();
10038                    let cc = chain.purchase_order.header.company_code.clone();
10039                    chain.goods_receipts.iter().flat_map(move |gr| {
10040                        gr.items.iter().filter_map({
10041                            let doc_id = gr.header.document_id.clone();
10042                            let v_id = vendor_id.clone();
10043                            let company = cc.clone();
10044                            let receipt_date = gr.header.document_date;
10045                            move |item| {
10046                                item.base
10047                                    .material_id
10048                                    .as_ref()
10049                                    .map(|mat_id| GoodsReceiptRef {
10050                                        document_id: doc_id.clone(),
10051                                        material_id: mat_id.clone(),
10052                                        quantity: item.base.quantity,
10053                                        receipt_date,
10054                                        vendor_id: v_id.clone(),
10055                                        company_code: company.clone(),
10056                                    })
10057                            }
10058                        })
10059                    })
10060                })
10061                .collect();
10062
10063            // Build DeliveryRef from O2C chains
10064            let del_refs: Vec<DeliveryRef> = document_flows
10065                .o2c_chains
10066                .iter()
10067                .flat_map(|chain| {
10068                    let customer_id = chain.sales_order.customer_id.clone();
10069                    let cc = chain.sales_order.header.company_code.clone();
10070                    chain.deliveries.iter().flat_map(move |del| {
10071                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10072                        del.items.iter().filter_map({
10073                            let doc_id = del.header.document_id.clone();
10074                            let c_id = customer_id.clone();
10075                            let company = cc.clone();
10076                            move |item| {
10077                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10078                                    document_id: doc_id.clone(),
10079                                    material_id: mat_id.clone(),
10080                                    quantity: item.base.quantity,
10081                                    delivery_date,
10082                                    customer_id: c_id.clone(),
10083                                    company_code: company.clone(),
10084                                })
10085                            }
10086                        })
10087                    })
10088                })
10089                .collect();
10090
10091            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10092            info!("Cross-process links generated: {} links", links.len());
10093            stats.cross_process_link_count = links.len();
10094            links
10095        } else {
10096            Vec::new()
10097        };
10098
10099        self.check_resources_with_log("post-entity-relationships")?;
10100        Ok((entity_graph, cross_process_links))
10101    }
10102
10103    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
10104    fn phase_industry_data(
10105        &self,
10106        stats: &mut EnhancedGenerationStatistics,
10107    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10108        if !self.config.industry_specific.enabled {
10109            return None;
10110        }
10111        info!("Phase 29: Generating industry-specific data");
10112        let output = datasynth_generators::industry::factory::generate_industry_output(
10113            self.config.global.industry,
10114        );
10115        stats.industry_gl_account_count = output.gl_accounts.len();
10116        info!(
10117            "Industry data generated: {} GL accounts for {:?}",
10118            output.gl_accounts.len(),
10119            self.config.global.industry
10120        );
10121        Some(output)
10122    }
10123
10124    /// Phase 3b: Generate opening balances for each company.
10125    fn phase_opening_balances(
10126        &mut self,
10127        coa: &Arc<ChartOfAccounts>,
10128        stats: &mut EnhancedGenerationStatistics,
10129    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10130        if !self.config.balance.generate_opening_balances {
10131            debug!("Phase 3b: Skipped (opening balance generation disabled)");
10132            return Ok(Vec::new());
10133        }
10134        info!("Phase 3b: Generating Opening Balances");
10135
10136        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10137            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10138        let fiscal_year = start_date.year();
10139
10140        // **v5.3** — When the shard context supplies prior-period
10141        // opening-balance carryovers, use them directly instead of
10142        // calling `OpeningBalanceGenerator`.  This implements multi-
10143        // period continuity: period N+1 opens with period N's closing
10144        // BS positions exactly, rather than re-rolling the industry-
10145        // mix generator and losing the audit trail.
10146        //
10147        // Empty `opening_balances` (the v5.0–v5.2 default) falls
10148        // through to the generator path — byte-identical behaviour
10149        // for single-period engagements.
10150        if let Some(ctx) = &self.shard_context {
10151            if !ctx.opening_balances.is_empty() {
10152                debug!(
10153                    "Phase 3b: using v5.3 opening-balance carryover ({} accounts)",
10154                    ctx.opening_balances.len()
10155                );
10156                let mut results = Vec::new();
10157                for company in &self.config.companies {
10158                    let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10159                        .opening_balances
10160                        .iter()
10161                        .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10162                        .collect();
10163                    let total_assets = ctx
10164                        .opening_balances
10165                        .iter()
10166                        .filter(|ob| {
10167                            matches!(
10168                                ob.account_type,
10169                                AccountType::Asset | AccountType::ContraAsset
10170                            )
10171                        })
10172                        .map(|ob| ob.net_balance())
10173                        .sum::<rust_decimal::Decimal>();
10174                    let total_liabilities = ctx
10175                        .opening_balances
10176                        .iter()
10177                        .filter(|ob| {
10178                            matches!(
10179                                ob.account_type,
10180                                AccountType::Liability | AccountType::ContraLiability
10181                            )
10182                        })
10183                        .map(|ob| ob.net_balance())
10184                        .sum::<rust_decimal::Decimal>();
10185                    let total_equity = ctx
10186                        .opening_balances
10187                        .iter()
10188                        .filter(|ob| {
10189                            matches!(
10190                                ob.account_type,
10191                                AccountType::Equity | AccountType::ContraEquity
10192                            )
10193                        })
10194                        .map(|ob| ob.net_balance())
10195                        .sum::<rust_decimal::Decimal>();
10196                    let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10197                        < rust_decimal::Decimal::ONE;
10198                    results.push(GeneratedOpeningBalance {
10199                        company_code: company.code.clone(),
10200                        as_of_date: start_date,
10201                        balances,
10202                        total_assets,
10203                        total_liabilities,
10204                        total_equity,
10205                        is_balanced,
10206                        calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10207                            current_ratio: None,
10208                            quick_ratio: None,
10209                            debt_to_equity: None,
10210                            working_capital: rust_decimal::Decimal::ZERO,
10211                        },
10212                    });
10213                }
10214                stats.opening_balance_count = results.len();
10215                info!(
10216                    "Phase 3b: opening-balance carryover applied ({} companies)",
10217                    results.len()
10218                );
10219                self.check_resources_with_log("post-opening-balances")?;
10220                return Ok(results);
10221            }
10222        }
10223
10224        let industry = match self.config.global.industry {
10225            IndustrySector::Manufacturing => IndustryType::Manufacturing,
10226            IndustrySector::Retail => IndustryType::Retail,
10227            IndustrySector::FinancialServices => IndustryType::Financial,
10228            IndustrySector::Healthcare => IndustryType::Healthcare,
10229            IndustrySector::Technology => IndustryType::Technology,
10230            _ => IndustryType::Manufacturing,
10231        };
10232
10233        let config = datasynth_generators::OpeningBalanceConfig {
10234            industry,
10235            ..Default::default()
10236        };
10237        let mut gen =
10238            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10239
10240        let mut results = Vec::new();
10241        for company in &self.config.companies {
10242            let spec = OpeningBalanceSpec::new(
10243                company.code.clone(),
10244                start_date,
10245                fiscal_year,
10246                company.currency.clone(),
10247                rust_decimal::Decimal::new(10_000_000, 0),
10248                industry,
10249            );
10250            let ob = gen.generate(&spec, coa, start_date, &company.code);
10251            results.push(ob);
10252        }
10253
10254        stats.opening_balance_count = results.len();
10255        info!("Opening balances generated: {} companies", results.len());
10256        self.check_resources_with_log("post-opening-balances")?;
10257
10258        Ok(results)
10259    }
10260
10261    /// Phase 9b: Reconcile GL control accounts to subledger balances.
10262    fn phase_subledger_reconciliation(
10263        &mut self,
10264        subledger: &SubledgerSnapshot,
10265        entries: &[JournalEntry],
10266        stats: &mut EnhancedGenerationStatistics,
10267    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10268        if !self.config.balance.reconcile_subledgers {
10269            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10270            return Ok(Vec::new());
10271        }
10272        info!("Phase 9b: Reconciling GL to subledger balances");
10273
10274        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10275            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10276            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10277
10278        // Build GL balance map from journal entries using a balance tracker
10279        let tracker_config = BalanceTrackerConfig {
10280            validate_on_each_entry: false,
10281            track_history: false,
10282            fail_on_validation_error: false,
10283            ..Default::default()
10284        };
10285        let recon_currency = self
10286            .config
10287            .companies
10288            .first()
10289            .map(|c| c.currency.clone())
10290            .unwrap_or_else(|| "USD".to_string());
10291        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10292        let validation_errors = tracker.apply_entries(entries);
10293        if !validation_errors.is_empty() {
10294            warn!(
10295                error_count = validation_errors.len(),
10296                "Balance tracker encountered validation errors during subledger reconciliation"
10297            );
10298            for err in &validation_errors {
10299                debug!("Balance validation error: {:?}", err);
10300            }
10301        }
10302
10303        let mut engine = datasynth_generators::ReconciliationEngine::new(
10304            datasynth_generators::ReconciliationConfig::default(),
10305        );
10306
10307        let mut results = Vec::new();
10308        let company_code = self
10309            .config
10310            .companies
10311            .first()
10312            .map(|c| c.code.as_str())
10313            .unwrap_or("1000");
10314
10315        // Reconcile AR
10316        if !subledger.ar_invoices.is_empty() {
10317            let gl_balance = tracker
10318                .get_account_balance(
10319                    company_code,
10320                    datasynth_core::accounts::control_accounts::AR_CONTROL,
10321                )
10322                .map(|b| b.closing_balance)
10323                .unwrap_or_default();
10324            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10325            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10326        }
10327
10328        // Reconcile AP
10329        if !subledger.ap_invoices.is_empty() {
10330            let gl_balance = tracker
10331                .get_account_balance(
10332                    company_code,
10333                    datasynth_core::accounts::control_accounts::AP_CONTROL,
10334                )
10335                .map(|b| b.closing_balance)
10336                .unwrap_or_default();
10337            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10338            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10339        }
10340
10341        // Reconcile FA
10342        if !subledger.fa_records.is_empty() {
10343            let gl_asset_balance = tracker
10344                .get_account_balance(
10345                    company_code,
10346                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10347                )
10348                .map(|b| b.closing_balance)
10349                .unwrap_or_default();
10350            let gl_accum_depr_balance = tracker
10351                .get_account_balance(
10352                    company_code,
10353                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10354                )
10355                .map(|b| b.closing_balance)
10356                .unwrap_or_default();
10357            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10358                subledger.fa_records.iter().collect();
10359            let (asset_recon, depr_recon) = engine.reconcile_fa(
10360                company_code,
10361                end_date,
10362                gl_asset_balance,
10363                gl_accum_depr_balance,
10364                &fa_refs,
10365            );
10366            results.push(asset_recon);
10367            results.push(depr_recon);
10368        }
10369
10370        // Reconcile Inventory
10371        if !subledger.inventory_positions.is_empty() {
10372            let gl_balance = tracker
10373                .get_account_balance(
10374                    company_code,
10375                    datasynth_core::accounts::control_accounts::INVENTORY,
10376                )
10377                .map(|b| b.closing_balance)
10378                .unwrap_or_default();
10379            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10380                subledger.inventory_positions.iter().collect();
10381            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10382        }
10383
10384        stats.subledger_reconciliation_count = results.len();
10385        let passed = results.iter().filter(|r| r.is_balanced()).count();
10386        let failed = results.len() - passed;
10387        info!(
10388            "Subledger reconciliation: {} checks, {} passed, {} failed",
10389            results.len(),
10390            passed,
10391            failed
10392        );
10393        self.check_resources_with_log("post-subledger-reconciliation")?;
10394
10395        Ok(results)
10396    }
10397
10398    /// Generate the chart of accounts.
10399    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10400        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10401
10402        let coa_framework = self.resolve_coa_framework();
10403
10404        let mut gen = ChartOfAccountsGenerator::new(
10405            self.config.chart_of_accounts.complexity,
10406            self.config.global.industry,
10407            self.seed,
10408        )
10409        .with_coa_framework(coa_framework);
10410
10411        let mut built = gen.generate();
10412        // v4.4.1: propagate the accounting framework label from config
10413        // onto the CoA struct so SDK consumers can read it without
10414        // cross-referencing the config (they previously saw null).
10415        if self.config.accounting_standards.enabled {
10416            use datasynth_config::schema::AccountingFrameworkConfig;
10417            built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10418                match f {
10419                    AccountingFrameworkConfig::UsGaap => "us_gaap",
10420                    AccountingFrameworkConfig::Ifrs => "ifrs",
10421                    AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10422                    AccountingFrameworkConfig::GermanGaap => "german_gaap",
10423                    AccountingFrameworkConfig::DualReporting => "dual_reporting",
10424                }
10425                .to_string()
10426            });
10427        }
10428        let coa = Arc::new(built);
10429        self.coa = Some(Arc::clone(&coa));
10430
10431        if let Some(pb) = pb {
10432            pb.finish_with_message("Chart of Accounts complete");
10433        }
10434
10435        Ok(coa)
10436    }
10437
10438    /// Generate master data entities.
10439    fn generate_master_data(&mut self) -> SynthResult<()> {
10440        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10441            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10442        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10443
10444        let total = self.config.companies.len() as u64 * 5; // 5 entity types
10445        let pb = self.create_progress_bar(total, "Generating Master Data");
10446
10447        // Resolve country pack once for all companies (uses primary company's country)
10448        let pack = self.primary_pack().clone();
10449
10450        // Capture config values needed inside the parallel closure
10451        let vendors_per_company = self.phase_config.vendors_per_company;
10452        let customers_per_company = self.phase_config.customers_per_company;
10453        let materials_per_company = self.phase_config.materials_per_company;
10454        let assets_per_company = self.phase_config.assets_per_company;
10455        let coa_framework = self.resolve_coa_framework();
10456
10457        // Generate all master data in parallel across companies.
10458        // Each company's data is independent, making this embarrassingly parallel.
10459        let per_company_results: Vec<_> = self
10460            .config
10461            .companies
10462            .par_iter()
10463            .enumerate()
10464            .map(|(i, company)| {
10465                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10466                let pack = pack.clone();
10467
10468                // Generate vendors (offset counter so IDs are globally unique across companies)
10469                let mut vendor_gen = VendorGenerator::new(company_seed);
10470                vendor_gen.set_country_pack(pack.clone());
10471                vendor_gen.set_coa_framework(coa_framework);
10472                vendor_gen.set_counter_offset(i * vendors_per_company);
10473                // v3.2.0+: user-supplied bank names (and future template
10474                // strings) flow through the shared provider.
10475                vendor_gen.set_template_provider(self.template_provider.clone());
10476                // Wire vendor network config when enabled
10477                if self.config.vendor_network.enabled {
10478                    let vn = &self.config.vendor_network;
10479                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10480                        enabled: true,
10481                        depth: vn.depth,
10482                        tier1_count: datasynth_generators::TierCountConfig::new(
10483                            vn.tier1.min,
10484                            vn.tier1.max,
10485                        ),
10486                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
10487                            vn.tier2_per_parent.min,
10488                            vn.tier2_per_parent.max,
10489                        ),
10490                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
10491                            vn.tier3_per_parent.min,
10492                            vn.tier3_per_parent.max,
10493                        ),
10494                        cluster_distribution: datasynth_generators::ClusterDistribution {
10495                            reliable_strategic: vn.clusters.reliable_strategic,
10496                            standard_operational: vn.clusters.standard_operational,
10497                            transactional: vn.clusters.transactional,
10498                            problematic: vn.clusters.problematic,
10499                        },
10500                        concentration_limits: datasynth_generators::ConcentrationLimits {
10501                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10502                            max_top5: vn.dependencies.top_5_concentration,
10503                        },
10504                        ..datasynth_generators::VendorNetworkConfig::default()
10505                    });
10506                }
10507                let vendor_pool =
10508                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10509
10510                // Generate customers (offset counter so IDs are globally unique across companies)
10511                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10512                customer_gen.set_country_pack(pack.clone());
10513                customer_gen.set_coa_framework(coa_framework);
10514                customer_gen.set_counter_offset(i * customers_per_company);
10515                // v3.2.0+: user-supplied customer names flow through the shared provider.
10516                customer_gen.set_template_provider(self.template_provider.clone());
10517                // Wire customer segmentation config when enabled
10518                if self.config.customer_segmentation.enabled {
10519                    let cs = &self.config.customer_segmentation;
10520                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10521                        enabled: true,
10522                        segment_distribution: datasynth_generators::SegmentDistribution {
10523                            enterprise: cs.value_segments.enterprise.customer_share,
10524                            mid_market: cs.value_segments.mid_market.customer_share,
10525                            smb: cs.value_segments.smb.customer_share,
10526                            consumer: cs.value_segments.consumer.customer_share,
10527                        },
10528                        referral_config: datasynth_generators::ReferralConfig {
10529                            enabled: cs.networks.referrals.enabled,
10530                            referral_rate: cs.networks.referrals.referral_rate,
10531                            ..Default::default()
10532                        },
10533                        hierarchy_config: datasynth_generators::HierarchyConfig {
10534                            enabled: cs.networks.corporate_hierarchies.enabled,
10535                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10536                            ..Default::default()
10537                        },
10538                        ..Default::default()
10539                    };
10540                    customer_gen.set_segmentation_config(seg_cfg);
10541                }
10542                let customer_pool = customer_gen.generate_customer_pool(
10543                    customers_per_company,
10544                    &company.code,
10545                    start_date,
10546                );
10547
10548                // Generate materials (offset counter so IDs are globally unique across companies)
10549                let mut material_gen = MaterialGenerator::new(company_seed + 200);
10550                material_gen.set_country_pack(pack.clone());
10551                material_gen.set_counter_offset(i * materials_per_company);
10552                // v3.2.1+: user-supplied material descriptions flow through shared provider
10553                material_gen.set_template_provider(self.template_provider.clone());
10554                let material_pool = material_gen.generate_material_pool(
10555                    materials_per_company,
10556                    &company.code,
10557                    start_date,
10558                );
10559
10560                // Generate fixed assets
10561                let mut asset_gen = AssetGenerator::new(company_seed + 300);
10562                // v3.2.1+: user-supplied asset descriptions flow through shared provider
10563                asset_gen.set_template_provider(self.template_provider.clone());
10564                let asset_pool = asset_gen.generate_asset_pool(
10565                    assets_per_company,
10566                    &company.code,
10567                    (start_date, end_date),
10568                );
10569
10570                // Generate employees
10571                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
10572                employee_gen.set_country_pack(pack);
10573                // v3.2.1+: user-supplied department names flow through shared provider
10574                employee_gen.set_template_provider(self.template_provider.clone());
10575                let employee_pool =
10576                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
10577
10578                // Generate employee change history (2-5 events per employee)
10579                let employee_change_history =
10580                    employee_gen.generate_all_change_history(&employee_pool, end_date);
10581
10582                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
10583                let employee_ids: Vec<String> = employee_pool
10584                    .employees
10585                    .iter()
10586                    .map(|e| e.employee_id.clone())
10587                    .collect();
10588                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
10589                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
10590
10591                // v5.1: profit centre hierarchy (two-level: top-level
10592                // segment / region / product-group nodes + sub-units).
10593                let mut pc_gen =
10594                    datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
10595                let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
10596
10597                (
10598                    vendor_pool.vendors,
10599                    customer_pool.customers,
10600                    material_pool.materials,
10601                    asset_pool.assets,
10602                    employee_pool.employees,
10603                    employee_change_history,
10604                    cost_centers,
10605                    profit_centers,
10606                )
10607            })
10608            .collect();
10609
10610        // Aggregate results from all companies
10611        for (
10612            vendors,
10613            customers,
10614            materials,
10615            assets,
10616            employees,
10617            change_history,
10618            cost_centers,
10619            profit_centers,
10620        ) in per_company_results
10621        {
10622            self.master_data.vendors.extend(vendors);
10623            self.master_data.customers.extend(customers);
10624            self.master_data.materials.extend(materials);
10625            self.master_data.assets.extend(assets);
10626            self.master_data.employees.extend(employees);
10627            self.master_data.cost_centers.extend(cost_centers);
10628            self.master_data.profit_centers.extend(profit_centers);
10629            self.master_data
10630                .employee_change_history
10631                .extend(change_history);
10632        }
10633
10634        // v3.3.0: one OrganizationalProfile per company. Cheap to
10635        // generate (derived from industry + company_code) so we
10636        // always emit when master data runs; no separate config flag.
10637        {
10638            use datasynth_core::models::IndustrySector;
10639            use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10640            let industry = match self.config.global.industry {
10641                IndustrySector::Manufacturing => "manufacturing",
10642                IndustrySector::Retail => "retail",
10643                IndustrySector::FinancialServices => "financial_services",
10644                IndustrySector::Technology => "technology",
10645                IndustrySector::Healthcare => "healthcare",
10646                _ => "other",
10647            };
10648            for (i, company) in self.config.companies.iter().enumerate() {
10649                let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10650                let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10651                let profile = profile_gen.generate(&company.code, industry);
10652                self.master_data.organizational_profiles.push(profile);
10653            }
10654        }
10655
10656        if let Some(pb) = &pb {
10657            pb.inc(total);
10658        }
10659        if let Some(pb) = pb {
10660            pb.finish_with_message("Master data generation complete");
10661        }
10662
10663        Ok(())
10664    }
10665
10666    /// Generate document flows (P2P and O2C).
10667    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10668        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10669            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10670
10671        // Generate P2P chains
10672        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
10673        let months = (self.config.global.period_months as usize).max(1);
10674        let p2p_count = self
10675            .phase_config
10676            .p2p_chains
10677            .min(self.master_data.vendors.len() * 2 * months);
10678        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10679
10680        // Convert P2P config from schema to generator config
10681        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10682        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10683        p2p_gen.set_country_pack(self.primary_pack().clone());
10684        // v3.4.1: wire temporal context so PO/GR/invoice/payment dates snap
10685        // to business days. No-op when `temporal_patterns.business_days.
10686        // enabled = false`.
10687        if let Some(ctx) = &self.temporal_context {
10688            p2p_gen.set_temporal_context(Arc::clone(ctx));
10689        }
10690
10691        for i in 0..p2p_count {
10692            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10693            let materials: Vec<&Material> = self
10694                .master_data
10695                .materials
10696                .iter()
10697                .skip(i % self.master_data.materials.len().max(1))
10698                .take(2.min(self.master_data.materials.len()))
10699                .collect();
10700
10701            if materials.is_empty() {
10702                continue;
10703            }
10704
10705            let company = &self.config.companies[i % self.config.companies.len()];
10706            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10707            let fiscal_period = po_date.month() as u8;
10708            let created_by = if self.master_data.employees.is_empty() {
10709                "SYSTEM"
10710            } else {
10711                self.master_data.employees[i % self.master_data.employees.len()]
10712                    .user_id
10713                    .as_str()
10714            };
10715
10716            let chain = p2p_gen.generate_chain(
10717                &company.code,
10718                vendor,
10719                &materials,
10720                po_date,
10721                start_date.year() as u16,
10722                fiscal_period,
10723                created_by,
10724            );
10725
10726            // Flatten documents
10727            flows.purchase_orders.push(chain.purchase_order.clone());
10728            flows.goods_receipts.extend(chain.goods_receipts.clone());
10729            if let Some(vi) = &chain.vendor_invoice {
10730                flows.vendor_invoices.push(vi.clone());
10731            }
10732            if let Some(payment) = &chain.payment {
10733                flows.payments.push(payment.clone());
10734            }
10735            for remainder in &chain.remainder_payments {
10736                flows.payments.push(remainder.clone());
10737            }
10738            flows.p2p_chains.push(chain);
10739
10740            if let Some(pb) = &pb {
10741                pb.inc(1);
10742            }
10743        }
10744
10745        if let Some(pb) = pb {
10746            pb.finish_with_message("P2P document flows complete");
10747        }
10748
10749        // Generate O2C chains
10750        // Cap at ~2 SOs per customer per month to keep order volume realistic
10751        let o2c_count = self
10752            .phase_config
10753            .o2c_chains
10754            .min(self.master_data.customers.len() * 2 * months);
10755        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
10756
10757        // Convert O2C config from schema to generator config
10758        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
10759        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
10760        o2c_gen.set_country_pack(self.primary_pack().clone());
10761        // v3.4.1: wire temporal context (no-op when business_days disabled).
10762        if let Some(ctx) = &self.temporal_context {
10763            o2c_gen.set_temporal_context(Arc::clone(ctx));
10764        }
10765
10766        for i in 0..o2c_count {
10767            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
10768            let materials: Vec<&Material> = self
10769                .master_data
10770                .materials
10771                .iter()
10772                .skip(i % self.master_data.materials.len().max(1))
10773                .take(2.min(self.master_data.materials.len()))
10774                .collect();
10775
10776            if materials.is_empty() {
10777                continue;
10778            }
10779
10780            let company = &self.config.companies[i % self.config.companies.len()];
10781            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
10782            let fiscal_period = so_date.month() as u8;
10783            let created_by = if self.master_data.employees.is_empty() {
10784                "SYSTEM"
10785            } else {
10786                self.master_data.employees[i % self.master_data.employees.len()]
10787                    .user_id
10788                    .as_str()
10789            };
10790
10791            let chain = o2c_gen.generate_chain(
10792                &company.code,
10793                customer,
10794                &materials,
10795                so_date,
10796                start_date.year() as u16,
10797                fiscal_period,
10798                created_by,
10799            );
10800
10801            // Flatten documents
10802            flows.sales_orders.push(chain.sales_order.clone());
10803            flows.deliveries.extend(chain.deliveries.clone());
10804            if let Some(ci) = &chain.customer_invoice {
10805                flows.customer_invoices.push(ci.clone());
10806            }
10807            if let Some(receipt) = &chain.customer_receipt {
10808                flows.payments.push(receipt.clone());
10809            }
10810            // Extract remainder receipts (follow-up to partial payments)
10811            for receipt in &chain.remainder_receipts {
10812                flows.payments.push(receipt.clone());
10813            }
10814            flows.o2c_chains.push(chain);
10815
10816            if let Some(pb) = &pb {
10817                pb.inc(1);
10818            }
10819        }
10820
10821        if let Some(pb) = pb {
10822            pb.finish_with_message("O2C document flows complete");
10823        }
10824
10825        // Collect all document cross-references from document headers.
10826        // Each document embeds references to its predecessor(s) via add_reference(); here we
10827        // denormalise them into a flat list for the document_references.json output file.
10828        {
10829            let mut refs = Vec::new();
10830            for doc in &flows.purchase_orders {
10831                refs.extend(doc.header.document_references.iter().cloned());
10832            }
10833            for doc in &flows.goods_receipts {
10834                refs.extend(doc.header.document_references.iter().cloned());
10835            }
10836            for doc in &flows.vendor_invoices {
10837                refs.extend(doc.header.document_references.iter().cloned());
10838            }
10839            for doc in &flows.sales_orders {
10840                refs.extend(doc.header.document_references.iter().cloned());
10841            }
10842            for doc in &flows.deliveries {
10843                refs.extend(doc.header.document_references.iter().cloned());
10844            }
10845            for doc in &flows.customer_invoices {
10846                refs.extend(doc.header.document_references.iter().cloned());
10847            }
10848            for doc in &flows.payments {
10849                refs.extend(doc.header.document_references.iter().cloned());
10850            }
10851            debug!(
10852                "Collected {} document cross-references from document headers",
10853                refs.len()
10854            );
10855            flows.document_references = refs;
10856        }
10857
10858        Ok(())
10859    }
10860
10861    /// Generate journal entries using parallel generation across multiple cores.
10862    fn generate_journal_entries(
10863        &mut self,
10864        coa: &Arc<ChartOfAccounts>,
10865    ) -> SynthResult<Vec<JournalEntry>> {
10866        use datasynth_core::traits::ParallelGenerator;
10867
10868        let total = self.calculate_total_transactions();
10869        let pb = self.create_progress_bar(total, "Generating Journal Entries");
10870
10871        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10872            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10873        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10874
10875        let company_codes: Vec<String> = self
10876            .config
10877            .companies
10878            .iter()
10879            .map(|c| c.code.clone())
10880            .collect();
10881
10882        let mut generator = JournalEntryGenerator::new_with_params(
10883            self.config.transactions.clone(),
10884            Arc::clone(coa),
10885            company_codes,
10886            start_date,
10887            end_date,
10888            self.seed,
10889        );
10890        // Wire the `business_processes.*_weight` config through (phantom knob
10891        // until now — the JE generator hard-coded 0.35/0.30/0.20/0.10/0.05).
10892        let bp = &self.config.business_processes;
10893        generator.set_business_process_weights(
10894            bp.o2c_weight,
10895            bp.p2p_weight,
10896            bp.r2r_weight,
10897            bp.h2r_weight,
10898            bp.a2r_weight,
10899        );
10900        // v3.4.0: wire advanced distributions (mixture models + industry
10901        // profiles). No-op when `distributions.enabled = false` or
10902        // `distributions.amounts.enabled = false`, preserving v3.3.2
10903        // byte-identical output on default configs.
10904        generator
10905            .set_advanced_distributions(&self.config.distributions, self.seed + 400)
10906            .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
10907        let generator = generator;
10908
10909        // Connect generated master data to ensure JEs reference real entities
10910        // Enable persona-based error injection for realistic human behavior
10911        // Pass fraud configuration for fraud injection
10912        let je_pack = self.primary_pack();
10913
10914        let mut generator = generator
10915            .with_master_data(
10916                &self.master_data.vendors,
10917                &self.master_data.customers,
10918                &self.master_data.materials,
10919            )
10920            .with_country_pack_names(je_pack)
10921            .with_country_pack_temporal(
10922                self.config.temporal_patterns.clone(),
10923                self.seed + 200,
10924                je_pack,
10925            )
10926            .with_persona_errors(true)
10927            .with_fraud_config(self.config.fraud.clone());
10928
10929        // Apply temporal drift if configured. v3.5.2+: also merge
10930        // `distributions.regime_changes` (regime events, economic
10931        // cycles, parameter drifts) into the same DriftConfig so both
10932        // knobs flow through the shared DriftController.
10933        let temporal_enabled = self.config.temporal.enabled;
10934        let regimes_enabled = self.config.distributions.regime_changes.enabled;
10935        if temporal_enabled || regimes_enabled {
10936            let mut drift_config = if temporal_enabled {
10937                self.config.temporal.to_core_config()
10938            } else {
10939                // regime-changes only: start from default (drift OFF),
10940                // apply_to flips `enabled = true`.
10941                datasynth_core::distributions::DriftConfig::default()
10942            };
10943            if regimes_enabled {
10944                self.config
10945                    .distributions
10946                    .regime_changes
10947                    .apply_to(&mut drift_config, start_date);
10948            }
10949            generator = generator.with_drift_config(drift_config, self.seed + 100);
10950        }
10951
10952        // Check memory limit at start
10953        self.check_memory_limit()?;
10954
10955        // Determine parallelism: use available cores, but cap at total entries
10956        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
10957
10958        // Use parallel generation for datasets with 10K+ entries.
10959        // Below this threshold, the statistical properties of a single-seeded
10960        // generator (e.g. Benford compliance) are better preserved.
10961        let entries = if total >= 10_000 && num_threads > 1 {
10962            // Parallel path: split the generator across cores and generate in parallel.
10963            // Each sub-generator gets a unique seed for deterministic, independent generation.
10964            let sub_generators = generator.split(num_threads);
10965            let entries_per_thread = total as usize / num_threads;
10966            let remainder = total as usize % num_threads;
10967
10968            let batches: Vec<Vec<JournalEntry>> = sub_generators
10969                .into_par_iter()
10970                .enumerate()
10971                .map(|(i, mut gen)| {
10972                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
10973                    gen.generate_batch(count)
10974                })
10975                .collect();
10976
10977            // Merge all batches into a single Vec
10978            let entries = JournalEntryGenerator::merge_results(batches);
10979
10980            if let Some(pb) = &pb {
10981                pb.inc(total);
10982            }
10983            entries
10984        } else {
10985            // Sequential path for small datasets (< 1000 entries)
10986            let mut entries = Vec::with_capacity(total as usize);
10987            for _ in 0..total {
10988                let entry = generator.generate();
10989                entries.push(entry);
10990                if let Some(pb) = &pb {
10991                    pb.inc(1);
10992                }
10993            }
10994            entries
10995        };
10996
10997        if let Some(pb) = pb {
10998            pb.finish_with_message("Journal entries complete");
10999        }
11000
11001        Ok(entries)
11002    }
11003
11004    /// Generate journal entries from document flows.
11005    ///
11006    /// This creates proper GL entries for each document in the P2P and O2C flows,
11007    /// ensuring that document activity is reflected in the general ledger.
11008    fn generate_jes_from_document_flows(
11009        &mut self,
11010        flows: &DocumentFlowSnapshot,
11011    ) -> SynthResult<Vec<JournalEntry>> {
11012        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11013        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11014
11015        let je_config = match self.resolve_coa_framework() {
11016            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11017            CoAFramework::GermanSkr04 => {
11018                let fa = datasynth_core::FrameworkAccounts::german_gaap();
11019                DocumentFlowJeConfig::from(&fa)
11020            }
11021            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11022        };
11023
11024        let populate_fec = je_config.populate_fec_fields;
11025        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11026
11027        // Build auxiliary account lookup from vendor/customer master data so that
11028        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
11029        // PCG "4010001") instead of raw partner IDs.
11030        if populate_fec {
11031            let mut aux_lookup = std::collections::HashMap::new();
11032            for vendor in &self.master_data.vendors {
11033                if let Some(ref aux) = vendor.auxiliary_gl_account {
11034                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11035                }
11036            }
11037            for customer in &self.master_data.customers {
11038                if let Some(ref aux) = customer.auxiliary_gl_account {
11039                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11040                }
11041            }
11042            if !aux_lookup.is_empty() {
11043                generator.set_auxiliary_account_lookup(aux_lookup);
11044            }
11045        }
11046
11047        let mut entries = Vec::new();
11048
11049        // Generate JEs from P2P chains
11050        for chain in &flows.p2p_chains {
11051            let chain_entries = generator.generate_from_p2p_chain(chain);
11052            entries.extend(chain_entries);
11053            if let Some(pb) = &pb {
11054                pb.inc(1);
11055            }
11056        }
11057
11058        // Generate JEs from O2C chains
11059        for chain in &flows.o2c_chains {
11060            let chain_entries = generator.generate_from_o2c_chain(chain);
11061            entries.extend(chain_entries);
11062            if let Some(pb) = &pb {
11063                pb.inc(1);
11064            }
11065        }
11066
11067        if let Some(pb) = pb {
11068            pb.finish_with_message(format!(
11069                "Generated {} JEs from document flows",
11070                entries.len()
11071            ));
11072        }
11073
11074        Ok(entries)
11075    }
11076
11077    /// Generate journal entries from payroll runs.
11078    ///
11079    /// Creates one JE per payroll run:
11080    /// - DR Salaries & Wages (6100) for gross pay
11081    /// - CR Payroll Clearing (9100) for gross pay
11082    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11083        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11084
11085        let mut jes = Vec::with_capacity(payroll_runs.len());
11086
11087        for run in payroll_runs {
11088            let mut je = JournalEntry::new_simple(
11089                format!("JE-PAYROLL-{}", run.payroll_id),
11090                run.company_code.clone(),
11091                run.run_date,
11092                format!("Payroll {}", run.payroll_id),
11093            );
11094
11095            // Debit Salaries & Wages for gross pay
11096            je.add_line(JournalEntryLine {
11097                line_number: 1,
11098                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11099                debit_amount: run.total_gross,
11100                reference: Some(run.payroll_id.clone()),
11101                text: Some(format!(
11102                    "Payroll {} ({} employees)",
11103                    run.payroll_id, run.employee_count
11104                )),
11105                ..Default::default()
11106            });
11107
11108            // Credit Payroll Clearing for gross pay
11109            je.add_line(JournalEntryLine {
11110                line_number: 2,
11111                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11112                credit_amount: run.total_gross,
11113                reference: Some(run.payroll_id.clone()),
11114                ..Default::default()
11115            });
11116
11117            jes.push(je);
11118        }
11119
11120        jes
11121    }
11122
11123    /// Link document flows to subledger records.
11124    ///
11125    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
11126    /// ensuring subledger data is coherent with document flow data.
11127    fn link_document_flows_to_subledgers(
11128        &mut self,
11129        flows: &DocumentFlowSnapshot,
11130    ) -> SynthResult<SubledgerSnapshot> {
11131        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11132        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11133
11134        // Build vendor/customer name maps from master data for realistic subledger names
11135        let vendor_names: std::collections::HashMap<String, String> = self
11136            .master_data
11137            .vendors
11138            .iter()
11139            .map(|v| (v.vendor_id.clone(), v.name.clone()))
11140            .collect();
11141        let customer_names: std::collections::HashMap<String, String> = self
11142            .master_data
11143            .customers
11144            .iter()
11145            .map(|c| (c.customer_id.clone(), c.name.clone()))
11146            .collect();
11147
11148        let mut linker = DocumentFlowLinker::new()
11149            .with_vendor_names(vendor_names)
11150            .with_customer_names(customer_names);
11151
11152        // Convert vendor invoices to AP invoices
11153        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11154        if let Some(pb) = &pb {
11155            pb.inc(flows.vendor_invoices.len() as u64);
11156        }
11157
11158        // Convert customer invoices to AR invoices
11159        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11160        if let Some(pb) = &pb {
11161            pb.inc(flows.customer_invoices.len() as u64);
11162        }
11163
11164        if let Some(pb) = pb {
11165            pb.finish_with_message(format!(
11166                "Linked {} AP and {} AR invoices",
11167                ap_invoices.len(),
11168                ar_invoices.len()
11169            ));
11170        }
11171
11172        Ok(SubledgerSnapshot {
11173            ap_invoices,
11174            ar_invoices,
11175            fa_records: Vec::new(),
11176            inventory_positions: Vec::new(),
11177            inventory_movements: Vec::new(),
11178            // Aging reports are computed after payment settlement in phase_document_flows.
11179            ar_aging_reports: Vec::new(),
11180            ap_aging_reports: Vec::new(),
11181            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
11182            depreciation_runs: Vec::new(),
11183            inventory_valuations: Vec::new(),
11184            // Dunning runs and letters are populated in phase_document_flows after AR aging.
11185            dunning_runs: Vec::new(),
11186            dunning_letters: Vec::new(),
11187        })
11188    }
11189
11190    /// Generate OCPM events from document flows.
11191    ///
11192    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
11193    /// capturing the object-centric process perspective.
11194    #[allow(clippy::too_many_arguments)]
11195    fn generate_ocpm_events(
11196        &mut self,
11197        flows: &DocumentFlowSnapshot,
11198        sourcing: &SourcingSnapshot,
11199        hr: &HrSnapshot,
11200        manufacturing: &ManufacturingSnapshot,
11201        banking: &BankingSnapshot,
11202        audit: &AuditSnapshot,
11203        financial_reporting: &FinancialReportingSnapshot,
11204    ) -> SynthResult<OcpmSnapshot> {
11205        let total_chains = flows.p2p_chains.len()
11206            + flows.o2c_chains.len()
11207            + sourcing.sourcing_projects.len()
11208            + hr.payroll_runs.len()
11209            + manufacturing.production_orders.len()
11210            + banking.customers.len()
11211            + audit.engagements.len()
11212            + financial_reporting.bank_reconciliations.len();
11213        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11214
11215        // Create OCPM event log with standard types
11216        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11217        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11218
11219        // Configure the OCPM generator
11220        let ocpm_config = OcpmGeneratorConfig {
11221            generate_p2p: true,
11222            generate_o2c: true,
11223            generate_s2c: !sourcing.sourcing_projects.is_empty(),
11224            generate_h2r: !hr.payroll_runs.is_empty(),
11225            generate_mfg: !manufacturing.production_orders.is_empty(),
11226            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11227            generate_bank: !banking.customers.is_empty(),
11228            generate_audit: !audit.engagements.is_empty(),
11229            happy_path_rate: 0.75,
11230            exception_path_rate: 0.20,
11231            error_path_rate: 0.05,
11232            add_duration_variability: true,
11233            duration_std_dev_factor: 0.3,
11234        };
11235        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11236        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11237
11238        // Get available users for resource assignment
11239        let available_users: Vec<String> = self
11240            .master_data
11241            .employees
11242            .iter()
11243            .take(20)
11244            .map(|e| e.user_id.clone())
11245            .collect();
11246
11247        // Deterministic base date from config (avoids Utc::now() non-determinism)
11248        let fallback_date =
11249            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11250        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11251            .unwrap_or(fallback_date);
11252        let base_midnight = base_date
11253            .and_hms_opt(0, 0, 0)
11254            .expect("midnight is always valid");
11255        let base_datetime =
11256            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11257
11258        // Helper closure to add case results to event log
11259        let add_result = |event_log: &mut OcpmEventLog,
11260                          result: datasynth_ocpm::CaseGenerationResult| {
11261            for event in result.events {
11262                event_log.add_event(event);
11263            }
11264            for object in result.objects {
11265                event_log.add_object(object);
11266            }
11267            for relationship in result.relationships {
11268                event_log.add_relationship(relationship);
11269            }
11270            for corr in result.correlation_events {
11271                event_log.add_correlation_event(corr);
11272            }
11273            event_log.add_case(result.case_trace);
11274        };
11275
11276        // Generate events from P2P chains
11277        for chain in &flows.p2p_chains {
11278            let po = &chain.purchase_order;
11279            let documents = P2pDocuments::new(
11280                &po.header.document_id,
11281                &po.vendor_id,
11282                &po.header.company_code,
11283                po.total_net_amount,
11284                &po.header.currency,
11285                &ocpm_uuid_factory,
11286            )
11287            .with_goods_receipt(
11288                chain
11289                    .goods_receipts
11290                    .first()
11291                    .map(|gr| gr.header.document_id.as_str())
11292                    .unwrap_or(""),
11293                &ocpm_uuid_factory,
11294            )
11295            .with_invoice(
11296                chain
11297                    .vendor_invoice
11298                    .as_ref()
11299                    .map(|vi| vi.header.document_id.as_str())
11300                    .unwrap_or(""),
11301                &ocpm_uuid_factory,
11302            )
11303            .with_payment(
11304                chain
11305                    .payment
11306                    .as_ref()
11307                    .map(|p| p.header.document_id.as_str())
11308                    .unwrap_or(""),
11309                &ocpm_uuid_factory,
11310            );
11311
11312            let start_time =
11313                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
11314            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
11315            add_result(&mut event_log, result);
11316
11317            if let Some(pb) = &pb {
11318                pb.inc(1);
11319            }
11320        }
11321
11322        // Generate events from O2C chains
11323        for chain in &flows.o2c_chains {
11324            let so = &chain.sales_order;
11325            let documents = O2cDocuments::new(
11326                &so.header.document_id,
11327                &so.customer_id,
11328                &so.header.company_code,
11329                so.total_net_amount,
11330                &so.header.currency,
11331                &ocpm_uuid_factory,
11332            )
11333            .with_delivery(
11334                chain
11335                    .deliveries
11336                    .first()
11337                    .map(|d| d.header.document_id.as_str())
11338                    .unwrap_or(""),
11339                &ocpm_uuid_factory,
11340            )
11341            .with_invoice(
11342                chain
11343                    .customer_invoice
11344                    .as_ref()
11345                    .map(|ci| ci.header.document_id.as_str())
11346                    .unwrap_or(""),
11347                &ocpm_uuid_factory,
11348            )
11349            .with_receipt(
11350                chain
11351                    .customer_receipt
11352                    .as_ref()
11353                    .map(|r| r.header.document_id.as_str())
11354                    .unwrap_or(""),
11355                &ocpm_uuid_factory,
11356            );
11357
11358            let start_time =
11359                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
11360            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
11361            add_result(&mut event_log, result);
11362
11363            if let Some(pb) = &pb {
11364                pb.inc(1);
11365            }
11366        }
11367
11368        // Generate events from S2C sourcing projects
11369        for project in &sourcing.sourcing_projects {
11370            // Find vendor from contracts or qualifications
11371            let vendor_id = sourcing
11372                .contracts
11373                .iter()
11374                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11375                .map(|c| c.vendor_id.clone())
11376                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
11377                .or_else(|| {
11378                    self.master_data
11379                        .vendors
11380                        .first()
11381                        .map(|v| v.vendor_id.clone())
11382                })
11383                .unwrap_or_else(|| "V000".to_string());
11384            let mut docs = S2cDocuments::new(
11385                &project.project_id,
11386                &vendor_id,
11387                &project.company_code,
11388                project.estimated_annual_spend,
11389                &ocpm_uuid_factory,
11390            );
11391            // Link RFx if available
11392            if let Some(rfx) = sourcing
11393                .rfx_events
11394                .iter()
11395                .find(|r| r.sourcing_project_id == project.project_id)
11396            {
11397                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11398                // Link winning bid (status == Accepted)
11399                if let Some(bid) = sourcing.bids.iter().find(|b| {
11400                    b.rfx_id == rfx.rfx_id
11401                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11402                }) {
11403                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11404                }
11405            }
11406            // Link contract
11407            if let Some(contract) = sourcing
11408                .contracts
11409                .iter()
11410                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11411            {
11412                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
11413            }
11414            let start_time = base_datetime - chrono::Duration::days(90);
11415            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
11416            add_result(&mut event_log, result);
11417
11418            if let Some(pb) = &pb {
11419                pb.inc(1);
11420            }
11421        }
11422
11423        // Generate events from H2R payroll runs
11424        for run in &hr.payroll_runs {
11425            // Use first matching payroll line item's employee, or fallback
11426            let employee_id = hr
11427                .payroll_line_items
11428                .iter()
11429                .find(|li| li.payroll_id == run.payroll_id)
11430                .map(|li| li.employee_id.as_str())
11431                .unwrap_or("EMP000");
11432            let docs = H2rDocuments::new(
11433                &run.payroll_id,
11434                employee_id,
11435                &run.company_code,
11436                run.total_gross,
11437                &ocpm_uuid_factory,
11438            )
11439            .with_time_entries(
11440                hr.time_entries
11441                    .iter()
11442                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
11443                    .take(5)
11444                    .map(|t| t.entry_id.as_str())
11445                    .collect(),
11446            );
11447            let start_time = base_datetime - chrono::Duration::days(30);
11448            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
11449            add_result(&mut event_log, result);
11450
11451            if let Some(pb) = &pb {
11452                pb.inc(1);
11453            }
11454        }
11455
11456        // Generate events from MFG production orders
11457        for order in &manufacturing.production_orders {
11458            let mut docs = MfgDocuments::new(
11459                &order.order_id,
11460                &order.material_id,
11461                &order.company_code,
11462                order.planned_quantity,
11463                &ocpm_uuid_factory,
11464            )
11465            .with_operations(
11466                order
11467                    .operations
11468                    .iter()
11469                    .map(|o| format!("OP-{:04}", o.operation_number))
11470                    .collect::<Vec<_>>()
11471                    .iter()
11472                    .map(std::string::String::as_str)
11473                    .collect(),
11474            );
11475            // Link quality inspection if available (via reference_id matching order_id)
11476            if let Some(insp) = manufacturing
11477                .quality_inspections
11478                .iter()
11479                .find(|i| i.reference_id == order.order_id)
11480            {
11481                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
11482            }
11483            // Link cycle count if available (match by material_id in items)
11484            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
11485                cc.items
11486                    .iter()
11487                    .any(|item| item.material_id == order.material_id)
11488            }) {
11489                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
11490            }
11491            let start_time = base_datetime - chrono::Duration::days(60);
11492            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
11493            add_result(&mut event_log, result);
11494
11495            if let Some(pb) = &pb {
11496                pb.inc(1);
11497            }
11498        }
11499
11500        // Generate events from Banking customers
11501        for customer in &banking.customers {
11502            let customer_id_str = customer.customer_id.to_string();
11503            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
11504            // Link accounts (primary_owner_id matches customer_id)
11505            if let Some(account) = banking
11506                .accounts
11507                .iter()
11508                .find(|a| a.primary_owner_id == customer.customer_id)
11509            {
11510                let account_id_str = account.account_id.to_string();
11511                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
11512                // Link transactions for this account
11513                let txn_strs: Vec<String> = banking
11514                    .transactions
11515                    .iter()
11516                    .filter(|t| t.account_id == account.account_id)
11517                    .take(10)
11518                    .map(|t| t.transaction_id.to_string())
11519                    .collect();
11520                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
11521                let txn_amounts: Vec<rust_decimal::Decimal> = banking
11522                    .transactions
11523                    .iter()
11524                    .filter(|t| t.account_id == account.account_id)
11525                    .take(10)
11526                    .map(|t| t.amount)
11527                    .collect();
11528                if !txn_ids.is_empty() {
11529                    docs = docs.with_transactions(txn_ids, txn_amounts);
11530                }
11531            }
11532            let start_time = base_datetime - chrono::Duration::days(180);
11533            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
11534            add_result(&mut event_log, result);
11535
11536            if let Some(pb) = &pb {
11537                pb.inc(1);
11538            }
11539        }
11540
11541        // Generate events from Audit engagements
11542        for engagement in &audit.engagements {
11543            let engagement_id_str = engagement.engagement_id.to_string();
11544            let docs = AuditDocuments::new(
11545                &engagement_id_str,
11546                &engagement.client_entity_id,
11547                &ocpm_uuid_factory,
11548            )
11549            .with_workpapers(
11550                audit
11551                    .workpapers
11552                    .iter()
11553                    .filter(|w| w.engagement_id == engagement.engagement_id)
11554                    .take(10)
11555                    .map(|w| w.workpaper_id.to_string())
11556                    .collect::<Vec<_>>()
11557                    .iter()
11558                    .map(std::string::String::as_str)
11559                    .collect(),
11560            )
11561            .with_evidence(
11562                audit
11563                    .evidence
11564                    .iter()
11565                    .filter(|e| e.engagement_id == engagement.engagement_id)
11566                    .take(10)
11567                    .map(|e| e.evidence_id.to_string())
11568                    .collect::<Vec<_>>()
11569                    .iter()
11570                    .map(std::string::String::as_str)
11571                    .collect(),
11572            )
11573            .with_risks(
11574                audit
11575                    .risk_assessments
11576                    .iter()
11577                    .filter(|r| r.engagement_id == engagement.engagement_id)
11578                    .take(5)
11579                    .map(|r| r.risk_id.to_string())
11580                    .collect::<Vec<_>>()
11581                    .iter()
11582                    .map(std::string::String::as_str)
11583                    .collect(),
11584            )
11585            .with_findings(
11586                audit
11587                    .findings
11588                    .iter()
11589                    .filter(|f| f.engagement_id == engagement.engagement_id)
11590                    .take(5)
11591                    .map(|f| f.finding_id.to_string())
11592                    .collect::<Vec<_>>()
11593                    .iter()
11594                    .map(std::string::String::as_str)
11595                    .collect(),
11596            )
11597            .with_judgments(
11598                audit
11599                    .judgments
11600                    .iter()
11601                    .filter(|j| j.engagement_id == engagement.engagement_id)
11602                    .take(5)
11603                    .map(|j| j.judgment_id.to_string())
11604                    .collect::<Vec<_>>()
11605                    .iter()
11606                    .map(std::string::String::as_str)
11607                    .collect(),
11608            );
11609            let start_time = base_datetime - chrono::Duration::days(120);
11610            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
11611            add_result(&mut event_log, result);
11612
11613            if let Some(pb) = &pb {
11614                pb.inc(1);
11615            }
11616        }
11617
11618        // Generate events from Bank Reconciliations
11619        for recon in &financial_reporting.bank_reconciliations {
11620            let docs = BankReconDocuments::new(
11621                &recon.reconciliation_id,
11622                &recon.bank_account_id,
11623                &recon.company_code,
11624                recon.bank_ending_balance,
11625                &ocpm_uuid_factory,
11626            )
11627            .with_statement_lines(
11628                recon
11629                    .statement_lines
11630                    .iter()
11631                    .take(20)
11632                    .map(|l| l.line_id.as_str())
11633                    .collect(),
11634            )
11635            .with_reconciling_items(
11636                recon
11637                    .reconciling_items
11638                    .iter()
11639                    .take(10)
11640                    .map(|i| i.item_id.as_str())
11641                    .collect(),
11642            );
11643            let start_time = base_datetime - chrono::Duration::days(30);
11644            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
11645            add_result(&mut event_log, result);
11646
11647            if let Some(pb) = &pb {
11648                pb.inc(1);
11649            }
11650        }
11651
11652        // Compute process variants
11653        event_log.compute_variants();
11654
11655        let summary = event_log.summary();
11656
11657        if let Some(pb) = pb {
11658            pb.finish_with_message(format!(
11659                "Generated {} OCPM events, {} objects",
11660                summary.event_count, summary.object_count
11661            ));
11662        }
11663
11664        Ok(OcpmSnapshot {
11665            event_count: summary.event_count,
11666            object_count: summary.object_count,
11667            case_count: summary.case_count,
11668            event_log: Some(event_log),
11669        })
11670    }
11671
11672    /// Inject anomalies into journal entries.
11673    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
11674        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
11675
11676        // Read anomaly rates from config instead of using hardcoded values.
11677        // Priority: anomaly_injection config > fraud config > default 0.02
11678        let total_rate = if self.config.anomaly_injection.enabled {
11679            self.config.anomaly_injection.rates.total_rate
11680        } else if self.config.fraud.enabled {
11681            self.config.fraud.fraud_rate
11682        } else {
11683            0.02
11684        };
11685
11686        let fraud_rate = if self.config.anomaly_injection.enabled {
11687            self.config.anomaly_injection.rates.fraud_rate
11688        } else {
11689            AnomalyRateConfig::default().fraud_rate
11690        };
11691
11692        let error_rate = if self.config.anomaly_injection.enabled {
11693            self.config.anomaly_injection.rates.error_rate
11694        } else {
11695            AnomalyRateConfig::default().error_rate
11696        };
11697
11698        let process_issue_rate = if self.config.anomaly_injection.enabled {
11699            self.config.anomaly_injection.rates.process_rate
11700        } else {
11701            AnomalyRateConfig::default().process_issue_rate
11702        };
11703
11704        let anomaly_config = AnomalyInjectorConfig {
11705            rates: AnomalyRateConfig {
11706                total_rate,
11707                fraud_rate,
11708                error_rate,
11709                process_issue_rate,
11710                ..Default::default()
11711            },
11712            seed: self.seed + 5000,
11713            ..Default::default()
11714        };
11715
11716        let mut injector = AnomalyInjector::new(anomaly_config);
11717        let result = injector.process_entries(entries);
11718
11719        if let Some(pb) = &pb {
11720            pb.inc(entries.len() as u64);
11721            pb.finish_with_message("Anomaly injection complete");
11722        }
11723
11724        let mut by_type = HashMap::new();
11725        for label in &result.labels {
11726            *by_type
11727                .entry(format!("{:?}", label.anomaly_type))
11728                .or_insert(0) += 1;
11729        }
11730
11731        Ok(AnomalyLabels {
11732            labels: result.labels,
11733            summary: Some(result.summary),
11734            by_type,
11735        })
11736    }
11737
11738    /// Validate journal entries using running balance tracker.
11739    ///
11740    /// Applies all entries to the balance tracker and validates:
11741    /// - Each entry is internally balanced (debits = credits)
11742    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
11743    ///
11744    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
11745    /// excluded from balance validation as they may be intentionally unbalanced.
11746    fn validate_journal_entries(
11747        &mut self,
11748        entries: &[JournalEntry],
11749    ) -> SynthResult<BalanceValidationResult> {
11750        // Filter out entries with human errors as they may be intentionally unbalanced
11751        let clean_entries: Vec<&JournalEntry> = entries
11752            .iter()
11753            .filter(|e| {
11754                e.header
11755                    .header_text
11756                    .as_ref()
11757                    .map(|t| !t.contains("[HUMAN_ERROR:"))
11758                    .unwrap_or(true)
11759            })
11760            .collect();
11761
11762        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
11763
11764        // Configure tracker to not fail on errors (collect them instead)
11765        let config = BalanceTrackerConfig {
11766            validate_on_each_entry: false,   // We'll validate at the end
11767            track_history: false,            // Skip history for performance
11768            fail_on_validation_error: false, // Collect errors, don't fail
11769            ..Default::default()
11770        };
11771        let validation_currency = self
11772            .config
11773            .companies
11774            .first()
11775            .map(|c| c.currency.clone())
11776            .unwrap_or_else(|| "USD".to_string());
11777
11778        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
11779
11780        // Apply clean entries (without human errors)
11781        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
11782        let errors = tracker.apply_entries(&clean_refs);
11783
11784        if let Some(pb) = &pb {
11785            pb.inc(entries.len() as u64);
11786        }
11787
11788        // Check if any entries were unbalanced
11789        // Note: When fail_on_validation_error is false, errors are stored in tracker
11790        let has_unbalanced = tracker
11791            .get_validation_errors()
11792            .iter()
11793            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
11794
11795        // Validate balance sheet for each company
11796        // Include both returned errors and collected validation errors
11797        let mut all_errors = errors;
11798        all_errors.extend(tracker.get_validation_errors().iter().cloned());
11799        let company_codes: Vec<String> = self
11800            .config
11801            .companies
11802            .iter()
11803            .map(|c| c.code.clone())
11804            .collect();
11805
11806        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11807            .map(|d| d + chrono::Months::new(self.config.global.period_months))
11808            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11809
11810        for company_code in &company_codes {
11811            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
11812                all_errors.push(e);
11813            }
11814        }
11815
11816        // Get statistics after all mutable operations are done
11817        let stats = tracker.get_statistics();
11818
11819        // Determine if balanced overall
11820        let is_balanced = all_errors.is_empty();
11821
11822        if let Some(pb) = pb {
11823            let msg = if is_balanced {
11824                "Balance validation passed"
11825            } else {
11826                "Balance validation completed with errors"
11827            };
11828            pb.finish_with_message(msg);
11829        }
11830
11831        Ok(BalanceValidationResult {
11832            validated: true,
11833            is_balanced,
11834            entries_processed: stats.entries_processed,
11835            total_debits: stats.total_debits,
11836            total_credits: stats.total_credits,
11837            accounts_tracked: stats.accounts_tracked,
11838            companies_tracked: stats.companies_tracked,
11839            validation_errors: all_errors,
11840            has_unbalanced_entries: has_unbalanced,
11841        })
11842    }
11843
11844    /// Inject data quality variations into journal entries.
11845    ///
11846    /// Applies typos, missing values, and format variations to make
11847    /// the synthetic data more realistic for testing data cleaning pipelines.
11848    fn inject_data_quality(
11849        &mut self,
11850        entries: &mut [JournalEntry],
11851    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
11852        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
11853
11854        // Build config from user-specified schema settings when data_quality is enabled;
11855        // otherwise fall back to the low-rate minimal() preset.
11856        let config = if self.config.data_quality.enabled {
11857            let dq = &self.config.data_quality;
11858            DataQualityConfig {
11859                enable_missing_values: dq.missing_values.enabled,
11860                missing_values: datasynth_generators::MissingValueConfig {
11861                    global_rate: dq.effective_missing_rate(),
11862                    ..Default::default()
11863                },
11864                enable_format_variations: dq.format_variations.enabled,
11865                format_variations: datasynth_generators::FormatVariationConfig {
11866                    date_variation_rate: dq.format_variations.dates.rate,
11867                    amount_variation_rate: dq.format_variations.amounts.rate,
11868                    identifier_variation_rate: dq.format_variations.identifiers.rate,
11869                    ..Default::default()
11870                },
11871                enable_duplicates: dq.duplicates.enabled,
11872                duplicates: datasynth_generators::DuplicateConfig {
11873                    duplicate_rate: dq.effective_duplicate_rate(),
11874                    ..Default::default()
11875                },
11876                enable_typos: dq.typos.enabled,
11877                typos: datasynth_generators::TypoConfig {
11878                    char_error_rate: dq.effective_typo_rate(),
11879                    ..Default::default()
11880                },
11881                enable_encoding_issues: dq.encoding_issues.enabled,
11882                encoding_issue_rate: dq.encoding_issues.rate,
11883                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
11884                track_statistics: true,
11885            }
11886        } else {
11887            DataQualityConfig::minimal()
11888        };
11889        let mut injector = DataQualityInjector::new(config);
11890
11891        // Wire country pack for locale-aware format baselines
11892        injector.set_country_pack(self.primary_pack().clone());
11893
11894        // Build context for missing value decisions
11895        let context = HashMap::new();
11896
11897        for entry in entries.iter_mut() {
11898            // Process header_text field (common target for typos)
11899            if let Some(text) = &entry.header.header_text {
11900                let processed = injector.process_text_field(
11901                    "header_text",
11902                    text,
11903                    &entry.header.document_id.to_string(),
11904                    &context,
11905                );
11906                match processed {
11907                    Some(new_text) if new_text != *text => {
11908                        entry.header.header_text = Some(new_text);
11909                    }
11910                    None => {
11911                        entry.header.header_text = None; // Missing value
11912                    }
11913                    _ => {}
11914                }
11915            }
11916
11917            // Process reference field
11918            if let Some(ref_text) = &entry.header.reference {
11919                let processed = injector.process_text_field(
11920                    "reference",
11921                    ref_text,
11922                    &entry.header.document_id.to_string(),
11923                    &context,
11924                );
11925                match processed {
11926                    Some(new_text) if new_text != *ref_text => {
11927                        entry.header.reference = Some(new_text);
11928                    }
11929                    None => {
11930                        entry.header.reference = None;
11931                    }
11932                    _ => {}
11933                }
11934            }
11935
11936            // Process user_persona field (potential for typos in user IDs)
11937            let user_persona = entry.header.user_persona.clone();
11938            if let Some(processed) = injector.process_text_field(
11939                "user_persona",
11940                &user_persona,
11941                &entry.header.document_id.to_string(),
11942                &context,
11943            ) {
11944                if processed != user_persona {
11945                    entry.header.user_persona = processed;
11946                }
11947            }
11948
11949            // Process line items
11950            for line in &mut entry.lines {
11951                // Process line description if present
11952                if let Some(ref text) = line.line_text {
11953                    let processed = injector.process_text_field(
11954                        "line_text",
11955                        text,
11956                        &entry.header.document_id.to_string(),
11957                        &context,
11958                    );
11959                    match processed {
11960                        Some(new_text) if new_text != *text => {
11961                            line.line_text = Some(new_text);
11962                        }
11963                        None => {
11964                            line.line_text = None;
11965                        }
11966                        _ => {}
11967                    }
11968                }
11969
11970                // Process cost_center if present
11971                if let Some(cc) = &line.cost_center {
11972                    let processed = injector.process_text_field(
11973                        "cost_center",
11974                        cc,
11975                        &entry.header.document_id.to_string(),
11976                        &context,
11977                    );
11978                    match processed {
11979                        Some(new_cc) if new_cc != *cc => {
11980                            line.cost_center = Some(new_cc);
11981                        }
11982                        None => {
11983                            line.cost_center = None;
11984                        }
11985                        _ => {}
11986                    }
11987                }
11988            }
11989
11990            if let Some(pb) = &pb {
11991                pb.inc(1);
11992            }
11993        }
11994
11995        if let Some(pb) = pb {
11996            pb.finish_with_message("Data quality injection complete");
11997        }
11998
11999        let quality_issues = injector.issues().to_vec();
12000        Ok((injector.stats().clone(), quality_issues))
12001    }
12002
12003    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
12004    ///
12005    /// Creates complete audit documentation for each company in the configuration,
12006    /// following ISA standards:
12007    /// - ISA 210/220: Engagement acceptance and terms
12008    /// - ISA 230: Audit documentation (workpapers)
12009    /// - ISA 265: Control deficiencies (findings)
12010    /// - ISA 315/330: Risk assessment and response
12011    /// - ISA 500: Audit evidence
12012    /// - ISA 200: Professional judgment
12013    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12014        // Check if FSM-driven audit generation is enabled
12015        let use_fsm = self
12016            .config
12017            .audit
12018            .fsm
12019            .as_ref()
12020            .map(|f| f.enabled)
12021            .unwrap_or(false);
12022
12023        if use_fsm {
12024            return self.generate_audit_data_with_fsm(entries);
12025        }
12026
12027        // --- Legacy (non-FSM) audit generation follows ---
12028        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12029            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12030        let fiscal_year = start_date.year() as u16;
12031        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12032
12033        // Calculate rough total revenue from entries for materiality
12034        let total_revenue: rust_decimal::Decimal = entries
12035            .iter()
12036            .flat_map(|e| e.lines.iter())
12037            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12038            .map(|l| l.credit_amount)
12039            .sum();
12040
12041        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
12042        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12043
12044        let mut snapshot = AuditSnapshot::default();
12045
12046        // Initialize generators
12047        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12048        // v3.3.2: thread the user-facing audit schema config into the
12049        // engagement generator (team size range).
12050        engagement_gen.set_team_config(&self.config.audit.team);
12051
12052        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12053        // v3.3.2: thread workpaper + review workflow schema config into
12054        // the workpaper generator (per-section count range + review
12055        // delay ranges).
12056        workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12057        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12058        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12059        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12060        // v3.2.1+: user-supplied finding titles + narratives flow through shared provider
12061        finding_gen.set_template_provider(self.template_provider.clone());
12062        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12063        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12064        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12065        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12066        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12067        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12068        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12069
12070        // Get list of accounts from CoA for risk assessment
12071        let accounts: Vec<String> = self
12072            .coa
12073            .as_ref()
12074            .map(|coa| {
12075                coa.get_postable_accounts()
12076                    .iter()
12077                    .map(|acc| acc.account_code().to_string())
12078                    .collect()
12079            })
12080            .unwrap_or_default();
12081
12082        // Generate engagements for each company
12083        for (i, company) in self.config.companies.iter().enumerate() {
12084            // Calculate company-specific revenue (proportional to volume weight)
12085            let company_revenue = total_revenue
12086                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12087
12088            // Generate engagements for this company
12089            let engagements_for_company =
12090                self.phase_config.audit_engagements / self.config.companies.len().max(1);
12091            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12092                1
12093            } else {
12094                0
12095            };
12096
12097            for _eng_idx in 0..(engagements_for_company + extra) {
12098                // v3.3.2: draw engagement type from the user-configured
12099                // distribution instead of always using the default
12100                // (AnnualAudit). Falls back to the default when all
12101                // probabilities are zero.
12102                let eng_type =
12103                    engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12104
12105                // Generate the engagement
12106                let mut engagement = engagement_gen.generate_engagement(
12107                    &company.code,
12108                    &company.name,
12109                    fiscal_year,
12110                    period_end,
12111                    company_revenue,
12112                    Some(eng_type),
12113                );
12114
12115                // Replace synthetic team IDs with real employee IDs from master data
12116                if !self.master_data.employees.is_empty() {
12117                    let emp_count = self.master_data.employees.len();
12118                    // Use employee IDs deterministically based on engagement index
12119                    let base = (i * 10 + _eng_idx) % emp_count;
12120                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
12121                        .employee_id
12122                        .clone();
12123                    engagement.engagement_manager_id = self.master_data.employees
12124                        [(base + 1) % emp_count]
12125                        .employee_id
12126                        .clone();
12127                    let real_team: Vec<String> = engagement
12128                        .team_member_ids
12129                        .iter()
12130                        .enumerate()
12131                        .map(|(j, _)| {
12132                            self.master_data.employees[(base + 2 + j) % emp_count]
12133                                .employee_id
12134                                .clone()
12135                        })
12136                        .collect();
12137                    engagement.team_member_ids = real_team;
12138                }
12139
12140                if let Some(pb) = &pb {
12141                    pb.inc(1);
12142                }
12143
12144                // Get team members from the engagement
12145                let team_members: Vec<String> = engagement.team_member_ids.clone();
12146
12147                // Generate workpapers for the engagement.
12148                // v3.3.2: honor `audit.generate_workpapers` — when false,
12149                // workpapers (and dependent evidence) are skipped while
12150                // the engagement itself, risk assessments, findings, etc.
12151                // still generate normally.
12152                let workpapers = if self.config.audit.generate_workpapers {
12153                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
12154                } else {
12155                    Vec::new()
12156                };
12157
12158                for wp in &workpapers {
12159                    if let Some(pb) = &pb {
12160                        pb.inc(1);
12161                    }
12162
12163                    // Generate evidence for each workpaper
12164                    let evidence = evidence_gen.generate_evidence_for_workpaper(
12165                        wp,
12166                        &team_members,
12167                        wp.preparer_date,
12168                    );
12169
12170                    for _ in &evidence {
12171                        if let Some(pb) = &pb {
12172                            pb.inc(1);
12173                        }
12174                    }
12175
12176                    snapshot.evidence.extend(evidence);
12177                }
12178
12179                // Generate risk assessments for the engagement
12180                let risks =
12181                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
12182
12183                for _ in &risks {
12184                    if let Some(pb) = &pb {
12185                        pb.inc(1);
12186                    }
12187                }
12188                snapshot.risk_assessments.extend(risks);
12189
12190                // Generate findings for the engagement
12191                let findings = finding_gen.generate_findings_for_engagement(
12192                    &engagement,
12193                    &workpapers,
12194                    &team_members,
12195                );
12196
12197                for _ in &findings {
12198                    if let Some(pb) = &pb {
12199                        pb.inc(1);
12200                    }
12201                }
12202                snapshot.findings.extend(findings);
12203
12204                // Generate professional judgments for the engagement
12205                let judgments =
12206                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
12207
12208                for _ in &judgments {
12209                    if let Some(pb) = &pb {
12210                        pb.inc(1);
12211                    }
12212                }
12213                snapshot.judgments.extend(judgments);
12214
12215                // ISA 505: External confirmations and responses
12216                let (confs, resps) =
12217                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12218                snapshot.confirmations.extend(confs);
12219                snapshot.confirmation_responses.extend(resps);
12220
12221                // ISA 330: Procedure steps per workpaper
12222                let team_pairs: Vec<(String, String)> = team_members
12223                    .iter()
12224                    .map(|id| {
12225                        let name = self
12226                            .master_data
12227                            .employees
12228                            .iter()
12229                            .find(|e| e.employee_id == *id)
12230                            .map(|e| e.display_name.clone())
12231                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12232                        (id.clone(), name)
12233                    })
12234                    .collect();
12235                for wp in &workpapers {
12236                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12237                    snapshot.procedure_steps.extend(steps);
12238                }
12239
12240                // ISA 530: Samples per workpaper
12241                for wp in &workpapers {
12242                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12243                        snapshot.samples.push(sample);
12244                    }
12245                }
12246
12247                // ISA 520: Analytical procedures
12248                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12249                snapshot.analytical_results.extend(analytical);
12250
12251                // ISA 610: Internal audit function and reports
12252                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
12253                snapshot.ia_functions.push(ia_func);
12254                snapshot.ia_reports.extend(ia_reports);
12255
12256                // ISA 550: Related parties and transactions
12257                let vendor_names: Vec<String> = self
12258                    .master_data
12259                    .vendors
12260                    .iter()
12261                    .map(|v| v.name.clone())
12262                    .collect();
12263                let customer_names: Vec<String> = self
12264                    .master_data
12265                    .customers
12266                    .iter()
12267                    .map(|c| c.name.clone())
12268                    .collect();
12269                let (parties, rp_txns) =
12270                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
12271                snapshot.related_parties.extend(parties);
12272                snapshot.related_party_transactions.extend(rp_txns);
12273
12274                // Add workpapers after findings since findings need them
12275                snapshot.workpapers.extend(workpapers);
12276
12277                // Generate audit scope record for this engagement (one per engagement)
12278                {
12279                    let scope_id = format!(
12280                        "SCOPE-{}-{}",
12281                        engagement.engagement_id.simple(),
12282                        &engagement.client_entity_id
12283                    );
12284                    let scope = datasynth_core::models::audit::AuditScope::new(
12285                        scope_id.clone(),
12286                        engagement.engagement_id.to_string(),
12287                        engagement.client_entity_id.clone(),
12288                        engagement.materiality,
12289                    );
12290                    // Wire scope_id back to engagement
12291                    let mut eng = engagement;
12292                    eng.scope_id = Some(scope_id);
12293                    snapshot.audit_scopes.push(scope);
12294                    snapshot.engagements.push(eng);
12295                }
12296            }
12297        }
12298
12299        // ----------------------------------------------------------------
12300        // ISA 600: Group audit — component auditors, plan, instructions, reports
12301        // ----------------------------------------------------------------
12302        if self.config.companies.len() > 1 {
12303            // Use materiality from the first engagement if available, otherwise
12304            // derive a reasonable figure from total revenue.
12305            let group_materiality = snapshot
12306                .engagements
12307                .first()
12308                .map(|e| e.materiality)
12309                .unwrap_or_else(|| {
12310                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
12311                    total_revenue * pct
12312                });
12313
12314            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
12315            let group_engagement_id = snapshot
12316                .engagements
12317                .first()
12318                .map(|e| e.engagement_id.to_string())
12319                .unwrap_or_else(|| "GROUP-ENG".to_string());
12320
12321            let component_snapshot = component_gen.generate(
12322                &self.config.companies,
12323                group_materiality,
12324                &group_engagement_id,
12325                period_end,
12326            );
12327
12328            snapshot.component_auditors = component_snapshot.component_auditors;
12329            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
12330            snapshot.component_instructions = component_snapshot.component_instructions;
12331            snapshot.component_reports = component_snapshot.component_reports;
12332
12333            info!(
12334                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
12335                snapshot.component_auditors.len(),
12336                snapshot.component_instructions.len(),
12337                snapshot.component_reports.len(),
12338            );
12339        }
12340
12341        // ----------------------------------------------------------------
12342        // ISA 210: Engagement letters — one per engagement
12343        // ----------------------------------------------------------------
12344        {
12345            let applicable_framework = self
12346                .config
12347                .accounting_standards
12348                .framework
12349                .as_ref()
12350                .map(|f| format!("{f:?}"))
12351                .unwrap_or_else(|| "IFRS".to_string());
12352
12353            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
12354            let entity_count = self.config.companies.len();
12355
12356            for engagement in &snapshot.engagements {
12357                let company = self
12358                    .config
12359                    .companies
12360                    .iter()
12361                    .find(|c| c.code == engagement.client_entity_id);
12362                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
12363                let letter_date = engagement.planning_start;
12364                let letter = letter_gen.generate(
12365                    &engagement.engagement_id.to_string(),
12366                    &engagement.client_name,
12367                    entity_count,
12368                    engagement.period_end_date,
12369                    currency,
12370                    &applicable_framework,
12371                    letter_date,
12372                );
12373                snapshot.engagement_letters.push(letter);
12374            }
12375
12376            info!(
12377                "ISA 210 engagement letters: {} generated",
12378                snapshot.engagement_letters.len()
12379            );
12380        }
12381
12382        // ----------------------------------------------------------------
12383        // v3.3.0: Legal documents per engagement (WI: LegalDocumentGenerator)
12384        // ----------------------------------------------------------------
12385        if self.phase_config.generate_legal_documents {
12386            use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
12387            let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
12388            for engagement in &snapshot.engagements {
12389                // Build an employee name list for signatory drawing —
12390                // prefer employees from the engaged entity, fall back to
12391                // all employees.
12392                let employee_names: Vec<String> = self
12393                    .master_data
12394                    .employees
12395                    .iter()
12396                    .filter(|e| e.company_code == engagement.client_entity_id)
12397                    .map(|e| e.display_name.clone())
12398                    .collect();
12399                let names_to_use = if !employee_names.is_empty() {
12400                    employee_names
12401                } else {
12402                    self.master_data
12403                        .employees
12404                        .iter()
12405                        .take(10)
12406                        .map(|e| e.display_name.clone())
12407                        .collect()
12408                };
12409                let docs = legal_gen.generate(
12410                    &engagement.client_entity_id,
12411                    engagement.fiscal_year as i32,
12412                    &names_to_use,
12413                );
12414                snapshot.legal_documents.extend(docs);
12415            }
12416            info!(
12417                "v3.3.0 legal documents: {} emitted across {} engagements",
12418                snapshot.legal_documents.len(),
12419                snapshot.engagements.len()
12420            );
12421        }
12422
12423        // ----------------------------------------------------------------
12424        // v3.3.0: IT general controls — access logs + change records
12425        //
12426        // `ItControlsGenerator` runs one pass per company (not per
12427        // engagement) so employee sets and system catalogs stay
12428        // coherent. We derive the period from the earliest engagement's
12429        // planning_start through the latest engagement's period_end_date
12430        // for each company.
12431        // ----------------------------------------------------------------
12432        if self.phase_config.generate_it_controls {
12433            use datasynth_generators::it_controls_generator::ItControlsGenerator;
12434            use std::collections::HashMap;
12435            let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
12436
12437            // Group engagements by company to produce one IT-controls
12438            // window per entity.
12439            let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
12440                HashMap::new();
12441            for engagement in &snapshot.engagements {
12442                let entry = by_company
12443                    .entry(engagement.client_entity_id.clone())
12444                    .or_insert((engagement.planning_start, engagement.period_end_date));
12445                if engagement.planning_start < entry.0 {
12446                    entry.0 = engagement.planning_start;
12447                }
12448                if engagement.period_end_date > entry.1 {
12449                    entry.1 = engagement.period_end_date;
12450                }
12451            }
12452
12453            // Standard system catalog — populated from known ERP / app
12454            // names. Keeps the generator's data shape stable when the
12455            // user hasn't configured IT-system naming separately.
12456            let systems: Vec<String> = vec![
12457                "SAP ECC",
12458                "SAP S/4 HANA",
12459                "Oracle EBS",
12460                "Workday",
12461                "NetSuite",
12462                "Active Directory",
12463                "SharePoint",
12464                "Salesforce",
12465                "ServiceNow",
12466                "Jira",
12467                "GitHub Enterprise",
12468                "AWS Console",
12469                "Okta",
12470            ]
12471            .into_iter()
12472            .map(String::from)
12473            .collect();
12474
12475            for (company_code, (start, end)) in by_company {
12476                let emps: Vec<(String, String)> = self
12477                    .master_data
12478                    .employees
12479                    .iter()
12480                    .filter(|e| e.company_code == company_code)
12481                    .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12482                    .collect();
12483                if emps.is_empty() {
12484                    continue;
12485                }
12486                // Compute period in months, rounded up to the nearest
12487                // whole month (min 1).
12488                let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
12489                let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
12490                let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
12491                snapshot.it_controls_access_logs.extend(access_logs);
12492                snapshot.it_controls_change_records.extend(change_records);
12493            }
12494
12495            info!(
12496                "v3.3.0 IT controls: {} access logs, {} change records",
12497                snapshot.it_controls_access_logs.len(),
12498                snapshot.it_controls_change_records.len()
12499            );
12500        }
12501
12502        // ----------------------------------------------------------------
12503        // ISA 560 / IAS 10: Subsequent events
12504        // ----------------------------------------------------------------
12505        {
12506            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
12507            let entity_codes: Vec<String> = self
12508                .config
12509                .companies
12510                .iter()
12511                .map(|c| c.code.clone())
12512                .collect();
12513            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
12514            info!(
12515                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
12516                subsequent.len(),
12517                subsequent
12518                    .iter()
12519                    .filter(|e| matches!(
12520                        e.classification,
12521                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
12522                    ))
12523                    .count(),
12524                subsequent
12525                    .iter()
12526                    .filter(|e| matches!(
12527                        e.classification,
12528                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
12529                    ))
12530                    .count(),
12531            );
12532            snapshot.subsequent_events = subsequent;
12533        }
12534
12535        // ----------------------------------------------------------------
12536        // ISA 402: Service organization controls
12537        // ----------------------------------------------------------------
12538        {
12539            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
12540            let entity_codes: Vec<String> = self
12541                .config
12542                .companies
12543                .iter()
12544                .map(|c| c.code.clone())
12545                .collect();
12546            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
12547            info!(
12548                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
12549                soc_snapshot.service_organizations.len(),
12550                soc_snapshot.soc_reports.len(),
12551                soc_snapshot.user_entity_controls.len(),
12552            );
12553            snapshot.service_organizations = soc_snapshot.service_organizations;
12554            snapshot.soc_reports = soc_snapshot.soc_reports;
12555            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
12556        }
12557
12558        // ----------------------------------------------------------------
12559        // ISA 570: Going concern assessments
12560        // ----------------------------------------------------------------
12561        {
12562            use datasynth_generators::audit::going_concern_generator::{
12563                GoingConcernGenerator, GoingConcernInput,
12564            };
12565            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
12566            let entity_codes: Vec<String> = self
12567                .config
12568                .companies
12569                .iter()
12570                .map(|c| c.code.clone())
12571                .collect();
12572            // Assessment date = period end + 75 days (typical sign-off window).
12573            let assessment_date = period_end + chrono::Duration::days(75);
12574            let period_label = format!("FY{}", period_end.year());
12575
12576            // Build financial inputs from actual journal entries.
12577            //
12578            // We derive approximate P&L, working capital, and operating cash flow
12579            // by aggregating GL account balances from the journal entry population.
12580            // Account ranges used (standard chart):
12581            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
12582            //   Expenses:        6xxx (debit-normal)
12583            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
12584            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
12585            //   Operating CF:    net income adjusted for D&A (rough proxy)
12586            let gc_inputs: Vec<GoingConcernInput> = self
12587                .config
12588                .companies
12589                .iter()
12590                .map(|company| {
12591                    let code = &company.code;
12592                    let mut revenue = rust_decimal::Decimal::ZERO;
12593                    let mut expenses = rust_decimal::Decimal::ZERO;
12594                    let mut current_assets = rust_decimal::Decimal::ZERO;
12595                    let mut current_liabs = rust_decimal::Decimal::ZERO;
12596                    let mut total_debt = rust_decimal::Decimal::ZERO;
12597
12598                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
12599                        for line in &je.lines {
12600                            let acct = line.gl_account.as_str();
12601                            let net = line.debit_amount - line.credit_amount;
12602                            if acct.starts_with('4') {
12603                                // Revenue accounts: credit-normal, so negative net = revenue earned
12604                                revenue -= net;
12605                            } else if acct.starts_with('6') {
12606                                // Expense accounts: debit-normal
12607                                expenses += net;
12608                            }
12609                            // Balance sheet accounts for working capital
12610                            if acct.starts_with('1') {
12611                                // Current asset accounts (1000–1499)
12612                                if let Ok(n) = acct.parse::<u32>() {
12613                                    if (1000..=1499).contains(&n) {
12614                                        current_assets += net;
12615                                    }
12616                                }
12617                            } else if acct.starts_with('2') {
12618                                if let Ok(n) = acct.parse::<u32>() {
12619                                    if (2000..=2499).contains(&n) {
12620                                        // Current liabilities
12621                                        current_liabs -= net; // credit-normal
12622                                    } else if (2500..=2999).contains(&n) {
12623                                        // Long-term debt
12624                                        total_debt -= net;
12625                                    }
12626                                }
12627                            }
12628                        }
12629                    }
12630
12631                    let net_income = revenue - expenses;
12632                    let working_capital = current_assets - current_liabs;
12633                    // Rough operating CF proxy: net income (full accrual CF calculation
12634                    // is done separately in the cash flow statement generator)
12635                    let operating_cash_flow = net_income;
12636
12637                    GoingConcernInput {
12638                        entity_code: code.clone(),
12639                        net_income,
12640                        working_capital,
12641                        operating_cash_flow,
12642                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
12643                        assessment_date,
12644                    }
12645                })
12646                .collect();
12647
12648            let assessments = if gc_inputs.is_empty() {
12649                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
12650            } else {
12651                gc_gen.generate_for_entities_with_inputs(
12652                    &entity_codes,
12653                    &gc_inputs,
12654                    assessment_date,
12655                    &period_label,
12656                )
12657            };
12658            info!(
12659                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
12660                assessments.len(),
12661                assessments.iter().filter(|a| matches!(
12662                    a.auditor_conclusion,
12663                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
12664                )).count(),
12665                assessments.iter().filter(|a| matches!(
12666                    a.auditor_conclusion,
12667                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
12668                )).count(),
12669                assessments.iter().filter(|a| matches!(
12670                    a.auditor_conclusion,
12671                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
12672                )).count(),
12673            );
12674            snapshot.going_concern_assessments = assessments;
12675        }
12676
12677        // ----------------------------------------------------------------
12678        // ISA 540: Accounting estimates
12679        // ----------------------------------------------------------------
12680        {
12681            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
12682            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
12683            let entity_codes: Vec<String> = self
12684                .config
12685                .companies
12686                .iter()
12687                .map(|c| c.code.clone())
12688                .collect();
12689            let estimates = est_gen.generate_for_entities(&entity_codes);
12690            info!(
12691                "ISA 540 accounting estimates: {} estimates across {} entities \
12692                 ({} with retrospective reviews, {} with auditor point estimates)",
12693                estimates.len(),
12694                entity_codes.len(),
12695                estimates
12696                    .iter()
12697                    .filter(|e| e.retrospective_review.is_some())
12698                    .count(),
12699                estimates
12700                    .iter()
12701                    .filter(|e| e.auditor_point_estimate.is_some())
12702                    .count(),
12703            );
12704            snapshot.accounting_estimates = estimates;
12705        }
12706
12707        // ----------------------------------------------------------------
12708        // ISA 700/701/705/706: Audit opinions (one per engagement)
12709        // ----------------------------------------------------------------
12710        {
12711            use datasynth_generators::audit::audit_opinion_generator::{
12712                AuditOpinionGenerator, AuditOpinionInput,
12713            };
12714
12715            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
12716
12717            // Build inputs — one per engagement, linking findings and going concern.
12718            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
12719                .engagements
12720                .iter()
12721                .map(|eng| {
12722                    // Collect findings for this engagement.
12723                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12724                        .findings
12725                        .iter()
12726                        .filter(|f| f.engagement_id == eng.engagement_id)
12727                        .cloned()
12728                        .collect();
12729
12730                    // Going concern for this entity.
12731                    let gc = snapshot
12732                        .going_concern_assessments
12733                        .iter()
12734                        .find(|g| g.entity_code == eng.client_entity_id)
12735                        .cloned();
12736
12737                    // Component reports relevant to this engagement.
12738                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
12739                        snapshot.component_reports.clone();
12740
12741                    let auditor = self
12742                        .master_data
12743                        .employees
12744                        .first()
12745                        .map(|e| e.display_name.clone())
12746                        .unwrap_or_else(|| "Global Audit LLP".into());
12747
12748                    let partner = self
12749                        .master_data
12750                        .employees
12751                        .get(1)
12752                        .map(|e| e.display_name.clone())
12753                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
12754
12755                    AuditOpinionInput {
12756                        entity_code: eng.client_entity_id.clone(),
12757                        entity_name: eng.client_name.clone(),
12758                        engagement_id: eng.engagement_id,
12759                        period_end: eng.period_end_date,
12760                        findings: eng_findings,
12761                        going_concern: gc,
12762                        component_reports: comp_reports,
12763                        // Mark as US-listed when audit standards include PCAOB.
12764                        is_us_listed: {
12765                            let fw = &self.config.audit_standards.isa_compliance.framework;
12766                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
12767                        },
12768                        auditor_name: auditor,
12769                        engagement_partner: partner,
12770                    }
12771                })
12772                .collect();
12773
12774            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
12775
12776            for go in &generated_opinions {
12777                snapshot
12778                    .key_audit_matters
12779                    .extend(go.key_audit_matters.clone());
12780            }
12781            snapshot.audit_opinions = generated_opinions
12782                .into_iter()
12783                .map(|go| go.opinion)
12784                .collect();
12785
12786            info!(
12787                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
12788                snapshot.audit_opinions.len(),
12789                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
12790                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
12791                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
12792                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
12793            );
12794        }
12795
12796        // ----------------------------------------------------------------
12797        // SOX 302 / 404 assessments
12798        // ----------------------------------------------------------------
12799        {
12800            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
12801
12802            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
12803
12804            for (i, company) in self.config.companies.iter().enumerate() {
12805                // Collect findings for this company's engagements.
12806                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
12807                    .engagements
12808                    .iter()
12809                    .filter(|e| e.client_entity_id == company.code)
12810                    .map(|e| e.engagement_id)
12811                    .collect();
12812
12813                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12814                    .findings
12815                    .iter()
12816                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
12817                    .cloned()
12818                    .collect();
12819
12820                // Derive executive names from employee list.
12821                let emp_count = self.master_data.employees.len();
12822                let ceo_name = if emp_count > 0 {
12823                    self.master_data.employees[i % emp_count]
12824                        .display_name
12825                        .clone()
12826                } else {
12827                    format!("CEO of {}", company.name)
12828                };
12829                let cfo_name = if emp_count > 1 {
12830                    self.master_data.employees[(i + 1) % emp_count]
12831                        .display_name
12832                        .clone()
12833                } else {
12834                    format!("CFO of {}", company.name)
12835                };
12836
12837                // Use engagement materiality if available.
12838                let materiality = snapshot
12839                    .engagements
12840                    .iter()
12841                    .find(|e| e.client_entity_id == company.code)
12842                    .map(|e| e.materiality)
12843                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
12844
12845                let input = SoxGeneratorInput {
12846                    company_code: company.code.clone(),
12847                    company_name: company.name.clone(),
12848                    fiscal_year,
12849                    period_end,
12850                    findings: company_findings,
12851                    ceo_name,
12852                    cfo_name,
12853                    materiality_threshold: materiality,
12854                    revenue_percent: rust_decimal::Decimal::from(100),
12855                    assets_percent: rust_decimal::Decimal::from(100),
12856                    significant_accounts: vec![
12857                        "Revenue".into(),
12858                        "Accounts Receivable".into(),
12859                        "Inventory".into(),
12860                        "Fixed Assets".into(),
12861                        "Accounts Payable".into(),
12862                    ],
12863                };
12864
12865                let (certs, assessment) = sox_gen.generate(&input);
12866                snapshot.sox_302_certifications.extend(certs);
12867                snapshot.sox_404_assessments.push(assessment);
12868            }
12869
12870            info!(
12871                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
12872                snapshot.sox_302_certifications.len(),
12873                snapshot.sox_404_assessments.len(),
12874                snapshot
12875                    .sox_404_assessments
12876                    .iter()
12877                    .filter(|a| a.icfr_effective)
12878                    .count(),
12879                snapshot
12880                    .sox_404_assessments
12881                    .iter()
12882                    .filter(|a| !a.icfr_effective)
12883                    .count(),
12884            );
12885        }
12886
12887        // ----------------------------------------------------------------
12888        // ISA 320: Materiality calculations (one per entity)
12889        // ----------------------------------------------------------------
12890        {
12891            use datasynth_generators::audit::materiality_generator::{
12892                MaterialityGenerator, MaterialityInput,
12893            };
12894
12895            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
12896
12897            // Compute per-company financials from JEs.
12898            // Asset accounts start with '1', revenue with '4',
12899            // expense accounts with '5' or '6'.
12900            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
12901
12902            for company in &self.config.companies {
12903                let company_code = company.code.clone();
12904
12905                // Revenue: credit-side entries on 4xxx accounts
12906                let company_revenue: rust_decimal::Decimal = entries
12907                    .iter()
12908                    .filter(|e| e.company_code() == company_code)
12909                    .flat_map(|e| e.lines.iter())
12910                    .filter(|l| l.account_code.starts_with('4'))
12911                    .map(|l| l.credit_amount)
12912                    .sum();
12913
12914                // Total assets: debit-side entries on 1xxx accounts (gross debits; credits are not netted)
12915                let total_assets: rust_decimal::Decimal = entries
12916                    .iter()
12917                    .filter(|e| e.company_code() == company_code)
12918                    .flat_map(|e| e.lines.iter())
12919                    .filter(|l| l.account_code.starts_with('1'))
12920                    .map(|l| l.debit_amount)
12921                    .sum();
12922
12923                // Expenses: debit-side entries on 5xxx/6xxx accounts
12924                let total_expenses: rust_decimal::Decimal = entries
12925                    .iter()
12926                    .filter(|e| e.company_code() == company_code)
12927                    .flat_map(|e| e.lines.iter())
12928                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12929                    .map(|l| l.debit_amount)
12930                    .sum();
12931
12932                // Equity: credit-side entries on 3xxx accounts (gross credits; debits are not netted)
12933                let equity: rust_decimal::Decimal = entries
12934                    .iter()
12935                    .filter(|e| e.company_code() == company_code)
12936                    .flat_map(|e| e.lines.iter())
12937                    .filter(|l| l.account_code.starts_with('3'))
12938                    .map(|l| l.credit_amount)
12939                    .sum();
12940
12941                let pretax_income = company_revenue - total_expenses;
12942
12943                // If no company-specific data, fall back to proportional share
12944                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
12945                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
12946                        .unwrap_or(rust_decimal::Decimal::ONE);
12947                    (
12948                        total_revenue * w,
12949                        total_revenue * w * rust_decimal::Decimal::from(3),
12950                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
12951                        total_revenue * w * rust_decimal::Decimal::from(2),
12952                    )
12953                } else {
12954                    (company_revenue, total_assets, pretax_income, equity)
12955                };
12956
12957                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
12958
12959                materiality_inputs.push(MaterialityInput {
12960                    entity_code: company_code,
12961                    period: format!("FY{}", fiscal_year),
12962                    revenue: rev,
12963                    pretax_income: pti,
12964                    total_assets: assets,
12965                    equity: eq,
12966                    gross_profit,
12967                });
12968            }
12969
12970            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
12971
12972            info!(
12973                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
12974                 {} total assets, {} equity benchmarks)",
12975                snapshot.materiality_calculations.len(),
12976                snapshot
12977                    .materiality_calculations
12978                    .iter()
12979                    .filter(|m| matches!(
12980                        m.benchmark,
12981                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
12982                    ))
12983                    .count(),
12984                snapshot
12985                    .materiality_calculations
12986                    .iter()
12987                    .filter(|m| matches!(
12988                        m.benchmark,
12989                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
12990                    ))
12991                    .count(),
12992                snapshot
12993                    .materiality_calculations
12994                    .iter()
12995                    .filter(|m| matches!(
12996                        m.benchmark,
12997                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
12998                    ))
12999                    .count(),
13000                snapshot
13001                    .materiality_calculations
13002                    .iter()
13003                    .filter(|m| matches!(
13004                        m.benchmark,
13005                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13006                    ))
13007                    .count(),
13008            );
13009        }
13010
13011        // ----------------------------------------------------------------
13012        // ISA 315: Combined Risk Assessments (per entity, per account area)
13013        // ----------------------------------------------------------------
13014        {
13015            use datasynth_generators::audit::cra_generator::CraGenerator;
13016
13017            let mut cra_gen = CraGenerator::new(self.seed + 8315);
13018
13019            // Build entity → scope_id map from already-generated scopes
13020            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13021                .audit_scopes
13022                .iter()
13023                .map(|s| (s.entity_code.clone(), s.id.clone()))
13024                .collect();
13025
13026            for company in &self.config.companies {
13027                let cras = cra_gen.generate_for_entity(&company.code, None);
13028                let scope_id = entity_scope_map.get(&company.code).cloned();
13029                let cras_with_scope: Vec<_> = cras
13030                    .into_iter()
13031                    .map(|mut cra| {
13032                        cra.scope_id = scope_id.clone();
13033                        cra
13034                    })
13035                    .collect();
13036                snapshot.combined_risk_assessments.extend(cras_with_scope);
13037            }
13038
13039            let significant_count = snapshot
13040                .combined_risk_assessments
13041                .iter()
13042                .filter(|c| c.significant_risk)
13043                .count();
13044            let high_cra_count = snapshot
13045                .combined_risk_assessments
13046                .iter()
13047                .filter(|c| {
13048                    matches!(
13049                        c.combined_risk,
13050                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13051                    )
13052                })
13053                .count();
13054
13055            info!(
13056                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13057                snapshot.combined_risk_assessments.len(),
13058                significant_count,
13059                high_cra_count,
13060            );
13061        }
13062
13063        // ----------------------------------------------------------------
13064        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
13065        // ----------------------------------------------------------------
13066        {
13067            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13068
13069            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13070
13071            // Group CRAs by entity and use per-entity tolerable error from materiality
13072            for company in &self.config.companies {
13073                let entity_code = company.code.clone();
13074
13075                // Find tolerable error for this entity (= performance materiality)
13076                let tolerable_error = snapshot
13077                    .materiality_calculations
13078                    .iter()
13079                    .find(|m| m.entity_code == entity_code)
13080                    .map(|m| m.tolerable_error);
13081
13082                // Collect CRAs for this entity
13083                let entity_cras: Vec<_> = snapshot
13084                    .combined_risk_assessments
13085                    .iter()
13086                    .filter(|c| c.entity_code == entity_code)
13087                    .cloned()
13088                    .collect();
13089
13090                if !entity_cras.is_empty() {
13091                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13092                    snapshot.sampling_plans.extend(plans);
13093                    snapshot.sampled_items.extend(items);
13094                }
13095            }
13096
13097            let misstatement_count = snapshot
13098                .sampled_items
13099                .iter()
13100                .filter(|i| i.misstatement_found)
13101                .count();
13102
13103            info!(
13104                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13105                snapshot.sampling_plans.len(),
13106                snapshot.sampled_items.len(),
13107                misstatement_count,
13108            );
13109        }
13110
13111        // ----------------------------------------------------------------
13112        // ISA 315: Significant Classes of Transactions (SCOTS)
13113        // ----------------------------------------------------------------
13114        {
13115            use datasynth_generators::audit::scots_generator::{
13116                ScotsGenerator, ScotsGeneratorConfig,
13117            };
13118
13119            let ic_enabled = self.config.intercompany.enabled;
13120
13121            let config = ScotsGeneratorConfig {
13122                intercompany_enabled: ic_enabled,
13123                ..ScotsGeneratorConfig::default()
13124            };
13125            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
13126
13127            for company in &self.config.companies {
13128                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
13129                snapshot
13130                    .significant_transaction_classes
13131                    .extend(entity_scots);
13132            }
13133
13134            let estimation_count = snapshot
13135                .significant_transaction_classes
13136                .iter()
13137                .filter(|s| {
13138                    matches!(
13139                        s.transaction_type,
13140                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
13141                    )
13142                })
13143                .count();
13144
13145            info!(
13146                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
13147                snapshot.significant_transaction_classes.len(),
13148                estimation_count,
13149            );
13150        }
13151
13152        // ----------------------------------------------------------------
13153        // ISA 520: Unusual Item Markers
13154        // ----------------------------------------------------------------
13155        {
13156            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
13157
13158            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
13159            let entity_codes: Vec<String> = self
13160                .config
13161                .companies
13162                .iter()
13163                .map(|c| c.code.clone())
13164                .collect();
13165            let unusual_flags =
13166                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
13167            info!(
13168                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
13169                unusual_flags.len(),
13170                unusual_flags
13171                    .iter()
13172                    .filter(|f| matches!(
13173                        f.severity,
13174                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
13175                    ))
13176                    .count(),
13177                unusual_flags
13178                    .iter()
13179                    .filter(|f| matches!(
13180                        f.severity,
13181                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
13182                    ))
13183                    .count(),
13184                unusual_flags
13185                    .iter()
13186                    .filter(|f| matches!(
13187                        f.severity,
13188                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
13189                    ))
13190                    .count(),
13191            );
13192            snapshot.unusual_items = unusual_flags;
13193        }
13194
13195        // ----------------------------------------------------------------
13196        // ISA 520: Analytical Relationships
13197        // ----------------------------------------------------------------
13198        {
13199            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
13200
13201            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
13202            let entity_codes: Vec<String> = self
13203                .config
13204                .companies
13205                .iter()
13206                .map(|c| c.code.clone())
13207                .collect();
13208            let current_period_label = format!("FY{fiscal_year}");
13209            let prior_period_label = format!("FY{}", fiscal_year - 1);
13210            let analytical_rels = ar_gen.generate_for_entities(
13211                &entity_codes,
13212                entries,
13213                &current_period_label,
13214                &prior_period_label,
13215            );
13216            let out_of_range = analytical_rels
13217                .iter()
13218                .filter(|r| !r.within_expected_range)
13219                .count();
13220            info!(
13221                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13222                analytical_rels.len(),
13223                out_of_range,
13224            );
13225            snapshot.analytical_relationships = analytical_rels;
13226        }
13227
13228        if let Some(pb) = pb {
13229            pb.finish_with_message(format!(
13230                "Audit data: {} engagements, {} workpapers, {} evidence, \
13231                 {} confirmations, {} procedure steps, {} samples, \
13232                 {} analytical, {} IA funcs, {} related parties, \
13233                 {} component auditors, {} letters, {} subsequent events, \
13234                 {} service orgs, {} going concern, {} accounting estimates, \
13235                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13236                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13237                 {} unusual items, {} analytical relationships",
13238                snapshot.engagements.len(),
13239                snapshot.workpapers.len(),
13240                snapshot.evidence.len(),
13241                snapshot.confirmations.len(),
13242                snapshot.procedure_steps.len(),
13243                snapshot.samples.len(),
13244                snapshot.analytical_results.len(),
13245                snapshot.ia_functions.len(),
13246                snapshot.related_parties.len(),
13247                snapshot.component_auditors.len(),
13248                snapshot.engagement_letters.len(),
13249                snapshot.subsequent_events.len(),
13250                snapshot.service_organizations.len(),
13251                snapshot.going_concern_assessments.len(),
13252                snapshot.accounting_estimates.len(),
13253                snapshot.audit_opinions.len(),
13254                snapshot.key_audit_matters.len(),
13255                snapshot.sox_302_certifications.len(),
13256                snapshot.sox_404_assessments.len(),
13257                snapshot.materiality_calculations.len(),
13258                snapshot.combined_risk_assessments.len(),
13259                snapshot.sampling_plans.len(),
13260                snapshot.significant_transaction_classes.len(),
13261                snapshot.unusual_items.len(),
13262                snapshot.analytical_relationships.len(),
13263            ));
13264        }
13265
13266        // ----------------------------------------------------------------
13267        // PCAOB-ISA cross-reference mappings
13268        // ----------------------------------------------------------------
13269        // Always include the standard PCAOB-ISA mappings when audit generation is
13270        // enabled. These are static reference data (no randomness required) so we
13271        // call standard_mappings() directly.
13272        {
13273            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13274            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13275            debug!(
13276                "PCAOB-ISA mappings generated: {} mappings",
13277                snapshot.isa_pcaob_mappings.len()
13278            );
13279        }
13280
13281        // ----------------------------------------------------------------
13282        // ISA standard reference entries
13283        // ----------------------------------------------------------------
13284        // Emit flat ISA standard reference data (number, title, series) so
13285        // consumers get a machine-readable listing of all 34 ISA standards in
13286        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
13287        {
13288            use datasynth_standards::audit::isa_reference::IsaStandard;
13289            snapshot.isa_mappings = IsaStandard::standard_entries();
13290            debug!(
13291                "ISA standard entries generated: {} standards",
13292                snapshot.isa_mappings.len()
13293            );
13294        }
13295
13296        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
13297        // For each RPT, find the chronologically closest JE for the engagement's entity.
13298        {
13299            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
13300                .engagements
13301                .iter()
13302                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
13303                .collect();
13304
13305            for rpt in &mut snapshot.related_party_transactions {
13306                if rpt.journal_entry_id.is_some() {
13307                    continue; // already set
13308                }
13309                let entity = engagement_by_id
13310                    .get(&rpt.engagement_id.to_string())
13311                    .copied()
13312                    .unwrap_or("");
13313
13314                // Find closest JE by date in the entity's company
13315                let best_je = entries
13316                    .iter()
13317                    .filter(|je| je.header.company_code == entity)
13318                    .min_by_key(|je| {
13319                        (je.header.posting_date - rpt.transaction_date)
13320                            .num_days()
13321                            .abs()
13322                    });
13323
13324                if let Some(je) = best_je {
13325                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
13326                }
13327            }
13328
13329            let linked = snapshot
13330                .related_party_transactions
13331                .iter()
13332                .filter(|t| t.journal_entry_id.is_some())
13333                .count();
13334            debug!(
13335                "Linked {}/{} related party transactions to journal entries",
13336                linked,
13337                snapshot.related_party_transactions.len()
13338            );
13339        }
13340
13341        // --- ISA 700 / 701 / 705 / 706: audit opinion + key audit matters.
13342        // One opinion per engagement, derived from that engagement's findings,
13343        // going-concern assessment, and any component-auditor reports. Fills
13344        // `audit_opinions` + a flattened `key_audit_matters` for downstream
13345        // export.
13346        if !snapshot.engagements.is_empty() {
13347            use datasynth_generators::audit_opinion_generator::{
13348                AuditOpinionGenerator, AuditOpinionInput,
13349            };
13350
13351            let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
13352            let inputs: Vec<AuditOpinionInput> = snapshot
13353                .engagements
13354                .iter()
13355                .map(|eng| {
13356                    let findings = snapshot
13357                        .findings
13358                        .iter()
13359                        .filter(|f| f.engagement_id == eng.engagement_id)
13360                        .cloned()
13361                        .collect();
13362                    let going_concern = snapshot
13363                        .going_concern_assessments
13364                        .iter()
13365                        .find(|gc| gc.entity_code == eng.client_entity_id)
13366                        .cloned();
13367                    // ComponentAuditorReport doesn't carry an engagement id, but
13368                    // component scope is keyed by `entity_code`, so filter on that.
13369                    let component_reports = snapshot
13370                        .component_reports
13371                        .iter()
13372                        .filter(|r| r.entity_code == eng.client_entity_id)
13373                        .cloned()
13374                        .collect();
13375
13376                    AuditOpinionInput {
13377                        entity_code: eng.client_entity_id.clone(),
13378                        entity_name: eng.client_name.clone(),
13379                        engagement_id: eng.engagement_id,
13380                        period_end: eng.period_end_date,
13381                        findings,
13382                        going_concern,
13383                        component_reports,
13384                        is_us_listed: matches!(
13385                            eng.engagement_type,
13386                            datasynth_core::audit::EngagementType::IntegratedAudit
13387                                | datasynth_core::audit::EngagementType::Sox404
13388                        ),
13389                        auditor_name: "DataSynth Audit LLP".to_string(),
13390                        engagement_partner: "Engagement Partner".to_string(),
13391                    }
13392                })
13393                .collect();
13394
13395            let generated = opinion_gen.generate_batch(&inputs);
13396            for g in generated {
13397                snapshot.key_audit_matters.extend(g.key_audit_matters);
13398                snapshot.audit_opinions.push(g.opinion);
13399            }
13400            debug!(
13401                "Generated {} audit opinions with {} key audit matters",
13402                snapshot.audit_opinions.len(),
13403                snapshot.key_audit_matters.len()
13404            );
13405        }
13406
13407        Ok(snapshot)
13408    }
13409
13410    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
13411    ///
13412    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
13413    /// from the current orchestrator state, runs the FSM engine, and maps the
13414    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
13415    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
13416    fn generate_audit_data_with_fsm(
13417        &mut self,
13418        entries: &[JournalEntry],
13419    ) -> SynthResult<AuditSnapshot> {
13420        use datasynth_audit_fsm::{
13421            context::EngagementContext,
13422            engine::AuditFsmEngine,
13423            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
13424        };
13425        use rand::SeedableRng;
13426        use rand_chacha::ChaCha8Rng;
13427
13428        info!("Audit FSM: generating audit data via FSM engine");
13429
13430        let fsm_config = self
13431            .config
13432            .audit
13433            .fsm
13434            .as_ref()
13435            .expect("FSM config must be present when FSM is enabled");
13436
13437        // 1. Load blueprint from config string.
13438        let bwp = match fsm_config.blueprint.as_str() {
13439            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
13440            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
13441            _ => {
13442                warn!(
13443                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
13444                    fsm_config.blueprint
13445                );
13446                BlueprintWithPreconditions::load_builtin_fsa()
13447            }
13448        }
13449        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
13450
13451        // 2. Load overlay from config string.
13452        let overlay = match fsm_config.overlay.as_str() {
13453            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
13454            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
13455            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
13456            _ => {
13457                warn!(
13458                    "Unknown FSM overlay '{}', falling back to builtin:default",
13459                    fsm_config.overlay
13460                );
13461                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
13462            }
13463        }
13464        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
13465
13466        // 3. Build EngagementContext from orchestrator state.
13467        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13468            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
13469        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
13470
13471        // Determine the engagement entity early so we can filter JEs.
13472        let company = self.config.companies.first();
13473        let company_code = company
13474            .map(|c| c.code.clone())
13475            .unwrap_or_else(|| "UNKNOWN".to_string());
13476        let company_name = company
13477            .map(|c| c.name.clone())
13478            .unwrap_or_else(|| "Unknown Company".to_string());
13479        let currency = company
13480            .map(|c| c.currency.clone())
13481            .unwrap_or_else(|| "USD".to_string());
13482
13483        // Filter JEs to the engagement entity for single-company coherence.
13484        let entity_entries: Vec<_> = entries
13485            .iter()
13486            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
13487            .cloned()
13488            .collect();
13489        let entries = &entity_entries; // Shadow the parameter for remaining usage
13490
13491        // Financial aggregates from journal entries.
13492        let total_revenue: rust_decimal::Decimal = entries
13493            .iter()
13494            .flat_map(|e| e.lines.iter())
13495            .filter(|l| l.account_code.starts_with('4'))
13496            .map(|l| l.credit_amount - l.debit_amount)
13497            .sum();
13498
13499        let total_assets: rust_decimal::Decimal = entries
13500            .iter()
13501            .flat_map(|e| e.lines.iter())
13502            .filter(|l| l.account_code.starts_with('1'))
13503            .map(|l| l.debit_amount - l.credit_amount)
13504            .sum();
13505
13506        let total_expenses: rust_decimal::Decimal = entries
13507            .iter()
13508            .flat_map(|e| e.lines.iter())
13509            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13510            .map(|l| l.debit_amount)
13511            .sum();
13512
13513        let equity: rust_decimal::Decimal = entries
13514            .iter()
13515            .flat_map(|e| e.lines.iter())
13516            .filter(|l| l.account_code.starts_with('3'))
13517            .map(|l| l.credit_amount - l.debit_amount)
13518            .sum();
13519
13520        let total_debt: rust_decimal::Decimal = entries
13521            .iter()
13522            .flat_map(|e| e.lines.iter())
13523            .filter(|l| l.account_code.starts_with('2'))
13524            .map(|l| l.credit_amount - l.debit_amount)
13525            .sum();
13526
13527        let pretax_income = total_revenue - total_expenses;
13528
13529        let cogs: rust_decimal::Decimal = entries
13530            .iter()
13531            .flat_map(|e| e.lines.iter())
13532            .filter(|l| l.account_code.starts_with('5'))
13533            .map(|l| l.debit_amount)
13534            .sum();
13535        let gross_profit = total_revenue - cogs;
13536
13537        let current_assets: rust_decimal::Decimal = entries
13538            .iter()
13539            .flat_map(|e| e.lines.iter())
13540            .filter(|l| {
13541                l.account_code.starts_with("10")
13542                    || l.account_code.starts_with("11")
13543                    || l.account_code.starts_with("12")
13544                    || l.account_code.starts_with("13")
13545            })
13546            .map(|l| l.debit_amount - l.credit_amount)
13547            .sum();
13548        let current_liabilities: rust_decimal::Decimal = entries
13549            .iter()
13550            .flat_map(|e| e.lines.iter())
13551            .filter(|l| {
13552                l.account_code.starts_with("20")
13553                    || l.account_code.starts_with("21")
13554                    || l.account_code.starts_with("22")
13555            })
13556            .map(|l| l.credit_amount - l.debit_amount)
13557            .sum();
13558        let working_capital = current_assets - current_liabilities;
13559
13560        let depreciation: rust_decimal::Decimal = entries
13561            .iter()
13562            .flat_map(|e| e.lines.iter())
13563            .filter(|l| l.account_code.starts_with("60"))
13564            .map(|l| l.debit_amount)
13565            .sum();
13566        let operating_cash_flow = pretax_income + depreciation;
13567
13568        // GL accounts for reference data.
13569        let accounts: Vec<String> = self
13570            .coa
13571            .as_ref()
13572            .map(|coa| {
13573                coa.get_postable_accounts()
13574                    .iter()
13575                    .map(|acc| acc.account_code().to_string())
13576                    .collect()
13577            })
13578            .unwrap_or_default();
13579
13580        // Team member IDs and display names from master data.
13581        let team_member_ids: Vec<String> = self
13582            .master_data
13583            .employees
13584            .iter()
13585            .take(8) // Cap team size
13586            .map(|e| e.employee_id.clone())
13587            .collect();
13588        let team_member_pairs: Vec<(String, String)> = self
13589            .master_data
13590            .employees
13591            .iter()
13592            .take(8)
13593            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13594            .collect();
13595
13596        let vendor_names: Vec<String> = self
13597            .master_data
13598            .vendors
13599            .iter()
13600            .map(|v| v.name.clone())
13601            .collect();
13602        let customer_names: Vec<String> = self
13603            .master_data
13604            .customers
13605            .iter()
13606            .map(|c| c.name.clone())
13607            .collect();
13608
13609        let entity_codes: Vec<String> = self
13610            .config
13611            .companies
13612            .iter()
13613            .map(|c| c.code.clone())
13614            .collect();
13615
13616        // Journal entry IDs for evidence tracing (sample up to 50).
13617        let journal_entry_ids: Vec<String> = entries
13618            .iter()
13619            .take(50)
13620            .map(|e| e.header.document_id.to_string())
13621            .collect();
13622
13623        // Account balances for risk weighting (aggregate debit - credit per account).
13624        let mut account_balances = std::collections::HashMap::<String, f64>::new();
13625        for entry in entries {
13626            for line in &entry.lines {
13627                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
13628                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
13629                *account_balances
13630                    .entry(line.account_code.clone())
13631                    .or_insert(0.0) += debit_f64 - credit_f64;
13632            }
13633        }
13634
13635        // Internal control IDs and anomaly refs are populated by the
13636        // caller when available; here we default to empty because the
13637        // orchestrator state may not have generated controls/anomalies
13638        // yet at this point in the pipeline.
13639        let control_ids: Vec<String> = Vec::new();
13640        let anomaly_refs: Vec<String> = Vec::new();
13641
13642        let mut context = EngagementContext {
13643            company_code,
13644            company_name,
13645            fiscal_year: start_date.year(),
13646            currency,
13647            total_revenue,
13648            total_assets,
13649            engagement_start: start_date,
13650            report_date: period_end,
13651            pretax_income,
13652            equity,
13653            gross_profit,
13654            working_capital,
13655            operating_cash_flow,
13656            total_debt,
13657            team_member_ids,
13658            team_member_pairs,
13659            accounts,
13660            vendor_names,
13661            customer_names,
13662            journal_entry_ids,
13663            account_balances,
13664            control_ids,
13665            anomaly_refs,
13666            journal_entries: entries.to_vec(),
13667            is_us_listed: false,
13668            entity_codes,
13669            auditor_firm_name: "DataSynth Audit LLP".into(),
13670            accounting_framework: self
13671                .config
13672                .accounting_standards
13673                .framework
13674                .map(|f| match f {
13675                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
13676                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
13677                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
13678                        "French GAAP"
13679                    }
13680                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
13681                        "German GAAP"
13682                    }
13683                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
13684                        "Dual Reporting"
13685                    }
13686                })
13687                .unwrap_or("IFRS")
13688                .into(),
13689        };
13690
13691        // 4. Create and run the FSM engine.
13692        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
13693        let rng = ChaCha8Rng::seed_from_u64(seed);
13694        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
13695
13696        let mut result = engine
13697            .run_engagement(&context)
13698            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
13699
13700        info!(
13701            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
13702             {} phases completed, duration {:.1}h",
13703            result.event_log.len(),
13704            result.artifacts.total_artifacts(),
13705            result.anomalies.len(),
13706            result.phases_completed.len(),
13707            result.total_duration_hours,
13708        );
13709
13710        // 4b. Populate financial data in the artifact bag for downstream consumers.
13711        let tb_entity = context.company_code.clone();
13712        let tb_fy = context.fiscal_year;
13713        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
13714        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
13715            entries,
13716            &tb_entity,
13717            tb_fy,
13718            self.coa.as_ref().map(|c| c.as_ref()),
13719        );
13720
13721        // 5. Map ArtifactBag fields to AuditSnapshot.
13722        let bag = result.artifacts;
13723        let mut snapshot = AuditSnapshot {
13724            engagements: bag.engagements,
13725            engagement_letters: bag.engagement_letters,
13726            materiality_calculations: bag.materiality_calculations,
13727            risk_assessments: bag.risk_assessments,
13728            combined_risk_assessments: bag.combined_risk_assessments,
13729            workpapers: bag.workpapers,
13730            evidence: bag.evidence,
13731            findings: bag.findings,
13732            judgments: bag.judgments,
13733            sampling_plans: bag.sampling_plans,
13734            sampled_items: bag.sampled_items,
13735            analytical_results: bag.analytical_results,
13736            going_concern_assessments: bag.going_concern_assessments,
13737            subsequent_events: bag.subsequent_events,
13738            audit_opinions: bag.audit_opinions,
13739            key_audit_matters: bag.key_audit_matters,
13740            procedure_steps: bag.procedure_steps,
13741            samples: bag.samples,
13742            confirmations: bag.confirmations,
13743            confirmation_responses: bag.confirmation_responses,
13744            // Store the event trail for downstream export.
13745            fsm_event_trail: Some(result.event_log),
13746            // Fields not produced by the FSM engine remain at their defaults.
13747            ..Default::default()
13748        };
13749
13750        // 6. Add static reference data (same as legacy path).
13751        {
13752            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13753            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13754        }
13755        {
13756            use datasynth_standards::audit::isa_reference::IsaStandard;
13757            snapshot.isa_mappings = IsaStandard::standard_entries();
13758        }
13759
13760        info!(
13761            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
13762             {} risk assessments, {} findings, {} materiality calcs",
13763            snapshot.engagements.len(),
13764            snapshot.workpapers.len(),
13765            snapshot.evidence.len(),
13766            snapshot.risk_assessments.len(),
13767            snapshot.findings.len(),
13768            snapshot.materiality_calculations.len(),
13769        );
13770
13771        Ok(snapshot)
13772    }
13773
13774    /// Export journal entries as graph data for ML training and network reconstruction.
13775    ///
13776    /// Builds a transaction graph where:
13777    /// - Nodes are GL accounts
13778    /// - Edges are money flows from credit to debit accounts
13779    /// - Edge attributes include amount, date, business process, anomaly flags
13780    fn export_graphs(
13781        &mut self,
13782        entries: &[JournalEntry],
13783        _coa: &Arc<ChartOfAccounts>,
13784        stats: &mut EnhancedGenerationStatistics,
13785    ) -> SynthResult<GraphExportSnapshot> {
13786        let pb = self.create_progress_bar(100, "Exporting Graphs");
13787
13788        let mut snapshot = GraphExportSnapshot::default();
13789
13790        // Get output directory
13791        let output_dir = self
13792            .output_path
13793            .clone()
13794            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13795        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13796
13797        // Process each graph type configuration
13798        for graph_type in &self.config.graph_export.graph_types {
13799            if let Some(pb) = &pb {
13800                pb.inc(10);
13801            }
13802
13803            // Build transaction graph
13804            let graph_config = TransactionGraphConfig {
13805                include_vendors: false,
13806                include_customers: false,
13807                create_debit_credit_edges: true,
13808                include_document_nodes: graph_type.include_document_nodes,
13809                min_edge_weight: graph_type.min_edge_weight,
13810                aggregate_parallel_edges: graph_type.aggregate_edges,
13811                framework: None,
13812            };
13813
13814            let mut builder = TransactionGraphBuilder::new(graph_config);
13815            builder.add_journal_entries(entries);
13816            let graph = builder.build();
13817
13818            // Update stats
13819            stats.graph_node_count += graph.node_count();
13820            stats.graph_edge_count += graph.edge_count();
13821
13822            if let Some(pb) = &pb {
13823                pb.inc(40);
13824            }
13825
13826            // Export to each configured format
13827            for format in &self.config.graph_export.formats {
13828                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
13829
13830                // Create output directory
13831                if let Err(e) = std::fs::create_dir_all(&format_dir) {
13832                    warn!("Failed to create graph output directory: {}", e);
13833                    continue;
13834                }
13835
13836                match format {
13837                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
13838                        let pyg_config = PyGExportConfig {
13839                            common: datasynth_graph::CommonExportConfig {
13840                                export_node_features: true,
13841                                export_edge_features: true,
13842                                export_node_labels: true,
13843                                export_edge_labels: true,
13844                                export_masks: true,
13845                                train_ratio: self.config.graph_export.train_ratio,
13846                                val_ratio: self.config.graph_export.validation_ratio,
13847                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13848                            },
13849                            one_hot_categoricals: false,
13850                        };
13851
13852                        let exporter = PyGExporter::new(pyg_config);
13853                        match exporter.export(&graph, &format_dir) {
13854                            Ok(metadata) => {
13855                                snapshot.exports.insert(
13856                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
13857                                    GraphExportInfo {
13858                                        name: graph_type.name.clone(),
13859                                        format: "pytorch_geometric".to_string(),
13860                                        output_path: format_dir.clone(),
13861                                        node_count: metadata.num_nodes,
13862                                        edge_count: metadata.num_edges,
13863                                    },
13864                                );
13865                                snapshot.graph_count += 1;
13866                            }
13867                            Err(e) => {
13868                                warn!("Failed to export PyTorch Geometric graph: {}", e);
13869                            }
13870                        }
13871                    }
13872                    datasynth_config::schema::GraphExportFormat::Neo4j => {
13873                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
13874
13875                        let neo4j_config = Neo4jExportConfig {
13876                            export_node_properties: true,
13877                            export_edge_properties: true,
13878                            export_features: true,
13879                            generate_cypher: true,
13880                            generate_admin_import: true,
13881                            database_name: "synth".to_string(),
13882                            cypher_batch_size: 1000,
13883                        };
13884
13885                        let exporter = Neo4jExporter::new(neo4j_config);
13886                        match exporter.export(&graph, &format_dir) {
13887                            Ok(metadata) => {
13888                                snapshot.exports.insert(
13889                                    format!("{}_{}", graph_type.name, "neo4j"),
13890                                    GraphExportInfo {
13891                                        name: graph_type.name.clone(),
13892                                        format: "neo4j".to_string(),
13893                                        output_path: format_dir.clone(),
13894                                        node_count: metadata.num_nodes,
13895                                        edge_count: metadata.num_edges,
13896                                    },
13897                                );
13898                                snapshot.graph_count += 1;
13899                            }
13900                            Err(e) => {
13901                                warn!("Failed to export Neo4j graph: {}", e);
13902                            }
13903                        }
13904                    }
13905                    datasynth_config::schema::GraphExportFormat::Dgl => {
13906                        use datasynth_graph::{DGLExportConfig, DGLExporter};
13907
13908                        let dgl_config = DGLExportConfig {
13909                            common: datasynth_graph::CommonExportConfig {
13910                                export_node_features: true,
13911                                export_edge_features: true,
13912                                export_node_labels: true,
13913                                export_edge_labels: true,
13914                                export_masks: true,
13915                                train_ratio: self.config.graph_export.train_ratio,
13916                                val_ratio: self.config.graph_export.validation_ratio,
13917                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13918                            },
13919                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
13920                            include_pickle_script: true, // DGL ecosystem standard helper
13921                        };
13922
13923                        let exporter = DGLExporter::new(dgl_config);
13924                        match exporter.export(&graph, &format_dir) {
13925                            Ok(metadata) => {
13926                                snapshot.exports.insert(
13927                                    format!("{}_{}", graph_type.name, "dgl"),
13928                                    GraphExportInfo {
13929                                        name: graph_type.name.clone(),
13930                                        format: "dgl".to_string(),
13931                                        output_path: format_dir.clone(),
13932                                        node_count: metadata.common.num_nodes,
13933                                        edge_count: metadata.common.num_edges,
13934                                    },
13935                                );
13936                                snapshot.graph_count += 1;
13937                            }
13938                            Err(e) => {
13939                                warn!("Failed to export DGL graph: {}", e);
13940                            }
13941                        }
13942                    }
13943                    datasynth_config::schema::GraphExportFormat::RustGraph => {
13944                        use datasynth_graph::{
13945                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
13946                        };
13947
13948                        let rustgraph_config = RustGraphExportConfig {
13949                            include_features: true,
13950                            include_temporal: true,
13951                            include_labels: true,
13952                            source_name: "datasynth".to_string(),
13953                            batch_id: None,
13954                            output_format: RustGraphOutputFormat::JsonLines,
13955                            export_node_properties: true,
13956                            export_edge_properties: true,
13957                            pretty_print: false,
13958                        };
13959
13960                        let exporter = RustGraphExporter::new(rustgraph_config);
13961                        match exporter.export(&graph, &format_dir) {
13962                            Ok(metadata) => {
13963                                snapshot.exports.insert(
13964                                    format!("{}_{}", graph_type.name, "rustgraph"),
13965                                    GraphExportInfo {
13966                                        name: graph_type.name.clone(),
13967                                        format: "rustgraph".to_string(),
13968                                        output_path: format_dir.clone(),
13969                                        node_count: metadata.num_nodes,
13970                                        edge_count: metadata.num_edges,
13971                                    },
13972                                );
13973                                snapshot.graph_count += 1;
13974                            }
13975                            Err(e) => {
13976                                warn!("Failed to export RustGraph: {}", e);
13977                            }
13978                        }
13979                    }
13980                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
13981                        // Hypergraph export is handled separately in Phase 10b
13982                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
13983                    }
13984                }
13985            }
13986
13987            if let Some(pb) = &pb {
13988                pb.inc(40);
13989            }
13990        }
13991
13992        stats.graph_export_count = snapshot.graph_count;
13993        snapshot.exported = snapshot.graph_count > 0;
13994
13995        if let Some(pb) = pb {
13996            pb.finish_with_message(format!(
13997                "Graphs exported: {} graphs ({} nodes, {} edges)",
13998                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
13999            ));
14000        }
14001
14002        Ok(snapshot)
14003    }
14004
14005    /// Build additional graph types (banking, approval, entity) when relevant data
14006    /// is available. These run as a late phase because the data they need (banking
14007    /// snapshot, intercompany snapshot) is only generated after the main graph
14008    /// export phase.
14009    fn build_additional_graphs(
14010        &self,
14011        banking: &BankingSnapshot,
14012        intercompany: &IntercompanySnapshot,
14013        entries: &[JournalEntry],
14014        stats: &mut EnhancedGenerationStatistics,
14015    ) {
14016        let output_dir = self
14017            .output_path
14018            .clone()
14019            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14020        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14021
14022        // Banking graph: build when banking customers and transactions exist
14023        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14024            info!("Phase 10c: Building banking network graph");
14025            let config = BankingGraphConfig::default();
14026            let mut builder = BankingGraphBuilder::new(config);
14027            builder.add_customers(&banking.customers);
14028            builder.add_accounts(&banking.accounts, &banking.customers);
14029            builder.add_transactions(&banking.transactions);
14030            let graph = builder.build();
14031
14032            let node_count = graph.node_count();
14033            let edge_count = graph.edge_count();
14034            stats.graph_node_count += node_count;
14035            stats.graph_edge_count += edge_count;
14036
14037            // Export as PyG if configured
14038            for format in &self.config.graph_export.formats {
14039                if matches!(
14040                    format,
14041                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14042                ) {
14043                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14044                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14045                        warn!("Failed to create banking graph output dir: {}", e);
14046                        continue;
14047                    }
14048                    let pyg_config = PyGExportConfig::default();
14049                    let exporter = PyGExporter::new(pyg_config);
14050                    if let Err(e) = exporter.export(&graph, &format_dir) {
14051                        warn!("Failed to export banking graph as PyG: {}", e);
14052                    } else {
14053                        info!(
14054                            "Banking network graph exported: {} nodes, {} edges",
14055                            node_count, edge_count
14056                        );
14057                    }
14058                }
14059            }
14060        }
14061
14062        // Approval graph: build from journal entry approval workflows
14063        let approval_entries: Vec<_> = entries
14064            .iter()
14065            .filter(|je| je.header.approval_workflow.is_some())
14066            .collect();
14067
14068        if !approval_entries.is_empty() {
14069            info!(
14070                "Phase 10c: Building approval network graph ({} entries with approvals)",
14071                approval_entries.len()
14072            );
14073            let config = ApprovalGraphConfig::default();
14074            let mut builder = ApprovalGraphBuilder::new(config);
14075
14076            for je in &approval_entries {
14077                if let Some(ref wf) = je.header.approval_workflow {
14078                    for action in &wf.actions {
14079                        let record = datasynth_core::models::ApprovalRecord {
14080                            approval_id: format!(
14081                                "APR-{}-{}",
14082                                je.header.document_id, action.approval_level
14083                            ),
14084                            document_number: je.header.document_id.to_string(),
14085                            document_type: "JE".to_string(),
14086                            company_code: je.company_code().to_string(),
14087                            requester_id: wf.preparer_id.clone(),
14088                            requester_name: Some(wf.preparer_name.clone()),
14089                            approver_id: action.actor_id.clone(),
14090                            approver_name: action.actor_name.clone(),
14091                            approval_date: je.posting_date(),
14092                            action: format!("{:?}", action.action),
14093                            amount: wf.amount,
14094                            approval_limit: None,
14095                            comments: action.comments.clone(),
14096                            delegation_from: None,
14097                            is_auto_approved: false,
14098                        };
14099                        builder.add_approval(&record);
14100                    }
14101                }
14102            }
14103
14104            let graph = builder.build();
14105            let node_count = graph.node_count();
14106            let edge_count = graph.edge_count();
14107            stats.graph_node_count += node_count;
14108            stats.graph_edge_count += edge_count;
14109
14110            // Export as PyG if configured
14111            for format in &self.config.graph_export.formats {
14112                if matches!(
14113                    format,
14114                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14115                ) {
14116                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14117                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14118                        warn!("Failed to create approval graph output dir: {}", e);
14119                        continue;
14120                    }
14121                    let pyg_config = PyGExportConfig::default();
14122                    let exporter = PyGExporter::new(pyg_config);
14123                    if let Err(e) = exporter.export(&graph, &format_dir) {
14124                        warn!("Failed to export approval graph as PyG: {}", e);
14125                    } else {
14126                        info!(
14127                            "Approval network graph exported: {} nodes, {} edges",
14128                            node_count, edge_count
14129                        );
14130                    }
14131                }
14132            }
14133        }
14134
14135        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
14136        if self.config.companies.len() >= 2 {
14137            info!(
14138                "Phase 10c: Building entity relationship graph ({} companies)",
14139                self.config.companies.len()
14140            );
14141
14142            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14143                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
14144
14145            // Map CompanyConfig → Company objects
14146            let parent_code = &self.config.companies[0].code;
14147            let mut companies: Vec<datasynth_core::models::Company> =
14148                Vec::with_capacity(self.config.companies.len());
14149
14150            // First company is the parent
14151            let first = &self.config.companies[0];
14152            companies.push(datasynth_core::models::Company::parent(
14153                &first.code,
14154                &first.name,
14155                &first.country,
14156                &first.currency,
14157            ));
14158
14159            // Remaining companies are subsidiaries (100% owned by parent)
14160            for cc in self.config.companies.iter().skip(1) {
14161                companies.push(datasynth_core::models::Company::subsidiary(
14162                    &cc.code,
14163                    &cc.name,
14164                    &cc.country,
14165                    &cc.currency,
14166                    parent_code,
14167                    rust_decimal::Decimal::from(100),
14168                ));
14169            }
14170
14171            // Build IntercompanyRelationship records (same logic as phase_intercompany)
14172            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
14173                self.config
14174                    .companies
14175                    .iter()
14176                    .skip(1)
14177                    .enumerate()
14178                    .map(|(i, cc)| {
14179                        let mut rel =
14180                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
14181                                format!("REL{:03}", i + 1),
14182                                parent_code.clone(),
14183                                cc.code.clone(),
14184                                rust_decimal::Decimal::from(100),
14185                                start_date,
14186                            );
14187                        rel.functional_currency = cc.currency.clone();
14188                        rel
14189                    })
14190                    .collect();
14191
14192            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
14193            builder.add_companies(&companies);
14194            builder.add_ownership_relationships(&relationships);
14195
14196            // Thread IC matched-pair transaction edges into the entity graph
14197            for pair in &intercompany.matched_pairs {
14198                builder.add_intercompany_edge(
14199                    &pair.seller_company,
14200                    &pair.buyer_company,
14201                    pair.amount,
14202                    &format!("{:?}", pair.transaction_type),
14203                );
14204            }
14205
14206            let graph = builder.build();
14207            let node_count = graph.node_count();
14208            let edge_count = graph.edge_count();
14209            stats.graph_node_count += node_count;
14210            stats.graph_edge_count += edge_count;
14211
14212            // Export as PyG if configured
14213            for format in &self.config.graph_export.formats {
14214                if matches!(
14215                    format,
14216                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14217                ) {
14218                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14219                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14220                        warn!("Failed to create entity graph output dir: {}", e);
14221                        continue;
14222                    }
14223                    let pyg_config = PyGExportConfig::default();
14224                    let exporter = PyGExporter::new(pyg_config);
14225                    if let Err(e) = exporter.export(&graph, &format_dir) {
14226                        warn!("Failed to export entity graph as PyG: {}", e);
14227                    } else {
14228                        info!(
14229                            "Entity relationship graph exported: {} nodes, {} edges",
14230                            node_count, edge_count
14231                        );
14232                    }
14233                }
14234            }
14235        } else {
14236            debug!(
14237                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14238                self.config.companies.len()
14239            );
14240        }
14241    }
14242
14243    /// Export a multi-layer hypergraph for RustGraph integration.
14244    ///
14245    /// Builds a 3-layer hypergraph:
14246    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
14247    /// - Layer 2: Process Events (all process family document flows + OCPM events)
14248    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
14249    #[allow(clippy::too_many_arguments)]
14250    fn export_hypergraph(
14251        &self,
14252        coa: &Arc<ChartOfAccounts>,
14253        entries: &[JournalEntry],
14254        document_flows: &DocumentFlowSnapshot,
14255        sourcing: &SourcingSnapshot,
14256        hr: &HrSnapshot,
14257        manufacturing: &ManufacturingSnapshot,
14258        banking: &BankingSnapshot,
14259        audit: &AuditSnapshot,
14260        financial_reporting: &FinancialReportingSnapshot,
14261        ocpm: &OcpmSnapshot,
14262        compliance: &ComplianceRegulationsSnapshot,
14263        stats: &mut EnhancedGenerationStatistics,
14264    ) -> SynthResult<HypergraphExportInfo> {
14265        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
14266        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
14267        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
14268        use datasynth_graph::models::hypergraph::AggregationStrategy;
14269
14270        let hg_settings = &self.config.graph_export.hypergraph;
14271
14272        // Parse aggregation strategy from config string
14273        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
14274            "truncate" => AggregationStrategy::Truncate,
14275            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
14276            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
14277            "importance_sample" => AggregationStrategy::ImportanceSample,
14278            _ => AggregationStrategy::PoolByCounterparty,
14279        };
14280
14281        let builder_config = HypergraphConfig {
14282            max_nodes: hg_settings.max_nodes,
14283            aggregation_strategy,
14284            include_coso: hg_settings.governance_layer.include_coso,
14285            include_controls: hg_settings.governance_layer.include_controls,
14286            include_sox: hg_settings.governance_layer.include_sox,
14287            include_vendors: hg_settings.governance_layer.include_vendors,
14288            include_customers: hg_settings.governance_layer.include_customers,
14289            include_employees: hg_settings.governance_layer.include_employees,
14290            include_p2p: hg_settings.process_layer.include_p2p,
14291            include_o2c: hg_settings.process_layer.include_o2c,
14292            include_s2c: hg_settings.process_layer.include_s2c,
14293            include_h2r: hg_settings.process_layer.include_h2r,
14294            include_mfg: hg_settings.process_layer.include_mfg,
14295            include_bank: hg_settings.process_layer.include_bank,
14296            include_audit: hg_settings.process_layer.include_audit,
14297            include_r2r: hg_settings.process_layer.include_r2r,
14298            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
14299            docs_per_counterparty_threshold: hg_settings
14300                .process_layer
14301                .docs_per_counterparty_threshold,
14302            include_accounts: hg_settings.accounting_layer.include_accounts,
14303            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
14304            include_cross_layer_edges: hg_settings.cross_layer.enabled,
14305            include_compliance: self.config.compliance_regulations.enabled,
14306            include_tax: true,
14307            include_treasury: true,
14308            include_esg: true,
14309            include_project: true,
14310            include_intercompany: true,
14311            include_temporal_events: true,
14312        };
14313
14314        let mut builder = HypergraphBuilder::new(builder_config);
14315
14316        // Layer 1: Governance & Controls
14317        builder.add_coso_framework();
14318
14319        // Add controls if available (generated during JE generation)
14320        // Controls are generated per-company; we use the standard set
14321        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
14322            let controls = InternalControl::standard_controls();
14323            builder.add_controls(&controls);
14324        }
14325
14326        // Add master data
14327        builder.add_vendors(&self.master_data.vendors);
14328        builder.add_customers(&self.master_data.customers);
14329        builder.add_employees(&self.master_data.employees);
14330
14331        // Layer 2: Process Events (all process families)
14332        builder.add_p2p_documents(
14333            &document_flows.purchase_orders,
14334            &document_flows.goods_receipts,
14335            &document_flows.vendor_invoices,
14336            &document_flows.payments,
14337        );
14338        builder.add_o2c_documents(
14339            &document_flows.sales_orders,
14340            &document_flows.deliveries,
14341            &document_flows.customer_invoices,
14342        );
14343        builder.add_s2c_documents(
14344            &sourcing.sourcing_projects,
14345            &sourcing.qualifications,
14346            &sourcing.rfx_events,
14347            &sourcing.bids,
14348            &sourcing.bid_evaluations,
14349            &sourcing.contracts,
14350        );
14351        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
14352        builder.add_mfg_documents(
14353            &manufacturing.production_orders,
14354            &manufacturing.quality_inspections,
14355            &manufacturing.cycle_counts,
14356        );
14357        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
14358        builder.add_audit_documents(
14359            &audit.engagements,
14360            &audit.workpapers,
14361            &audit.findings,
14362            &audit.evidence,
14363            &audit.risk_assessments,
14364            &audit.judgments,
14365            &audit.materiality_calculations,
14366            &audit.audit_opinions,
14367            &audit.going_concern_assessments,
14368        );
14369        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
14370
14371        // OCPM events as hyperedges
14372        if let Some(ref event_log) = ocpm.event_log {
14373            builder.add_ocpm_events(event_log);
14374        }
14375
14376        // Compliance regulations as cross-layer nodes
14377        if self.config.compliance_regulations.enabled
14378            && hg_settings.governance_layer.include_controls
14379        {
14380            // Reconstruct ComplianceStandard objects from the registry
14381            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14382            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
14383                .standard_records
14384                .iter()
14385                .filter_map(|r| {
14386                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
14387                    registry.get(&sid).cloned()
14388                })
14389                .collect();
14390
14391            builder.add_compliance_regulations(
14392                &standards,
14393                &compliance.findings,
14394                &compliance.filings,
14395            );
14396        }
14397
14398        // Layer 3: Accounting Network
14399        builder.add_accounts(coa);
14400        builder.add_journal_entries_as_hyperedges(entries);
14401
14402        // Build the hypergraph
14403        let hypergraph = builder.build();
14404
14405        // Export
14406        let output_dir = self
14407            .output_path
14408            .clone()
14409            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14410        let hg_dir = output_dir
14411            .join(&self.config.graph_export.output_subdirectory)
14412            .join(&hg_settings.output_subdirectory);
14413
14414        // Branch on output format
14415        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
14416            "unified" => {
14417                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14418                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14419                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
14420                })?;
14421                (
14422                    metadata.num_nodes,
14423                    metadata.num_edges,
14424                    metadata.num_hyperedges,
14425                )
14426            }
14427            _ => {
14428                // "native" or any unrecognized format → use existing exporter
14429                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
14430                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14431                    SynthError::generation(format!("Hypergraph export failed: {e}"))
14432                })?;
14433                (
14434                    metadata.num_nodes,
14435                    metadata.num_edges,
14436                    metadata.num_hyperedges,
14437                )
14438            }
14439        };
14440
14441        // Stream to RustGraph ingest endpoint if configured
14442        #[cfg(feature = "streaming")]
14443        if let Some(ref target_url) = hg_settings.stream_target {
14444            use crate::stream_client::{StreamClient, StreamConfig};
14445            use std::io::Write as _;
14446
14447            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
14448            let stream_config = StreamConfig {
14449                target_url: target_url.clone(),
14450                batch_size: hg_settings.stream_batch_size,
14451                api_key,
14452                ..StreamConfig::default()
14453            };
14454
14455            match StreamClient::new(stream_config) {
14456                Ok(mut client) => {
14457                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14458                    match exporter.export_to_writer(&hypergraph, &mut client) {
14459                        Ok(_) => {
14460                            if let Err(e) = client.flush() {
14461                                warn!("Failed to flush stream client: {}", e);
14462                            } else {
14463                                info!("Streamed {} records to {}", client.total_sent(), target_url);
14464                            }
14465                        }
14466                        Err(e) => {
14467                            warn!("Streaming export failed: {}", e);
14468                        }
14469                    }
14470                }
14471                Err(e) => {
14472                    warn!("Failed to create stream client: {}", e);
14473                }
14474            }
14475        }
14476
14477        // Update stats
14478        stats.graph_node_count += num_nodes;
14479        stats.graph_edge_count += num_edges;
14480        stats.graph_export_count += 1;
14481
14482        Ok(HypergraphExportInfo {
14483            node_count: num_nodes,
14484            edge_count: num_edges,
14485            hyperedge_count: num_hyperedges,
14486            output_path: hg_dir,
14487        })
14488    }
14489
14490    /// Generate banking KYC/AML data.
14491    ///
14492    /// Creates banking customers, accounts, and transactions with AML typology injection.
14493    /// Uses the BankingOrchestrator from synth-banking crate.
14494    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
14495        let pb = self.create_progress_bar(100, "Generating Banking Data");
14496
14497        // Build the banking orchestrator from config
14498        let orchestrator = BankingOrchestratorBuilder::new()
14499            .config(self.config.banking.clone())
14500            .seed(self.seed + 9000)
14501            .country_pack(self.primary_pack().clone())
14502            .build();
14503
14504        if let Some(pb) = &pb {
14505            pb.inc(10);
14506        }
14507
14508        // Generate the banking data
14509        let result = orchestrator.generate();
14510
14511        if let Some(pb) = &pb {
14512            pb.inc(90);
14513            pb.finish_with_message(format!(
14514                "Banking: {} customers, {} transactions",
14515                result.customers.len(),
14516                result.transactions.len()
14517            ));
14518        }
14519
14520        // Cross-reference banking customers with core master data so that
14521        // banking customer names align with the enterprise customer list.
14522        // We rotate through core customers, overlaying their name and country
14523        // onto the generated banking customers where possible.
14524        let mut banking_customers = result.customers;
14525        let core_customers = &self.master_data.customers;
14526        if !core_customers.is_empty() {
14527            for (i, bc) in banking_customers.iter_mut().enumerate() {
14528                let core = &core_customers[i % core_customers.len()];
14529                bc.name = CustomerName::business(&core.name);
14530                bc.residence_country = core.country.clone();
14531                bc.enterprise_customer_id = Some(core.customer_id.clone());
14532            }
14533            debug!(
14534                "Cross-referenced {} banking customers with {} core customers",
14535                banking_customers.len(),
14536                core_customers.len()
14537            );
14538        }
14539
14540        Ok(BankingSnapshot {
14541            customers: banking_customers,
14542            accounts: result.accounts,
14543            transactions: result.transactions,
14544            transaction_labels: result.transaction_labels,
14545            customer_labels: result.customer_labels,
14546            account_labels: result.account_labels,
14547            relationship_labels: result.relationship_labels,
14548            narratives: result.narratives,
14549            suspicious_count: result.stats.suspicious_count,
14550            scenario_count: result.scenarios.len(),
14551        })
14552    }
14553
14554    /// Calculate total transactions to generate.
14555    fn calculate_total_transactions(&self) -> u64 {
14556        let months = self.config.global.period_months as f64;
14557        self.config
14558            .companies
14559            .iter()
14560            .map(|c| {
14561                let annual = c.annual_transaction_volume.count() as f64;
14562                let weighted = annual * c.volume_weight;
14563                (weighted * months / 12.0) as u64
14564            })
14565            .sum()
14566    }
14567
14568    /// Create a progress bar if progress display is enabled.
14569    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
14570        if !self.phase_config.show_progress {
14571            return None;
14572        }
14573
14574        let pb = if let Some(mp) = &self.multi_progress {
14575            mp.add(ProgressBar::new(total))
14576        } else {
14577            ProgressBar::new(total)
14578        };
14579
14580        pb.set_style(
14581            ProgressStyle::default_bar()
14582                .template(&format!(
14583                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
14584                ))
14585                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
14586                .progress_chars("#>-"),
14587        );
14588
14589        Some(pb)
14590    }
14591
14592    /// Get the generated chart of accounts.
14593    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
14594        self.coa.clone()
14595    }
14596
14597    /// Get the generated master data.
14598    pub fn get_master_data(&self) -> &MasterDataSnapshot {
14599        &self.master_data
14600    }
14601
14602    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
14603    fn phase_compliance_regulations(
14604        &mut self,
14605        _stats: &mut EnhancedGenerationStatistics,
14606    ) -> SynthResult<ComplianceRegulationsSnapshot> {
14607        if !self.phase_config.generate_compliance_regulations {
14608            return Ok(ComplianceRegulationsSnapshot::default());
14609        }
14610
14611        info!("Phase: Generating Compliance Regulations Data");
14612
14613        let cr_config = &self.config.compliance_regulations;
14614
14615        // Determine jurisdictions: from config or inferred from companies
14616        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
14617            self.config
14618                .companies
14619                .iter()
14620                .map(|c| c.country.clone())
14621                .collect::<std::collections::HashSet<_>>()
14622                .into_iter()
14623                .collect()
14624        } else {
14625            cr_config.jurisdictions.clone()
14626        };
14627
14628        // Determine reference date
14629        let fallback_date =
14630            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
14631        let reference_date = cr_config
14632            .reference_date
14633            .as_ref()
14634            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
14635            .unwrap_or_else(|| {
14636                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14637                    .unwrap_or(fallback_date)
14638            });
14639
14640        // Generate standards registry data
14641        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
14642        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
14643        let cross_reference_records = reg_gen.generate_cross_reference_records();
14644        let jurisdiction_records =
14645            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
14646
14647        info!(
14648            "  Standards: {} records, {} cross-references, {} jurisdictions",
14649            standard_records.len(),
14650            cross_reference_records.len(),
14651            jurisdiction_records.len()
14652        );
14653
14654        // Generate audit procedures (if enabled)
14655        let audit_procedures = if cr_config.audit_procedures.enabled {
14656            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
14657                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
14658                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
14659                confidence_level: cr_config.audit_procedures.confidence_level,
14660                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
14661            };
14662            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
14663                self.seed + 9000,
14664                proc_config,
14665            );
14666            let registry = reg_gen.registry();
14667            let mut all_procs = Vec::new();
14668            for jurisdiction in &jurisdictions {
14669                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
14670                all_procs.extend(procs);
14671            }
14672            info!("  Audit procedures: {}", all_procs.len());
14673            all_procs
14674        } else {
14675            Vec::new()
14676        };
14677
14678        // Generate compliance findings (if enabled)
14679        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
14680            let finding_config =
14681                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
14682                    finding_rate: cr_config.findings.finding_rate,
14683                    material_weakness_rate: cr_config.findings.material_weakness_rate,
14684                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
14685                    generate_remediation: cr_config.findings.generate_remediation,
14686                };
14687            let mut finding_gen =
14688                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
14689                    self.seed + 9100,
14690                    finding_config,
14691                );
14692            let mut all_findings = Vec::new();
14693            for company in &self.config.companies {
14694                let company_findings =
14695                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
14696                all_findings.extend(company_findings);
14697            }
14698            info!("  Compliance findings: {}", all_findings.len());
14699            all_findings
14700        } else {
14701            Vec::new()
14702        };
14703
14704        // Generate regulatory filings (if enabled)
14705        let filings = if cr_config.filings.enabled {
14706            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
14707                filing_types: cr_config.filings.filing_types.clone(),
14708                generate_status_progression: cr_config.filings.generate_status_progression,
14709            };
14710            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
14711                self.seed + 9200,
14712                filing_config,
14713            );
14714            let company_codes: Vec<String> = self
14715                .config
14716                .companies
14717                .iter()
14718                .map(|c| c.code.clone())
14719                .collect();
14720            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14721                .unwrap_or(fallback_date);
14722            let filings = filing_gen.generate_filings(
14723                &company_codes,
14724                &jurisdictions,
14725                start_date,
14726                self.config.global.period_months,
14727            );
14728            info!("  Regulatory filings: {}", filings.len());
14729            filings
14730        } else {
14731            Vec::new()
14732        };
14733
14734        // Build compliance graph (if enabled)
14735        let compliance_graph = if cr_config.graph.enabled {
14736            let graph_config = datasynth_graph::ComplianceGraphConfig {
14737                include_standard_nodes: cr_config.graph.include_compliance_nodes,
14738                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
14739                include_cross_references: cr_config.graph.include_cross_references,
14740                include_supersession_edges: cr_config.graph.include_supersession_edges,
14741                include_account_links: cr_config.graph.include_account_links,
14742                include_control_links: cr_config.graph.include_control_links,
14743                include_company_links: cr_config.graph.include_company_links,
14744            };
14745            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
14746
14747            // Add standard nodes
14748            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
14749                .iter()
14750                .map(|r| datasynth_graph::StandardNodeInput {
14751                    standard_id: r.standard_id.clone(),
14752                    title: r.title.clone(),
14753                    category: r.category.clone(),
14754                    domain: r.domain.clone(),
14755                    is_active: r.is_active,
14756                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
14757                    applicable_account_types: r.applicable_account_types.clone(),
14758                    applicable_processes: r.applicable_processes.clone(),
14759                })
14760                .collect();
14761            builder.add_standards(&standard_inputs);
14762
14763            // Add jurisdiction nodes
14764            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
14765                jurisdiction_records
14766                    .iter()
14767                    .map(|r| datasynth_graph::JurisdictionNodeInput {
14768                        country_code: r.country_code.clone(),
14769                        country_name: r.country_name.clone(),
14770                        framework: r.accounting_framework.clone(),
14771                        standard_count: r.standard_count,
14772                        tax_rate: r.statutory_tax_rate,
14773                    })
14774                    .collect();
14775            builder.add_jurisdictions(&jurisdiction_inputs);
14776
14777            // Add cross-reference edges
14778            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
14779                cross_reference_records
14780                    .iter()
14781                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
14782                        from_standard: r.from_standard.clone(),
14783                        to_standard: r.to_standard.clone(),
14784                        relationship: r.relationship.clone(),
14785                        convergence_level: r.convergence_level,
14786                    })
14787                    .collect();
14788            builder.add_cross_references(&xref_inputs);
14789
14790            // Add jurisdiction→standard mappings
14791            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
14792                .iter()
14793                .map(|r| datasynth_graph::JurisdictionMappingInput {
14794                    country_code: r.jurisdiction.clone(),
14795                    standard_id: r.standard_id.clone(),
14796                })
14797                .collect();
14798            builder.add_jurisdiction_mappings(&mapping_inputs);
14799
14800            // Add procedure nodes
14801            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
14802                .iter()
14803                .map(|p| datasynth_graph::ProcedureNodeInput {
14804                    procedure_id: p.procedure_id.clone(),
14805                    standard_id: p.standard_id.clone(),
14806                    procedure_type: p.procedure_type.clone(),
14807                    sample_size: p.sample_size,
14808                    confidence_level: p.confidence_level,
14809                })
14810                .collect();
14811            builder.add_procedures(&proc_inputs);
14812
14813            // Add finding nodes
14814            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
14815                .iter()
14816                .map(|f| datasynth_graph::FindingNodeInput {
14817                    finding_id: f.finding_id.to_string(),
14818                    standard_id: f
14819                        .related_standards
14820                        .first()
14821                        .map(|s| s.as_str().to_string())
14822                        .unwrap_or_default(),
14823                    severity: f.severity.to_string(),
14824                    deficiency_level: f.deficiency_level.to_string(),
14825                    severity_score: f.deficiency_level.severity_score(),
14826                    control_id: f.control_id.clone(),
14827                    affected_accounts: f.affected_accounts.clone(),
14828                })
14829                .collect();
14830            builder.add_findings(&finding_inputs);
14831
14832            // Cross-domain: link standards to accounts from chart of accounts
14833            if cr_config.graph.include_account_links {
14834                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14835                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
14836                for std_record in &standard_records {
14837                    if let Some(std_obj) =
14838                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
14839                            &std_record.standard_id,
14840                        ))
14841                    {
14842                        for acct_type in &std_obj.applicable_account_types {
14843                            account_links.push(datasynth_graph::AccountLinkInput {
14844                                standard_id: std_record.standard_id.clone(),
14845                                account_code: acct_type.clone(),
14846                                account_name: acct_type.clone(),
14847                            });
14848                        }
14849                    }
14850                }
14851                builder.add_account_links(&account_links);
14852            }
14853
14854            // Cross-domain: link standards to internal controls
14855            if cr_config.graph.include_control_links {
14856                let mut control_links = Vec::new();
14857                // SOX/PCAOB standards link to all controls
14858                let sox_like_ids: Vec<String> = standard_records
14859                    .iter()
14860                    .filter(|r| {
14861                        r.standard_id.starts_with("SOX")
14862                            || r.standard_id.starts_with("PCAOB-AS-2201")
14863                    })
14864                    .map(|r| r.standard_id.clone())
14865                    .collect();
14866                // Get control IDs from config (C001-C060 standard controls)
14867                let control_ids = [
14868                    ("C001", "Cash Controls"),
14869                    ("C002", "Large Transaction Approval"),
14870                    ("C010", "PO Approval"),
14871                    ("C011", "Three-Way Match"),
14872                    ("C020", "Revenue Recognition"),
14873                    ("C021", "Credit Check"),
14874                    ("C030", "Manual JE Approval"),
14875                    ("C031", "Period Close Review"),
14876                    ("C032", "Account Reconciliation"),
14877                    ("C040", "Payroll Processing"),
14878                    ("C050", "Fixed Asset Capitalization"),
14879                    ("C060", "Intercompany Elimination"),
14880                ];
14881                for sox_id in &sox_like_ids {
14882                    for (ctrl_id, ctrl_name) in &control_ids {
14883                        control_links.push(datasynth_graph::ControlLinkInput {
14884                            standard_id: sox_id.clone(),
14885                            control_id: ctrl_id.to_string(),
14886                            control_name: ctrl_name.to_string(),
14887                        });
14888                    }
14889                }
14890                builder.add_control_links(&control_links);
14891            }
14892
14893            // Cross-domain: filing nodes with company links
14894            if cr_config.graph.include_company_links {
14895                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
14896                    .iter()
14897                    .enumerate()
14898                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
14899                        filing_id: format!("F{:04}", i + 1),
14900                        filing_type: f.filing_type.to_string(),
14901                        company_code: f.company_code.clone(),
14902                        jurisdiction: f.jurisdiction.clone(),
14903                        status: format!("{:?}", f.status),
14904                    })
14905                    .collect();
14906                builder.add_filings(&filing_inputs);
14907            }
14908
14909            let graph = builder.build();
14910            info!(
14911                "  Compliance graph: {} nodes, {} edges",
14912                graph.nodes.len(),
14913                graph.edges.len()
14914            );
14915            Some(graph)
14916        } else {
14917            None
14918        };
14919
14920        self.check_resources_with_log("post-compliance-regulations")?;
14921
14922        Ok(ComplianceRegulationsSnapshot {
14923            standard_records,
14924            cross_reference_records,
14925            jurisdiction_records,
14926            audit_procedures,
14927            findings,
14928            filings,
14929            compliance_graph,
14930        })
14931    }
14932
14933    /// Build a lineage graph describing config → phase → output relationships.
14934    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
14935        use super::lineage::LineageGraphBuilder;
14936
14937        let mut builder = LineageGraphBuilder::new();
14938
14939        // Config sections
14940        builder.add_config_section("config:global", "Global Config");
14941        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
14942        builder.add_config_section("config:transactions", "Transaction Config");
14943
14944        // Generator phases
14945        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
14946        builder.add_generator_phase("phase:je", "Journal Entry Generation");
14947
14948        // Config → phase edges
14949        builder.configured_by("phase:coa", "config:chart_of_accounts");
14950        builder.configured_by("phase:je", "config:transactions");
14951
14952        // Output files
14953        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
14954        builder.produced_by("output:je", "phase:je");
14955
14956        // Optional phases based on config
14957        if self.phase_config.generate_master_data {
14958            builder.add_config_section("config:master_data", "Master Data Config");
14959            builder.add_generator_phase("phase:master_data", "Master Data Generation");
14960            builder.configured_by("phase:master_data", "config:master_data");
14961            builder.input_to("phase:master_data", "phase:je");
14962        }
14963
14964        if self.phase_config.generate_document_flows {
14965            builder.add_config_section("config:document_flows", "Document Flow Config");
14966            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
14967            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
14968            builder.configured_by("phase:p2p", "config:document_flows");
14969            builder.configured_by("phase:o2c", "config:document_flows");
14970
14971            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
14972            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
14973            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
14974            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
14975            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
14976
14977            builder.produced_by("output:po", "phase:p2p");
14978            builder.produced_by("output:gr", "phase:p2p");
14979            builder.produced_by("output:vi", "phase:p2p");
14980            builder.produced_by("output:so", "phase:o2c");
14981            builder.produced_by("output:ci", "phase:o2c");
14982        }
14983
14984        if self.phase_config.inject_anomalies {
14985            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
14986            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
14987            builder.configured_by("phase:anomaly", "config:fraud");
14988            builder.add_output_file(
14989                "output:labels",
14990                "Anomaly Labels",
14991                "labels/anomaly_labels.csv",
14992            );
14993            builder.produced_by("output:labels", "phase:anomaly");
14994        }
14995
14996        if self.phase_config.generate_audit {
14997            builder.add_config_section("config:audit", "Audit Config");
14998            builder.add_generator_phase("phase:audit", "Audit Data Generation");
14999            builder.configured_by("phase:audit", "config:audit");
15000        }
15001
15002        if self.phase_config.generate_banking {
15003            builder.add_config_section("config:banking", "Banking Config");
15004            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15005            builder.configured_by("phase:banking", "config:banking");
15006        }
15007
15008        if self.config.llm.enabled {
15009            builder.add_config_section("config:llm", "LLM Enrichment Config");
15010            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15011            builder.configured_by("phase:llm_enrichment", "config:llm");
15012        }
15013
15014        if self.config.diffusion.enabled {
15015            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15016            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15017            builder.configured_by("phase:diffusion", "config:diffusion");
15018        }
15019
15020        if self.config.causal.enabled {
15021            builder.add_config_section("config:causal", "Causal Generation Config");
15022            builder.add_generator_phase("phase:causal", "Causal Overlay");
15023            builder.configured_by("phase:causal", "config:causal");
15024        }
15025
15026        builder.build()
15027    }
15028
15029    // -----------------------------------------------------------------------
15030    // Trial-balance helpers used to replace hardcoded proxy values
15031    // -----------------------------------------------------------------------
15032
15033    /// Compute total revenue for a company from its journal entries.
15034    ///
15035    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
15036    /// net credits on all revenue-account lines filtered to `company_code`.
15037    fn compute_company_revenue(
15038        entries: &[JournalEntry],
15039        company_code: &str,
15040    ) -> rust_decimal::Decimal {
15041        use rust_decimal::Decimal;
15042        let mut revenue = Decimal::ZERO;
15043        for je in entries {
15044            if je.header.company_code != company_code {
15045                continue;
15046            }
15047            for line in &je.lines {
15048                if line.gl_account.starts_with('4') {
15049                    // Revenue is credit-normal
15050                    revenue += line.credit_amount - line.debit_amount;
15051                }
15052            }
15053        }
15054        revenue.max(Decimal::ZERO)
15055    }
15056
15057    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
15058    ///
15059    /// Asset accounts start with "1"; liability accounts start with "2".
15060    fn compute_entity_net_assets(
15061        entries: &[JournalEntry],
15062        entity_code: &str,
15063    ) -> rust_decimal::Decimal {
15064        use rust_decimal::Decimal;
15065        let mut asset_net = Decimal::ZERO;
15066        let mut liability_net = Decimal::ZERO;
15067        for je in entries {
15068            if je.header.company_code != entity_code {
15069                continue;
15070            }
15071            for line in &je.lines {
15072                if line.gl_account.starts_with('1') {
15073                    asset_net += line.debit_amount - line.credit_amount;
15074                } else if line.gl_account.starts_with('2') {
15075                    liability_net += line.credit_amount - line.debit_amount;
15076                }
15077            }
15078        }
15079        asset_net - liability_net
15080    }
15081
15082    /// v3.5.1+: Run the statistical validation suite configured in
15083    /// `distributions.validation.tests` over the final amount
15084    /// distribution.  Collects every non-zero line-level amount (debit +
15085    /// credit) and hands it to the runners in
15086    /// `datasynth_core::distributions::validation`.
15087    ///
15088    /// Returns `Ok(None)` when validation is disabled (the default).
15089    /// When `reporting.fail_on_error = true` and any test fails, returns
15090    /// `Err` with a concise message; otherwise attaches the report to
15091    /// the result and lets callers inspect it.
15092    fn phase_statistical_validation(
15093        &self,
15094        entries: &[JournalEntry],
15095    ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15096        use datasynth_config::schema::StatisticalTestConfig;
15097        use datasynth_core::distributions::{
15098            run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15099            run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15100        };
15101        use rust_decimal::prelude::ToPrimitive;
15102
15103        let cfg = &self.config.distributions.validation;
15104        if !cfg.enabled {
15105            return Ok(None);
15106        }
15107
15108        // Collect per-line positive amounts (debit + credit is zero on the
15109        // non-posting side, so this naturally picks the magnitude).
15110        let amounts: Vec<rust_decimal::Decimal> = entries
15111            .iter()
15112            .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15113            .filter(|a| *a > rust_decimal::Decimal::ZERO)
15114            .collect();
15115
15116        // v4.1.0+ paired (amount, line_count) per entry for correlation
15117        // checks. Amount per entry is the debit-side total (= credit-side
15118        // total for a balanced entry).
15119        let paired_amount_linecount: Vec<(f64, f64)> = entries
15120            .iter()
15121            .filter_map(|je| {
15122                let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
15123                if amt > rust_decimal::Decimal::ZERO {
15124                    amt.to_f64().map(|a| (a, je.lines.len() as f64))
15125                } else {
15126                    None
15127                }
15128            })
15129            .collect();
15130
15131        let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
15132        for test_cfg in &cfg.tests {
15133            match test_cfg {
15134                StatisticalTestConfig::BenfordFirstDigit {
15135                    threshold_mad,
15136                    warning_mad,
15137                } => {
15138                    results.push(run_benford_first_digit(
15139                        &amounts,
15140                        *threshold_mad,
15141                        *warning_mad,
15142                    ));
15143                }
15144                StatisticalTestConfig::ChiSquared { bins, significance } => {
15145                    results.push(run_chi_squared(&amounts, *bins, *significance));
15146                }
15147                StatisticalTestConfig::DistributionFit {
15148                    target: _,
15149                    ks_significance,
15150                    method: _,
15151                } => {
15152                    // v3.5.1+: log-uniformity KS check. Target-specific
15153                    // fits against Normal / Exponential land in v4.1.1+.
15154                    results.push(run_ks_uniform_log(&amounts, *ks_significance));
15155                }
15156                StatisticalTestConfig::AndersonDarling {
15157                    target: _,
15158                    significance,
15159                } => {
15160                    // v4.1.0+: A*² statistic against log-normal on the
15161                    // log-scale. Other targets follow the same pattern.
15162                    results.push(run_anderson_darling(&amounts, *significance));
15163                }
15164                StatisticalTestConfig::CorrelationCheck {
15165                    expected_correlations,
15166                } => {
15167                    // v4.1.0+: (amount, line_count) is tracked today.
15168                    // Other pairs resolve to Skipped pending richer
15169                    // per-entry attribute collection.
15170                    if expected_correlations.is_empty() {
15171                        results.push(StatisticalTestResult {
15172                            name: "correlation_check".to_string(),
15173                            outcome: TestOutcome::Skipped,
15174                            statistic: 0.0,
15175                            threshold: 0.0,
15176                            message: "no expected correlations declared".to_string(),
15177                        });
15178                    } else {
15179                        for ec in expected_correlations {
15180                            let pair_key = format!("{}_{}", ec.field1, ec.field2);
15181                            let is_amount_linecount = (ec.field1 == "amount"
15182                                && ec.field2 == "line_count")
15183                                || (ec.field1 == "line_count" && ec.field2 == "amount");
15184                            if is_amount_linecount {
15185                                let xs: Vec<f64> =
15186                                    paired_amount_linecount.iter().map(|(a, _)| *a).collect();
15187                                let ys: Vec<f64> =
15188                                    paired_amount_linecount.iter().map(|(_, l)| *l).collect();
15189                                results.push(run_correlation_check(
15190                                    &pair_key,
15191                                    &xs,
15192                                    &ys,
15193                                    ec.expected_r,
15194                                    ec.tolerance,
15195                                ));
15196                            } else {
15197                                results.push(StatisticalTestResult {
15198                                    name: format!("correlation_check_{pair_key}"),
15199                                    outcome: TestOutcome::Skipped,
15200                                    statistic: 0.0,
15201                                    threshold: ec.tolerance,
15202                                    message: format!(
15203                                        "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
15204                                        ec.field1, ec.field2
15205                                    ),
15206                                });
15207                            }
15208                        }
15209                    }
15210                }
15211            }
15212        }
15213
15214        let report = StatisticalValidationReport {
15215            sample_count: amounts.len(),
15216            results,
15217        };
15218
15219        if cfg.reporting.fail_on_error && !report.all_passed() {
15220            let failed = report.failed_names().join(", ");
15221            return Err(SynthError::validation(format!(
15222                "statistical validation failed: {failed}"
15223            )));
15224        }
15225
15226        Ok(Some(report))
15227    }
15228
15229    /// v3.3.0: analytics-metadata phase.
15230    ///
15231    /// Runs AFTER all JE-adding phases (including Phase 20b's
15232    /// fraud-bias sweep). Four sub-generators fire in sequence, each
15233    /// gated by an individual `analytics_metadata.<flag>` toggle:
15234    ///
15235    /// 1. `PriorYearGenerator` — prior-year comparatives derived from
15236    ///    current-period account balances.
15237    /// 2. `IndustryBenchmarkGenerator` — industry benchmarks for the
15238    ///    configured `global.industry`.
15239    /// 3. `ManagementReportGenerator` — management-report artefacts.
15240    /// 4. `DriftEventGenerator` — post-generation drift-event labels.
15241    fn phase_analytics_metadata(
15242        &mut self,
15243        entries: &[JournalEntry],
15244    ) -> SynthResult<AnalyticsMetadataSnapshot> {
15245        use datasynth_generators::drift_event_generator::DriftEventGenerator;
15246        use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15247        use datasynth_generators::management_report_generator::ManagementReportGenerator;
15248        use datasynth_generators::prior_year_generator::PriorYearGenerator;
15249        use std::collections::BTreeMap;
15250
15251        let mut snap = AnalyticsMetadataSnapshot::default();
15252
15253        if !self.phase_config.generate_analytics_metadata {
15254            return Ok(snap);
15255        }
15256
15257        let cfg = &self.config.analytics_metadata;
15258        let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15259            .map(|d| d.year())
15260            .unwrap_or(2025);
15261
15262        // ---- 1. Prior-year comparatives ----
15263        if cfg.prior_year {
15264            let mut gen = PriorYearGenerator::new(self.seed + 9100);
15265            for company in &self.config.companies {
15266                // Aggregate current-period balances per account code +
15267                // account name from the entries slice.
15268                let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
15269                    BTreeMap::new();
15270                for je in entries {
15271                    if je.header.company_code != company.code {
15272                        continue;
15273                    }
15274                    for line in &je.lines {
15275                        let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
15276                            (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
15277                        });
15278                        entry.1 += line.debit_amount - line.credit_amount;
15279                    }
15280                }
15281                let current: Vec<(String, String, rust_decimal::Decimal)> = balances
15282                    .into_iter()
15283                    .filter(|(_, (_, bal))| !bal.is_zero())
15284                    .map(|(code, (name, bal))| (code, name, bal))
15285                    .collect();
15286                if !current.is_empty() {
15287                    let comparatives =
15288                        gen.generate_comparatives(&company.code, fiscal_year, &current);
15289                    snap.prior_year_comparatives.extend(comparatives);
15290                }
15291            }
15292            info!(
15293                "v3.3.0 analytics: {} prior-year comparatives across {} companies",
15294                snap.prior_year_comparatives.len(),
15295                self.config.companies.len()
15296            );
15297        }
15298
15299        // ---- 2. Industry benchmarks ----
15300        if cfg.industry_benchmark {
15301            use datasynth_core::models::IndustrySector;
15302            let industry = match self.config.global.industry {
15303                IndustrySector::Manufacturing => "manufacturing",
15304                IndustrySector::Retail => "retail",
15305                IndustrySector::FinancialServices => "financial_services",
15306                IndustrySector::Technology => "technology",
15307                IndustrySector::Healthcare => "healthcare",
15308                _ => "other",
15309            };
15310            let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
15311            let benchmarks = gen.generate(industry, fiscal_year);
15312            info!(
15313                "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
15314                benchmarks.len()
15315            );
15316            snap.industry_benchmarks = benchmarks;
15317        }
15318
15319        // ---- 3. Management reports ----
15320        if cfg.management_reports {
15321            let mut gen = ManagementReportGenerator::new(self.seed + 9300);
15322            let period_months = self.config.global.period_months;
15323            for company in &self.config.companies {
15324                let reports =
15325                    gen.generate_reports(&company.code, fiscal_year as u32, period_months);
15326                snap.management_reports.extend(reports);
15327            }
15328            info!(
15329                "v3.3.0 analytics: {} management reports across {} companies",
15330                snap.management_reports.len(),
15331                self.config.companies.len()
15332            );
15333        }
15334
15335        // ---- 4. Drift-event labels ----
15336        if cfg.drift_events {
15337            let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
15338                .expect("hardcoded NaiveDate 2025-01-01 is valid");
15339            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15340                .unwrap_or(fallback_start);
15341            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
15342            let mut gen = DriftEventGenerator::new(self.seed + 9400);
15343            let drifts = gen.generate_standalone_drifts(start_date, end_date);
15344            info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
15345            snap.drift_events = drifts;
15346        }
15347        // `entries` parameter reserved for future JE-aware drift detection
15348        let _ = entries;
15349
15350        Ok(snap)
15351    }
15352}
15353
15354/// Get the directory name for a graph export format.
15355fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
15356    match format {
15357        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
15358        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
15359        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
15360        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
15361        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
15362    }
15363}
15364
15365/// Aggregate journal entry lines into per-account trial balance rows.
15366///
15367/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
15368/// debit/credit totals and a net balance (debit minus credit).
15369fn compute_trial_balance_entries(
15370    entries: &[JournalEntry],
15371    entity_code: &str,
15372    fiscal_year: i32,
15373    coa: Option<&ChartOfAccounts>,
15374) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
15375    use std::collections::BTreeMap;
15376
15377    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
15378        BTreeMap::new();
15379
15380    for je in entries {
15381        for line in &je.lines {
15382            let entry = balances.entry(line.account_code.clone()).or_default();
15383            entry.0 += line.debit_amount;
15384            entry.1 += line.credit_amount;
15385        }
15386    }
15387
15388    balances
15389        .into_iter()
15390        .map(
15391            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
15392                account_description: coa
15393                    .and_then(|c| c.get_account(&account_code))
15394                    .map(|a| a.description().to_string())
15395                    .unwrap_or_else(|| account_code.clone()),
15396                account_code,
15397                debit_balance: debit,
15398                credit_balance: credit,
15399                net_balance: debit - credit,
15400                entity_code: entity_code.to_string(),
15401                period: format!("FY{}", fiscal_year),
15402            },
15403        )
15404        .collect()
15405}
15406
15407#[cfg(test)]
15408#[allow(clippy::unwrap_used)]
15409mod tests {
15410    use super::*;
15411    use datasynth_config::schema::*;
15412
15413    fn create_test_config() -> GeneratorConfig {
15414        GeneratorConfig {
15415            global: GlobalConfig {
15416                industry: IndustrySector::Manufacturing,
15417                start_date: "2024-01-01".to_string(),
15418                period_months: 1,
15419                seed: Some(42),
15420                parallel: false,
15421                group_currency: "USD".to_string(),
15422                presentation_currency: None,
15423                worker_threads: 0,
15424                memory_limit_mb: 0,
15425                fiscal_year_months: None,
15426            },
15427            companies: vec![CompanyConfig {
15428                code: "1000".to_string(),
15429                name: "Test Company".to_string(),
15430                currency: "USD".to_string(),
15431                functional_currency: None,
15432                country: "US".to_string(),
15433                annual_transaction_volume: TransactionVolume::TenK,
15434                volume_weight: 1.0,
15435                fiscal_year_variant: "K4".to_string(),
15436            }],
15437            chart_of_accounts: ChartOfAccountsConfig {
15438                complexity: CoAComplexity::Small,
15439                industry_specific: true,
15440                custom_accounts: None,
15441                min_hierarchy_depth: 2,
15442                max_hierarchy_depth: 4,
15443            },
15444            transactions: TransactionConfig::default(),
15445            output: OutputConfig::default(),
15446            fraud: FraudConfig::default(),
15447            internal_controls: InternalControlsConfig::default(),
15448            business_processes: BusinessProcessConfig::default(),
15449            user_personas: UserPersonaConfig::default(),
15450            templates: TemplateConfig::default(),
15451            approval: ApprovalConfig::default(),
15452            departments: DepartmentConfig::default(),
15453            master_data: MasterDataConfig::default(),
15454            document_flows: DocumentFlowConfig::default(),
15455            intercompany: IntercompanyConfig::default(),
15456            balance: BalanceConfig::default(),
15457            ocpm: OcpmConfig::default(),
15458            audit: AuditGenerationConfig::default(),
15459            banking: datasynth_banking::BankingConfig::default(),
15460            data_quality: DataQualitySchemaConfig::default(),
15461            scenario: ScenarioConfig::default(),
15462            temporal: TemporalDriftConfig::default(),
15463            graph_export: GraphExportConfig::default(),
15464            streaming: StreamingSchemaConfig::default(),
15465            rate_limit: RateLimitSchemaConfig::default(),
15466            temporal_attributes: TemporalAttributeSchemaConfig::default(),
15467            relationships: RelationshipSchemaConfig::default(),
15468            accounting_standards: AccountingStandardsConfig::default(),
15469            audit_standards: AuditStandardsConfig::default(),
15470            distributions: Default::default(),
15471            temporal_patterns: Default::default(),
15472            vendor_network: VendorNetworkSchemaConfig::default(),
15473            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
15474            relationship_strength: RelationshipStrengthSchemaConfig::default(),
15475            cross_process_links: CrossProcessLinksSchemaConfig::default(),
15476            organizational_events: OrganizationalEventsSchemaConfig::default(),
15477            behavioral_drift: BehavioralDriftSchemaConfig::default(),
15478            market_drift: MarketDriftSchemaConfig::default(),
15479            drift_labeling: DriftLabelingSchemaConfig::default(),
15480            anomaly_injection: Default::default(),
15481            industry_specific: Default::default(),
15482            fingerprint_privacy: Default::default(),
15483            quality_gates: Default::default(),
15484            compliance: Default::default(),
15485            webhooks: Default::default(),
15486            llm: Default::default(),
15487            diffusion: Default::default(),
15488            causal: Default::default(),
15489            source_to_pay: Default::default(),
15490            financial_reporting: Default::default(),
15491            hr: Default::default(),
15492            manufacturing: Default::default(),
15493            sales_quotes: Default::default(),
15494            tax: Default::default(),
15495            treasury: Default::default(),
15496            project_accounting: Default::default(),
15497            esg: Default::default(),
15498            country_packs: None,
15499            scenarios: Default::default(),
15500            session: Default::default(),
15501            compliance_regulations: Default::default(),
15502            analytics_metadata: Default::default(),
15503        }
15504    }
15505
15506    #[test]
15507    fn test_enhanced_orchestrator_creation() {
15508        let config = create_test_config();
15509        let orchestrator = EnhancedOrchestrator::with_defaults(config);
15510        assert!(orchestrator.is_ok());
15511    }
15512
15513    #[test]
15514    fn test_minimal_generation() {
15515        let config = create_test_config();
15516        let phase_config = PhaseConfig {
15517            generate_master_data: false,
15518            generate_document_flows: false,
15519            generate_journal_entries: true,
15520            inject_anomalies: false,
15521            show_progress: false,
15522            ..Default::default()
15523        };
15524
15525        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15526        let result = orchestrator.generate();
15527
15528        assert!(result.is_ok());
15529        let result = result.unwrap();
15530        assert!(!result.journal_entries.is_empty());
15531    }
15532
15533    #[test]
15534    fn test_master_data_generation() {
15535        let config = create_test_config();
15536        let phase_config = PhaseConfig {
15537            generate_master_data: true,
15538            generate_document_flows: false,
15539            generate_journal_entries: false,
15540            inject_anomalies: false,
15541            show_progress: false,
15542            vendors_per_company: 5,
15543            customers_per_company: 5,
15544            materials_per_company: 10,
15545            assets_per_company: 5,
15546            employees_per_company: 10,
15547            ..Default::default()
15548        };
15549
15550        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15551        let result = orchestrator.generate().unwrap();
15552
15553        assert!(!result.master_data.vendors.is_empty());
15554        assert!(!result.master_data.customers.is_empty());
15555        assert!(!result.master_data.materials.is_empty());
15556    }
15557
15558    #[test]
15559    fn test_document_flow_generation() {
15560        let config = create_test_config();
15561        let phase_config = PhaseConfig {
15562            generate_master_data: true,
15563            generate_document_flows: true,
15564            generate_journal_entries: false,
15565            inject_anomalies: false,
15566            inject_data_quality: false,
15567            validate_balances: false,
15568            generate_ocpm_events: false,
15569            show_progress: false,
15570            vendors_per_company: 5,
15571            customers_per_company: 5,
15572            materials_per_company: 10,
15573            assets_per_company: 5,
15574            employees_per_company: 10,
15575            p2p_chains: 5,
15576            o2c_chains: 5,
15577            ..Default::default()
15578        };
15579
15580        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15581        let result = orchestrator.generate().unwrap();
15582
15583        // Should have generated P2P and O2C chains
15584        assert!(!result.document_flows.p2p_chains.is_empty());
15585        assert!(!result.document_flows.o2c_chains.is_empty());
15586
15587        // Flattened documents should be populated
15588        assert!(!result.document_flows.purchase_orders.is_empty());
15589        assert!(!result.document_flows.sales_orders.is_empty());
15590    }
15591
15592    #[test]
15593    fn test_anomaly_injection() {
15594        let config = create_test_config();
15595        let phase_config = PhaseConfig {
15596            generate_master_data: false,
15597            generate_document_flows: false,
15598            generate_journal_entries: true,
15599            inject_anomalies: true,
15600            show_progress: false,
15601            ..Default::default()
15602        };
15603
15604        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15605        let result = orchestrator.generate().unwrap();
15606
15607        // Should have journal entries
15608        assert!(!result.journal_entries.is_empty());
15609
15610        // With ~833 entries and 2% rate, expect some anomalies
15611        // Note: This is probabilistic, so we just verify the structure exists
15612        assert!(result.anomaly_labels.summary.is_some());
15613    }
15614
15615    #[test]
15616    fn test_full_generation_pipeline() {
15617        let config = create_test_config();
15618        let phase_config = PhaseConfig {
15619            generate_master_data: true,
15620            generate_document_flows: true,
15621            generate_journal_entries: true,
15622            inject_anomalies: false,
15623            inject_data_quality: false,
15624            validate_balances: true,
15625            generate_ocpm_events: false,
15626            show_progress: false,
15627            vendors_per_company: 3,
15628            customers_per_company: 3,
15629            materials_per_company: 5,
15630            assets_per_company: 3,
15631            employees_per_company: 5,
15632            p2p_chains: 3,
15633            o2c_chains: 3,
15634            ..Default::default()
15635        };
15636
15637        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15638        let result = orchestrator.generate().unwrap();
15639
15640        // All phases should have results
15641        assert!(!result.master_data.vendors.is_empty());
15642        assert!(!result.master_data.customers.is_empty());
15643        assert!(!result.document_flows.p2p_chains.is_empty());
15644        assert!(!result.document_flows.o2c_chains.is_empty());
15645        assert!(!result.journal_entries.is_empty());
15646        assert!(result.statistics.accounts_count > 0);
15647
15648        // Subledger linking should have run
15649        assert!(!result.subledger.ap_invoices.is_empty());
15650        assert!(!result.subledger.ar_invoices.is_empty());
15651
15652        // Balance validation should have run
15653        assert!(result.balance_validation.validated);
15654        assert!(result.balance_validation.entries_processed > 0);
15655    }
15656
15657    #[test]
15658    fn test_subledger_linking() {
15659        let config = create_test_config();
15660        let phase_config = PhaseConfig {
15661            generate_master_data: true,
15662            generate_document_flows: true,
15663            generate_journal_entries: false,
15664            inject_anomalies: false,
15665            inject_data_quality: false,
15666            validate_balances: false,
15667            generate_ocpm_events: false,
15668            show_progress: false,
15669            vendors_per_company: 5,
15670            customers_per_company: 5,
15671            materials_per_company: 10,
15672            assets_per_company: 3,
15673            employees_per_company: 5,
15674            p2p_chains: 5,
15675            o2c_chains: 5,
15676            ..Default::default()
15677        };
15678
15679        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15680        let result = orchestrator.generate().unwrap();
15681
15682        // Should have document flows
15683        assert!(!result.document_flows.vendor_invoices.is_empty());
15684        assert!(!result.document_flows.customer_invoices.is_empty());
15685
15686        // Subledger should be linked from document flows
15687        assert!(!result.subledger.ap_invoices.is_empty());
15688        assert!(!result.subledger.ar_invoices.is_empty());
15689
15690        // AP invoices count should match vendor invoices count
15691        assert_eq!(
15692            result.subledger.ap_invoices.len(),
15693            result.document_flows.vendor_invoices.len()
15694        );
15695
15696        // AR invoices count should match customer invoices count
15697        assert_eq!(
15698            result.subledger.ar_invoices.len(),
15699            result.document_flows.customer_invoices.len()
15700        );
15701
15702        // Statistics should reflect subledger counts
15703        assert_eq!(
15704            result.statistics.ap_invoice_count,
15705            result.subledger.ap_invoices.len()
15706        );
15707        assert_eq!(
15708            result.statistics.ar_invoice_count,
15709            result.subledger.ar_invoices.len()
15710        );
15711    }
15712
15713    #[test]
15714    fn test_balance_validation() {
15715        let config = create_test_config();
15716        let phase_config = PhaseConfig {
15717            generate_master_data: false,
15718            generate_document_flows: false,
15719            generate_journal_entries: true,
15720            inject_anomalies: false,
15721            validate_balances: true,
15722            show_progress: false,
15723            ..Default::default()
15724        };
15725
15726        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15727        let result = orchestrator.generate().unwrap();
15728
15729        // Balance validation should run
15730        assert!(result.balance_validation.validated);
15731        assert!(result.balance_validation.entries_processed > 0);
15732
15733        // Generated JEs should be balanced (no unbalanced entries)
15734        assert!(!result.balance_validation.has_unbalanced_entries);
15735
15736        // Total debits should equal total credits
15737        assert_eq!(
15738            result.balance_validation.total_debits,
15739            result.balance_validation.total_credits
15740        );
15741    }
15742
15743    #[test]
15744    fn test_statistics_accuracy() {
15745        let config = create_test_config();
15746        let phase_config = PhaseConfig {
15747            generate_master_data: true,
15748            generate_document_flows: false,
15749            generate_journal_entries: true,
15750            inject_anomalies: false,
15751            show_progress: false,
15752            vendors_per_company: 10,
15753            customers_per_company: 20,
15754            materials_per_company: 15,
15755            assets_per_company: 5,
15756            employees_per_company: 8,
15757            ..Default::default()
15758        };
15759
15760        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15761        let result = orchestrator.generate().unwrap();
15762
15763        // Statistics should match actual data
15764        assert_eq!(
15765            result.statistics.vendor_count,
15766            result.master_data.vendors.len()
15767        );
15768        assert_eq!(
15769            result.statistics.customer_count,
15770            result.master_data.customers.len()
15771        );
15772        assert_eq!(
15773            result.statistics.material_count,
15774            result.master_data.materials.len()
15775        );
15776        assert_eq!(
15777            result.statistics.total_entries as usize,
15778            result.journal_entries.len()
15779        );
15780    }
15781
15782    #[test]
15783    fn test_phase_config_defaults() {
15784        let config = PhaseConfig::default();
15785        assert!(config.generate_master_data);
15786        assert!(config.generate_document_flows);
15787        assert!(config.generate_journal_entries);
15788        assert!(!config.inject_anomalies);
15789        assert!(config.validate_balances);
15790        assert!(config.show_progress);
15791        assert!(config.vendors_per_company > 0);
15792        assert!(config.customers_per_company > 0);
15793    }
15794
15795    #[test]
15796    fn test_get_coa_before_generation() {
15797        let config = create_test_config();
15798        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
15799
15800        // Before generation, CoA should be None
15801        assert!(orchestrator.get_coa().is_none());
15802    }
15803
15804    #[test]
15805    fn test_get_coa_after_generation() {
15806        let config = create_test_config();
15807        let phase_config = PhaseConfig {
15808            generate_master_data: false,
15809            generate_document_flows: false,
15810            generate_journal_entries: true,
15811            inject_anomalies: false,
15812            show_progress: false,
15813            ..Default::default()
15814        };
15815
15816        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15817        let _ = orchestrator.generate().unwrap();
15818
15819        // After generation, CoA should be available
15820        assert!(orchestrator.get_coa().is_some());
15821    }
15822
15823    #[test]
15824    fn test_get_master_data() {
15825        let config = create_test_config();
15826        let phase_config = PhaseConfig {
15827            generate_master_data: true,
15828            generate_document_flows: false,
15829            generate_journal_entries: false,
15830            inject_anomalies: false,
15831            show_progress: false,
15832            vendors_per_company: 5,
15833            customers_per_company: 5,
15834            materials_per_company: 5,
15835            assets_per_company: 5,
15836            employees_per_company: 5,
15837            ..Default::default()
15838        };
15839
15840        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15841        let result = orchestrator.generate().unwrap();
15842
15843        // After generate(), master_data is moved into the result
15844        assert!(!result.master_data.vendors.is_empty());
15845    }
15846
15847    #[test]
15848    fn test_with_progress_builder() {
15849        let config = create_test_config();
15850        let orchestrator = EnhancedOrchestrator::with_defaults(config)
15851            .unwrap()
15852            .with_progress(false);
15853
15854        // Should still work without progress
15855        assert!(!orchestrator.phase_config.show_progress);
15856    }
15857
15858    #[test]
15859    fn test_multi_company_generation() {
15860        let mut config = create_test_config();
15861        config.companies.push(CompanyConfig {
15862            code: "2000".to_string(),
15863            name: "Subsidiary".to_string(),
15864            currency: "EUR".to_string(),
15865            functional_currency: None,
15866            country: "DE".to_string(),
15867            annual_transaction_volume: TransactionVolume::TenK,
15868            volume_weight: 0.5,
15869            fiscal_year_variant: "K4".to_string(),
15870        });
15871
15872        let phase_config = PhaseConfig {
15873            generate_master_data: true,
15874            generate_document_flows: false,
15875            generate_journal_entries: true,
15876            inject_anomalies: false,
15877            show_progress: false,
15878            vendors_per_company: 5,
15879            customers_per_company: 5,
15880            materials_per_company: 5,
15881            assets_per_company: 5,
15882            employees_per_company: 5,
15883            ..Default::default()
15884        };
15885
15886        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15887        let result = orchestrator.generate().unwrap();
15888
15889        // Should have master data for both companies
15890        assert!(result.statistics.vendor_count >= 10); // 5 per company
15891        assert!(result.statistics.customer_count >= 10);
15892        assert!(result.statistics.companies_count == 2);
15893    }
15894
15895    #[test]
15896    fn test_empty_master_data_skips_document_flows() {
15897        let config = create_test_config();
15898        let phase_config = PhaseConfig {
15899            generate_master_data: false,   // Skip master data
15900            generate_document_flows: true, // Try to generate flows
15901            generate_journal_entries: false,
15902            inject_anomalies: false,
15903            show_progress: false,
15904            ..Default::default()
15905        };
15906
15907        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15908        let result = orchestrator.generate().unwrap();
15909
15910        // Without master data, document flows should be empty
15911        assert!(result.document_flows.p2p_chains.is_empty());
15912        assert!(result.document_flows.o2c_chains.is_empty());
15913    }
15914
15915    #[test]
15916    fn test_journal_entry_line_item_count() {
15917        let config = create_test_config();
15918        let phase_config = PhaseConfig {
15919            generate_master_data: false,
15920            generate_document_flows: false,
15921            generate_journal_entries: true,
15922            inject_anomalies: false,
15923            show_progress: false,
15924            ..Default::default()
15925        };
15926
15927        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15928        let result = orchestrator.generate().unwrap();
15929
15930        // Total line items should match sum of all entry line counts
15931        let calculated_line_items: u64 = result
15932            .journal_entries
15933            .iter()
15934            .map(|e| e.line_count() as u64)
15935            .sum();
15936        assert_eq!(result.statistics.total_line_items, calculated_line_items);
15937    }
15938
15939    #[test]
15940    fn test_audit_generation() {
15941        let config = create_test_config();
15942        let phase_config = PhaseConfig {
15943            generate_master_data: false,
15944            generate_document_flows: false,
15945            generate_journal_entries: true,
15946            inject_anomalies: false,
15947            show_progress: false,
15948            generate_audit: true,
15949            audit_engagements: 2,
15950            workpapers_per_engagement: 5,
15951            evidence_per_workpaper: 2,
15952            risks_per_engagement: 3,
15953            findings_per_engagement: 2,
15954            judgments_per_engagement: 2,
15955            ..Default::default()
15956        };
15957
15958        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15959        let result = orchestrator.generate().unwrap();
15960
15961        // Should have generated audit data
15962        assert_eq!(result.audit.engagements.len(), 2);
15963        assert!(!result.audit.workpapers.is_empty());
15964        assert!(!result.audit.evidence.is_empty());
15965        assert!(!result.audit.risk_assessments.is_empty());
15966        assert!(!result.audit.findings.is_empty());
15967        assert!(!result.audit.judgments.is_empty());
15968
15969        // New ISA entity collections should also be populated
15970        assert!(
15971            !result.audit.confirmations.is_empty(),
15972            "ISA 505 confirmations should be generated"
15973        );
15974        assert!(
15975            !result.audit.confirmation_responses.is_empty(),
15976            "ISA 505 confirmation responses should be generated"
15977        );
15978        assert!(
15979            !result.audit.procedure_steps.is_empty(),
15980            "ISA 330 procedure steps should be generated"
15981        );
15982        // Samples may or may not be generated depending on workpaper sampling methods
15983        assert!(
15984            !result.audit.analytical_results.is_empty(),
15985            "ISA 520 analytical procedures should be generated"
15986        );
15987        assert!(
15988            !result.audit.ia_functions.is_empty(),
15989            "ISA 610 IA functions should be generated (one per engagement)"
15990        );
15991        assert!(
15992            !result.audit.related_parties.is_empty(),
15993            "ISA 550 related parties should be generated"
15994        );
15995
15996        // Statistics should match
15997        assert_eq!(
15998            result.statistics.audit_engagement_count,
15999            result.audit.engagements.len()
16000        );
16001        assert_eq!(
16002            result.statistics.audit_workpaper_count,
16003            result.audit.workpapers.len()
16004        );
16005        assert_eq!(
16006            result.statistics.audit_evidence_count,
16007            result.audit.evidence.len()
16008        );
16009        assert_eq!(
16010            result.statistics.audit_risk_count,
16011            result.audit.risk_assessments.len()
16012        );
16013        assert_eq!(
16014            result.statistics.audit_finding_count,
16015            result.audit.findings.len()
16016        );
16017        assert_eq!(
16018            result.statistics.audit_judgment_count,
16019            result.audit.judgments.len()
16020        );
16021        assert_eq!(
16022            result.statistics.audit_confirmation_count,
16023            result.audit.confirmations.len()
16024        );
16025        assert_eq!(
16026            result.statistics.audit_confirmation_response_count,
16027            result.audit.confirmation_responses.len()
16028        );
16029        assert_eq!(
16030            result.statistics.audit_procedure_step_count,
16031            result.audit.procedure_steps.len()
16032        );
16033        assert_eq!(
16034            result.statistics.audit_sample_count,
16035            result.audit.samples.len()
16036        );
16037        assert_eq!(
16038            result.statistics.audit_analytical_result_count,
16039            result.audit.analytical_results.len()
16040        );
16041        assert_eq!(
16042            result.statistics.audit_ia_function_count,
16043            result.audit.ia_functions.len()
16044        );
16045        assert_eq!(
16046            result.statistics.audit_ia_report_count,
16047            result.audit.ia_reports.len()
16048        );
16049        assert_eq!(
16050            result.statistics.audit_related_party_count,
16051            result.audit.related_parties.len()
16052        );
16053        assert_eq!(
16054            result.statistics.audit_related_party_transaction_count,
16055            result.audit.related_party_transactions.len()
16056        );
16057    }
16058
16059    #[test]
16060    fn test_new_phases_disabled_by_default() {
16061        let config = create_test_config();
16062        // Verify new config fields default to disabled
16063        assert!(!config.llm.enabled);
16064        assert!(!config.diffusion.enabled);
16065        assert!(!config.causal.enabled);
16066
16067        let phase_config = PhaseConfig {
16068            generate_master_data: false,
16069            generate_document_flows: false,
16070            generate_journal_entries: true,
16071            inject_anomalies: false,
16072            show_progress: false,
16073            ..Default::default()
16074        };
16075
16076        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16077        let result = orchestrator.generate().unwrap();
16078
16079        // All new phase statistics should be zero when disabled
16080        assert_eq!(result.statistics.llm_enrichment_ms, 0);
16081        assert_eq!(result.statistics.llm_vendors_enriched, 0);
16082        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
16083        assert_eq!(result.statistics.diffusion_samples_generated, 0);
16084        assert_eq!(result.statistics.causal_generation_ms, 0);
16085        assert_eq!(result.statistics.causal_samples_generated, 0);
16086        assert!(result.statistics.causal_validation_passed.is_none());
16087        assert_eq!(result.statistics.counterfactual_pair_count, 0);
16088        assert!(result.counterfactual_pairs.is_empty());
16089    }
16090
16091    #[test]
16092    fn test_counterfactual_generation_enabled() {
16093        let config = create_test_config();
16094        let phase_config = PhaseConfig {
16095            generate_master_data: false,
16096            generate_document_flows: false,
16097            generate_journal_entries: true,
16098            inject_anomalies: false,
16099            show_progress: false,
16100            generate_counterfactuals: true,
16101            generate_period_close: false, // Disable so entry count matches counterfactual pairs
16102            ..Default::default()
16103        };
16104
16105        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16106        let result = orchestrator.generate().unwrap();
16107
16108        // With JE generation enabled, counterfactual pairs should be generated
16109        if !result.journal_entries.is_empty() {
16110            assert_eq!(
16111                result.counterfactual_pairs.len(),
16112                result.journal_entries.len()
16113            );
16114            assert_eq!(
16115                result.statistics.counterfactual_pair_count,
16116                result.journal_entries.len()
16117            );
16118            // Each pair should have a distinct pair_id
16119            let ids: std::collections::HashSet<_> = result
16120                .counterfactual_pairs
16121                .iter()
16122                .map(|p| p.pair_id.clone())
16123                .collect();
16124            assert_eq!(ids.len(), result.counterfactual_pairs.len());
16125        }
16126    }
16127
16128    #[test]
16129    fn test_llm_enrichment_enabled() {
16130        let mut config = create_test_config();
16131        config.llm.enabled = true;
16132        config.llm.max_vendor_enrichments = 3;
16133
16134        let phase_config = PhaseConfig {
16135            generate_master_data: true,
16136            generate_document_flows: false,
16137            generate_journal_entries: false,
16138            inject_anomalies: false,
16139            show_progress: false,
16140            vendors_per_company: 5,
16141            customers_per_company: 3,
16142            materials_per_company: 3,
16143            assets_per_company: 3,
16144            employees_per_company: 3,
16145            ..Default::default()
16146        };
16147
16148        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16149        let result = orchestrator.generate().unwrap();
16150
16151        // LLM enrichment should have run
16152        assert!(result.statistics.llm_vendors_enriched > 0);
16153        assert!(result.statistics.llm_vendors_enriched <= 3);
16154    }
16155
16156    #[test]
16157    fn test_diffusion_enhancement_enabled() {
16158        let mut config = create_test_config();
16159        config.diffusion.enabled = true;
16160        config.diffusion.n_steps = 50;
16161        config.diffusion.sample_size = 20;
16162
16163        let phase_config = PhaseConfig {
16164            generate_master_data: false,
16165            generate_document_flows: false,
16166            generate_journal_entries: true,
16167            inject_anomalies: false,
16168            show_progress: false,
16169            ..Default::default()
16170        };
16171
16172        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16173        let result = orchestrator.generate().unwrap();
16174
16175        // Diffusion phase should have generated samples
16176        assert_eq!(result.statistics.diffusion_samples_generated, 20);
16177    }
16178
16179    #[test]
16180    fn test_causal_overlay_enabled() {
16181        let mut config = create_test_config();
16182        config.causal.enabled = true;
16183        config.causal.template = "fraud_detection".to_string();
16184        config.causal.sample_size = 100;
16185        config.causal.validate = true;
16186
16187        let phase_config = PhaseConfig {
16188            generate_master_data: false,
16189            generate_document_flows: false,
16190            generate_journal_entries: true,
16191            inject_anomalies: false,
16192            show_progress: false,
16193            ..Default::default()
16194        };
16195
16196        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16197        let result = orchestrator.generate().unwrap();
16198
16199        // Causal phase should have generated samples
16200        assert_eq!(result.statistics.causal_samples_generated, 100);
16201        // Validation should have run
16202        assert!(result.statistics.causal_validation_passed.is_some());
16203    }
16204
16205    #[test]
16206    fn test_causal_overlay_revenue_cycle_template() {
16207        let mut config = create_test_config();
16208        config.causal.enabled = true;
16209        config.causal.template = "revenue_cycle".to_string();
16210        config.causal.sample_size = 50;
16211        config.causal.validate = false;
16212
16213        let phase_config = PhaseConfig {
16214            generate_master_data: false,
16215            generate_document_flows: false,
16216            generate_journal_entries: true,
16217            inject_anomalies: false,
16218            show_progress: false,
16219            ..Default::default()
16220        };
16221
16222        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16223        let result = orchestrator.generate().unwrap();
16224
16225        // Causal phase should have generated samples
16226        assert_eq!(result.statistics.causal_samples_generated, 50);
16227        // Validation was disabled
16228        assert!(result.statistics.causal_validation_passed.is_none());
16229    }
16230
16231    #[test]
16232    fn test_all_new_phases_enabled_together() {
16233        let mut config = create_test_config();
16234        config.llm.enabled = true;
16235        config.llm.max_vendor_enrichments = 2;
16236        config.diffusion.enabled = true;
16237        config.diffusion.n_steps = 20;
16238        config.diffusion.sample_size = 10;
16239        config.causal.enabled = true;
16240        config.causal.sample_size = 50;
16241        config.causal.validate = true;
16242
16243        let phase_config = PhaseConfig {
16244            generate_master_data: true,
16245            generate_document_flows: false,
16246            generate_journal_entries: true,
16247            inject_anomalies: false,
16248            show_progress: false,
16249            vendors_per_company: 5,
16250            customers_per_company: 3,
16251            materials_per_company: 3,
16252            assets_per_company: 3,
16253            employees_per_company: 3,
16254            ..Default::default()
16255        };
16256
16257        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16258        let result = orchestrator.generate().unwrap();
16259
16260        // All three phases should have run
16261        assert!(result.statistics.llm_vendors_enriched > 0);
16262        assert_eq!(result.statistics.diffusion_samples_generated, 10);
16263        assert_eq!(result.statistics.causal_samples_generated, 50);
16264        assert!(result.statistics.causal_validation_passed.is_some());
16265    }
16266
16267    #[test]
16268    fn test_statistics_serialization_with_new_fields() {
16269        let stats = EnhancedGenerationStatistics {
16270            total_entries: 100,
16271            total_line_items: 500,
16272            llm_enrichment_ms: 42,
16273            llm_vendors_enriched: 10,
16274            diffusion_enhancement_ms: 100,
16275            diffusion_samples_generated: 50,
16276            causal_generation_ms: 200,
16277            causal_samples_generated: 100,
16278            causal_validation_passed: Some(true),
16279            ..Default::default()
16280        };
16281
16282        let json = serde_json::to_string(&stats).unwrap();
16283        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
16284
16285        assert_eq!(deserialized.llm_enrichment_ms, 42);
16286        assert_eq!(deserialized.llm_vendors_enriched, 10);
16287        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
16288        assert_eq!(deserialized.diffusion_samples_generated, 50);
16289        assert_eq!(deserialized.causal_generation_ms, 200);
16290        assert_eq!(deserialized.causal_samples_generated, 100);
16291        assert_eq!(deserialized.causal_validation_passed, Some(true));
16292    }
16293
16294    #[test]
16295    fn test_statistics_backward_compat_deserialization() {
16296        // Old JSON without the new fields should still deserialize
16297        let old_json = r#"{
16298            "total_entries": 100,
16299            "total_line_items": 500,
16300            "accounts_count": 50,
16301            "companies_count": 1,
16302            "period_months": 12,
16303            "vendor_count": 10,
16304            "customer_count": 20,
16305            "material_count": 15,
16306            "asset_count": 5,
16307            "employee_count": 8,
16308            "p2p_chain_count": 5,
16309            "o2c_chain_count": 5,
16310            "ap_invoice_count": 5,
16311            "ar_invoice_count": 5,
16312            "ocpm_event_count": 0,
16313            "ocpm_object_count": 0,
16314            "ocpm_case_count": 0,
16315            "audit_engagement_count": 0,
16316            "audit_workpaper_count": 0,
16317            "audit_evidence_count": 0,
16318            "audit_risk_count": 0,
16319            "audit_finding_count": 0,
16320            "audit_judgment_count": 0,
16321            "anomalies_injected": 0,
16322            "data_quality_issues": 0,
16323            "banking_customer_count": 0,
16324            "banking_account_count": 0,
16325            "banking_transaction_count": 0,
16326            "banking_suspicious_count": 0,
16327            "graph_export_count": 0,
16328            "graph_node_count": 0,
16329            "graph_edge_count": 0
16330        }"#;
16331
16332        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
16333
16334        // New fields should default to 0 / None
16335        assert_eq!(stats.llm_enrichment_ms, 0);
16336        assert_eq!(stats.llm_vendors_enriched, 0);
16337        assert_eq!(stats.diffusion_enhancement_ms, 0);
16338        assert_eq!(stats.diffusion_samples_generated, 0);
16339        assert_eq!(stats.causal_generation_ms, 0);
16340        assert_eq!(stats.causal_samples_generated, 0);
16341        assert!(stats.causal_validation_passed.is_none());
16342    }
16343}