Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use rayon::prelude::*;
178
179// ============================================================================
180// Configuration Conversion Functions
181// ============================================================================
182
183/// Convert P2P flow config from schema to generator config.
184/// v4.4.1 — build a `DataQualityStats` with only `total_records`
185/// populated to `n_entries`. Used when the data-quality phase is
186/// skipped (by config or resource pressure) so downstream consumers
187/// can still see the denominator. Before v4.4.1 the writer emitted
188/// `total_records: 0` in those cases, which the SDK team flagged as
189/// indistinguishable from "ran but processed nothing".
190fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
191    #[allow(clippy::field_reassign_with_default)]
192    {
193        let mut s = DataQualityStats::default();
194        s.total_records = n_entries;
195        s.missing_values.total_records = n_entries;
196        s.format_variations.total_processed = n_entries;
197        s.duplicates.total_processed = n_entries;
198        s
199    }
200}
201
202fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
203    let payment_behavior = &schema_config.payment_behavior;
204    let late_dist = &payment_behavior.late_payment_days_distribution;
205
206    P2PGeneratorConfig {
207        three_way_match_rate: schema_config.three_way_match_rate,
208        partial_delivery_rate: schema_config.partial_delivery_rate,
209        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
210        price_variance_rate: schema_config.price_variance_rate,
211        max_price_variance_percent: schema_config.max_price_variance_percent,
212        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
213        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
214        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
215        payment_method_distribution: vec![
216            (PaymentMethod::BankTransfer, 0.60),
217            (PaymentMethod::Check, 0.25),
218            (PaymentMethod::Wire, 0.10),
219            (PaymentMethod::CreditCard, 0.05),
220        ],
221        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
222        payment_behavior: P2PPaymentBehavior {
223            late_payment_rate: payment_behavior.late_payment_rate,
224            late_payment_distribution: LatePaymentDistribution {
225                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
226                late_8_to_14: late_dist.late_8_to_14,
227                very_late_15_to_30: late_dist.very_late_15_to_30,
228                severely_late_31_to_60: late_dist.severely_late_31_to_60,
229                extremely_late_over_60: late_dist.extremely_late_over_60,
230            },
231            partial_payment_rate: payment_behavior.partial_payment_rate,
232            payment_correction_rate: payment_behavior.payment_correction_rate,
233            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
234        },
235    }
236}
237
238/// Convert O2C flow config from schema to generator config.
239fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
240    let payment_behavior = &schema_config.payment_behavior;
241
242    O2CGeneratorConfig {
243        credit_check_failure_rate: schema_config.credit_check_failure_rate,
244        partial_shipment_rate: schema_config.partial_shipment_rate,
245        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
246        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
247        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
248        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
249        bad_debt_rate: schema_config.bad_debt_rate,
250        returns_rate: schema_config.return_rate,
251        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
252        payment_method_distribution: vec![
253            (PaymentMethod::BankTransfer, 0.50),
254            (PaymentMethod::Check, 0.30),
255            (PaymentMethod::Wire, 0.15),
256            (PaymentMethod::CreditCard, 0.05),
257        ],
258        payment_behavior: O2CPaymentBehavior {
259            partial_payment_rate: payment_behavior.partial_payments.rate,
260            short_payment_rate: payment_behavior.short_payments.rate,
261            max_short_percent: payment_behavior.short_payments.max_short_percent,
262            on_account_rate: payment_behavior.on_account_payments.rate,
263            payment_correction_rate: payment_behavior.payment_corrections.rate,
264            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
265        },
266    }
267}
268
/// Configuration for which generation phases to run.
///
/// `bool` fields switch individual phases on or off; `usize` fields set how
/// many records the corresponding phase should produce. Instances can be
/// obtained from the `Default` implementation or from
/// [`PhaseConfig::from_config`], which derives the feature flags from a
/// `GeneratorConfig`.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Generate master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Generate document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Generate OCPM events from document flows.
    pub generate_ocpm_events: bool,
    /// Generate journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    ///
    /// [`PhaseConfig::from_config`] sets this when either `fraud.enabled` or
    /// `anomaly_injection.enabled` is true.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Validate balance sheet equation after generation.
    pub validate_balances: bool,
    /// Show progress bars.
    pub show_progress: bool,
    /// Number of vendors to generate per company.
    pub vendors_per_company: usize,
    /// Number of customers to generate per company.
    pub customers_per_company: usize,
    /// Number of materials to generate per company.
    pub materials_per_company: usize,
    /// Number of assets to generate per company.
    pub assets_per_company: usize,
    /// Number of employees to generate per company.
    pub employees_per_company: usize,
    /// Number of P2P chains to generate.
    pub p2p_chains: usize,
    /// Number of O2C chains to generate.
    pub o2c_chains: usize,
    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// Number of audit engagements to generate.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Generate banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Generate graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Generate S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    ///
    /// [`PhaseConfig::from_config`] reads this from `source_to_pay.enabled`.
    pub generate_sourcing: bool,
    /// Generate bank reconciliations from payments.
    ///
    /// [`PhaseConfig::from_config`] ties this to `financial_reporting.enabled`,
    /// the same flag that drives `generate_financial_statements`.
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Generate accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Generate sales quotes, management KPIs, and budgets.
    ///
    /// [`PhaseConfig::from_config`] reads this from `sales_quotes.enabled`.
    pub generate_sales_kpi_budgets: bool,
    /// Generate tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Generate ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Generate intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Generate process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Generate counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Generate compliance regulations data (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Generate period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Generate HR data (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Generate treasury data (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Generate project accounting data (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
    /// v3.3.0: generate legal documents per engagement (engagement letters,
    /// management rep letters, legal opinions, regulatory filings,
    /// board resolutions). Gated by `compliance_regulations.legal_documents.enabled`;
    /// [`PhaseConfig::from_config`] additionally requires
    /// `compliance_regulations.enabled`.
    pub generate_legal_documents: bool,
    /// v3.3.0: generate IT general controls (access logs, change
    /// management records) per audit engagement. Gated by
    /// `audit.it_controls.enabled`; [`PhaseConfig::from_config`] additionally
    /// requires `audit.enabled`.
    pub generate_it_controls: bool,
    /// v3.3.0: run the analytics-metadata phase after all JE-adding
    /// phases. Wires PriorYearGenerator / IndustryBenchmarkGenerator /
    /// ManagementReportGenerator / DriftEventGenerator. Gated by the
    /// top-level `analytics_metadata.enabled` config flag.
    pub generate_analytics_metadata: bool,
}
366
367impl Default for PhaseConfig {
368    fn default() -> Self {
369        Self {
370            generate_master_data: true,
371            generate_document_flows: true,
372            generate_ocpm_events: false, // Off by default
373            generate_journal_entries: true,
374            inject_anomalies: false,
375            inject_data_quality: false, // Off by default (to preserve clean test data)
376            validate_balances: true,
377            show_progress: true,
378            vendors_per_company: 50,
379            customers_per_company: 100,
380            materials_per_company: 200,
381            assets_per_company: 50,
382            employees_per_company: 100,
383            p2p_chains: 100,
384            o2c_chains: 100,
385            generate_audit: false, // Off by default
386            audit_engagements: 5,
387            workpapers_per_engagement: 20,
388            evidence_per_workpaper: 5,
389            risks_per_engagement: 15,
390            findings_per_engagement: 8,
391            judgments_per_engagement: 10,
392            generate_banking: false,                // Off by default
393            generate_graph_export: false,           // Off by default
394            generate_sourcing: false,               // Off by default
395            generate_bank_reconciliation: false,    // Off by default
396            generate_financial_statements: false,   // Off by default
397            generate_accounting_standards: false,   // Off by default
398            generate_manufacturing: false,          // Off by default
399            generate_sales_kpi_budgets: false,      // Off by default
400            generate_tax: false,                    // Off by default
401            generate_esg: false,                    // Off by default
402            generate_intercompany: false,           // Off by default
403            generate_evolution_events: true,        // On by default
404            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
405            generate_compliance_regulations: false, // Off by default
406            generate_period_close: true,            // On by default
407            generate_hr: false,                     // Off by default
408            generate_treasury: false,               // Off by default
409            generate_project_accounting: false,     // Off by default
410            generate_legal_documents: false,        // v3.3.0 — off by default
411            generate_it_controls: false,            // v3.3.0 — off by default
412            generate_analytics_metadata: false,     // v3.3.0 — off by default
413        }
414    }
415}
416
417impl PhaseConfig {
418    /// Derive phase flags from [`GeneratorConfig`].
419    ///
420    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
421    /// CLI flags can override individual fields after calling this method.
422    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
423        Self {
424            // Always-on phases
425            generate_master_data: true,
426            generate_document_flows: true,
427            generate_journal_entries: true,
428            validate_balances: true,
429            generate_period_close: true,
430            generate_evolution_events: true,
431            show_progress: true,
432
433            // Feature-gated phases — derived from config sections
434            generate_audit: cfg.audit.enabled,
435            generate_banking: cfg.banking.enabled,
436            generate_graph_export: cfg.graph_export.enabled,
437            generate_sourcing: cfg.source_to_pay.enabled,
438            generate_intercompany: cfg.intercompany.enabled,
439            generate_financial_statements: cfg.financial_reporting.enabled,
440            generate_bank_reconciliation: cfg.financial_reporting.enabled,
441            generate_accounting_standards: cfg.accounting_standards.enabled,
442            generate_manufacturing: cfg.manufacturing.enabled,
443            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
444            generate_tax: cfg.tax.enabled,
445            generate_esg: cfg.esg.enabled,
446            generate_ocpm_events: cfg.ocpm.enabled,
447            generate_compliance_regulations: cfg.compliance_regulations.enabled,
448            generate_hr: cfg.hr.enabled,
449            generate_treasury: cfg.treasury.enabled,
450            generate_project_accounting: cfg.project_accounting.enabled,
451
452            // v3.3.0: L1 generator wiring
453            // Legal documents emitted when compliance_regulations is enabled
454            // and the nested legal_documents.enabled flag is set.
455            generate_legal_documents: cfg.compliance_regulations.enabled
456                && cfg.compliance_regulations.legal_documents.enabled,
457            // IT general controls emitted when audit is enabled and the
458            // nested it_controls.enabled flag is set.
459            generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
460            // Analytics metadata phase (prior-year, industry benchmarks,
461            // management reports, drift events).
462            generate_analytics_metadata: cfg.analytics_metadata.enabled,
463
464            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
465            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
466
467            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
468            inject_data_quality: cfg.data_quality.enabled,
469
470            // Count defaults (CLI can override after calling this method)
471            vendors_per_company: 50,
472            customers_per_company: 100,
473            materials_per_company: 200,
474            assets_per_company: 50,
475            employees_per_company: 100,
476            p2p_chains: 100,
477            o2c_chains: 100,
478            audit_engagements: 5,
479            workpapers_per_engagement: 20,
480            evidence_per_workpaper: 5,
481            risks_per_engagement: 15,
482            findings_per_engagement: 8,
483            judgments_per_engagement: 10,
484        }
485    }
486}
487
/// Master data snapshot containing all generated entities.
///
/// Every field is a `Vec`, and the type derives `Default`, so
/// `MasterDataSnapshot::default()` is a snapshot with all collections empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Generated vendors.
    pub vendors: Vec<Vendor>,
    /// Generated customers.
    pub customers: Vec<Customer>,
    /// Generated materials.
    pub materials: Vec<Material>,
    /// Generated fixed assets.
    pub assets: Vec<FixedAsset>,
    /// Generated employees.
    pub employees: Vec<Employee>,
    /// Generated cost center hierarchy (two-level: departments + sub-departments).
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
    /// v3.3.0+: organizational profiles (one per company) with
    /// industry / geography / structure / complexity metadata. Emitted
    /// alongside master data when `generate_master_data = true`.
    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
}
510
/// Info about a completed hypergraph export.
///
/// Holds three element counts plus the directory the export was written to.
/// This type does not derive `Default`, so all four fields must be supplied
/// when constructing it.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes exported.
    pub node_count: usize,
    /// Number of pairwise edges exported.
    pub edge_count: usize,
    /// Number of hyperedges exported.
    pub hyperedge_count: usize,
    /// Output directory path.
    pub output_path: PathBuf,
}
523
/// Document flow snapshot containing all generated document chains.
///
/// Holds the P2P and O2C chains plus flattened per-document-type lists and
/// the cross-document references. Every field is a `Vec`, and the type
/// derives `Default`, so the default value has all collections empty.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// All purchase orders (flattened).
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// All goods receipts (flattened).
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// All vendor invoices (flattened).
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// All sales orders (flattened).
    pub sales_orders: Vec<documents::SalesOrder>,
    /// All deliveries (flattened).
    pub deliveries: Vec<documents::Delivery>,
    /// All customer invoices (flattened).
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// All payments (flattened).
    pub payments: Vec<documents::Payment>,
    /// Cross-document references collected from all document headers
    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
    pub document_references: Vec<documents::DocumentReference>,
}
549
/// Subledger snapshot containing generated subledger records.
///
/// Groups AP, AR, fixed-asset and inventory subledger output. Every field is
/// a `Vec`, and the type derives `Default`, so the default value has all
/// collections empty.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices linked from document flow vendor invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices linked from document flow customer invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// FA subledger records (asset acquisitions from FA generator).
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions from inventory generator.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements from inventory generator.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports, one per company, computed after payment settlement.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports, one per company, computed after payment settlement.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs executed after AR aging (one per company per dunning cycle).
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters generated across all dunning runs.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
576
/// OCPM snapshot containing generated OCPM event log data.
///
/// The type derives `Default`; the default value has `event_log` set to
/// `None` and all three counters at zero.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// OCPM event log (if generated)
    pub event_log: Option<OcpmEventLog>,
    /// Number of events generated
    pub event_count: usize,
    /// Number of objects generated
    pub object_count: usize,
    /// Number of cases generated
    pub case_count: usize,
}
589
590/// Audit data snapshot containing all generated audit-related entities.
591#[derive(Debug, Clone, Default)]
592pub struct AuditSnapshot {
593    /// Audit engagements per ISA 210/220.
594    pub engagements: Vec<AuditEngagement>,
595    /// Workpapers per ISA 230.
596    pub workpapers: Vec<Workpaper>,
597    /// Audit evidence per ISA 500.
598    pub evidence: Vec<AuditEvidence>,
599    /// Risk assessments per ISA 315/330.
600    pub risk_assessments: Vec<RiskAssessment>,
601    /// Audit findings per ISA 265.
602    pub findings: Vec<AuditFinding>,
603    /// Professional judgments per ISA 200.
604    pub judgments: Vec<ProfessionalJudgment>,
605    /// External confirmations per ISA 505.
606    pub confirmations: Vec<ExternalConfirmation>,
607    /// Confirmation responses per ISA 505.
608    pub confirmation_responses: Vec<ConfirmationResponse>,
609    /// Audit procedure steps per ISA 330/530.
610    pub procedure_steps: Vec<AuditProcedureStep>,
611    /// Audit samples per ISA 530.
612    pub samples: Vec<AuditSample>,
613    /// Analytical procedure results per ISA 520.
614    pub analytical_results: Vec<AnalyticalProcedureResult>,
615    /// Internal audit functions per ISA 610.
616    pub ia_functions: Vec<InternalAuditFunction>,
617    /// Internal audit reports per ISA 610.
618    pub ia_reports: Vec<InternalAuditReport>,
619    /// Related parties per ISA 550.
620    pub related_parties: Vec<RelatedParty>,
621    /// Related party transactions per ISA 550.
622    pub related_party_transactions: Vec<RelatedPartyTransaction>,
623    // ---- ISA 600: Group Audits ----
624    /// Component auditors assigned by jurisdiction (ISA 600).
625    pub component_auditors: Vec<ComponentAuditor>,
626    /// Group audit plan with materiality allocations (ISA 600).
627    pub group_audit_plan: Option<GroupAuditPlan>,
628    /// Component instructions issued to component auditors (ISA 600).
629    pub component_instructions: Vec<ComponentInstruction>,
630    /// Reports received from component auditors (ISA 600).
631    pub component_reports: Vec<ComponentAuditorReport>,
632    // ---- ISA 210: Engagement Letters ----
633    /// Engagement letters per ISA 210.
634    pub engagement_letters: Vec<EngagementLetter>,
635    // ---- ISA 560 / IAS 10: Subsequent Events ----
636    /// Subsequent events per ISA 560 / IAS 10.
637    pub subsequent_events: Vec<SubsequentEvent>,
638    // ---- ISA 402: Service Organization Controls ----
639    /// Service organizations identified per ISA 402.
640    pub service_organizations: Vec<ServiceOrganization>,
641    /// SOC reports obtained per ISA 402.
642    pub soc_reports: Vec<SocReport>,
643    /// User entity controls documented per ISA 402.
644    pub user_entity_controls: Vec<UserEntityControl>,
645    // ---- ISA 570: Going Concern ----
646    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
647    pub going_concern_assessments:
648        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
649    // ---- ISA 540: Accounting Estimates ----
650    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
651    pub accounting_estimates:
652        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
653    // ---- ISA 700/701/705/706: Audit Opinions ----
654    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
655    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
656    /// Key Audit Matters per ISA 701 (flattened across all opinions).
657    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
658    // ---- SOX 302 / 404 ----
659    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
660    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
661    /// SOX Section 404 ICFR assessments (one per entity per year).
662    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
663    // ---- ISA 320: Materiality ----
664    /// Materiality calculations per entity per period (ISA 320).
665    pub materiality_calculations:
666        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
667    // ---- ISA 315: Combined Risk Assessments ----
668    /// Combined Risk Assessments per account area / assertion (ISA 315).
669    pub combined_risk_assessments:
670        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
671    // ---- ISA 530: Sampling Plans ----
672    /// Sampling plans per CRA at Moderate or higher (ISA 530).
673    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
674    /// Individual sampled items (key items + representative items) per ISA 530.
675    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
676    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
677    /// Significant classes of transactions per ISA 315 (one set per entity).
678    pub significant_transaction_classes:
679        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
680    // ---- ISA 520: Unusual Item Markers ----
681    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
682    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
683    // ---- ISA 520: Analytical Relationships ----
684    /// Analytical relationships (ratios, trends, correlations) per entity.
685    pub analytical_relationships:
686        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
687    // ---- PCAOB-ISA Cross-Reference ----
688    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
689    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
690    // ---- ISA Standard Reference ----
691    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
692    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
693    // ---- ISA 220 / ISA 300: Audit Scopes ----
694    /// Audit scope records (one per engagement) describing the audit boundary.
695    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
696    // ---- FSM Event Trail ----
697    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
698    /// Contains the ordered sequence of state-transition and procedure-step events
699    /// generated by the audit FSM engine.
700    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
701    // ---- v3.3.0: L1 generator wiring ----
702    /// Legal documents (engagement letters, management reps, legal
703    /// opinions, regulatory filings, board resolutions) per entity.
704    /// Emitted by `LegalDocumentGenerator` when
705    /// `compliance_regulations.legal_documents.enabled = true`.
706    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
707    /// IT general controls — access logs (login/privileged action
708    /// audit trail). Emitted by `ItControlsGenerator` when
709    /// `audit.it_controls.enabled = true`.
710    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
711    /// IT general controls — change management records (code deploys,
712    /// config changes, patches). Emitted by `ItControlsGenerator`.
713    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
714}
715
716/// Snapshot of the banking KYC/AML generation output: entities, AML labels, and summary counts.
717#[derive(Debug, Clone, Default)]
718pub struct BankingSnapshot {
719    /// Generated banking customers (retail, business, trust).
720    pub customers: Vec<BankingCustomer>,
721    /// Generated bank accounts.
722    pub accounts: Vec<BankAccount>,
723    /// Generated bank transactions (carrying AML labels).
724    pub transactions: Vec<BankTransaction>,
725    /// AML labels at transaction level, including features.
726    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
727    /// AML labels at customer level.
728    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
729    /// AML labels at account level.
730    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
731    /// AML labels at relationship level.
732    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
733    /// Exported case narratives describing the AML scenarios.
734    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
735    /// Count of suspicious transactions.
736    pub suspicious_count: usize,
737    /// Count of AML scenarios that were generated.
738    pub scenario_count: usize,
739}
740
741/// Snapshot of the graph export step: whether it ran, and metadata for each exported graph.
742#[derive(Debug, Clone, Default, Serialize)]
743pub struct GraphExportSnapshot {
744    /// `true` when graph export was performed.
745    pub exported: bool,
746    /// How many graphs were exported.
747    pub graph_count: usize,
748    /// Metadata of each exported graph, keyed by format name.
749    pub exports: HashMap<String, GraphExportInfo>,
750}
751
752/// Metadata describing one exported graph (see `GraphExportSnapshot::exports`).
753#[derive(Debug, Clone, Serialize)]
754pub struct GraphExportInfo {
755    /// Name of the graph.
756    pub name: String,
757    /// Export format name (pytorch_geometric, neo4j, dgl).
758    pub format: String,
759    /// Directory path the export was written to.
760    pub output_path: PathBuf,
761    /// Node count of the graph.
762    pub node_count: usize,
763    /// Edge count of the graph.
764    pub edge_count: usize,
765}
766
767/// Snapshot of Source-to-Contract (S2C) sourcing data, from spend analysis through scorecards.
768#[derive(Debug, Clone, Default)]
769pub struct SourcingSnapshot {
770    /// Spend analysis records.
771    pub spend_analyses: Vec<SpendAnalysis>,
772    /// Sourcing project records.
773    pub sourcing_projects: Vec<SourcingProject>,
774    /// Supplier qualification records.
775    pub qualifications: Vec<SupplierQualification>,
776    /// RFx events (RFI, RFP, or RFQ).
777    pub rfx_events: Vec<RfxEvent>,
778    /// Bids submitted by suppliers.
779    pub bids: Vec<SupplierBid>,
780    /// Evaluations of supplier bids.
781    pub bid_evaluations: Vec<BidEvaluation>,
782    /// Procurement contract records.
783    pub contracts: Vec<ProcurementContract>,
784    /// Catalog item records.
785    pub catalog_items: Vec<CatalogItem>,
786    /// Supplier scorecard records.
787    pub scorecards: Vec<SupplierScorecard>,
788}
789
790/// Trial balance for one fiscal period, together with that period's identifying metadata.
791#[derive(Debug, Clone, Serialize, Deserialize)]
792pub struct PeriodTrialBalance {
793    /// Fiscal year of the period.
794    pub fiscal_year: u16,
795    /// Fiscal period number (1-12).
796    pub fiscal_period: u8,
797    /// Start date of the period.
798    pub period_start: NaiveDate,
799    /// End date of the period.
800    pub period_end: NaiveDate,
801    /// Trial balance entries belonging to this period.
802    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
803}
804
805/// Financial reporting snapshot (statements, consolidation, bank reconciliations, trial balances, segments, notes).
806#[derive(Debug, Clone, Default)]
807pub struct FinancialReportingSnapshot {
808    /// Financial statements (balance sheet, income statement, cash flow).
809    /// For multi-entity configs this includes all standalone statements.
810    pub financial_statements: Vec<FinancialStatement>,
811    /// Standalone financial statements, keyed by entity code.
812    /// Each entity maps to its own list of statements.
813    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
814    /// Consolidated group financial statements (one per period, is_consolidated=true).
815    pub consolidated_statements: Vec<FinancialStatement>,
816    /// Consolidation schedules (one per period) with pre/post elimination detail.
817    pub consolidation_schedules: Vec<ConsolidationSchedule>,
818    /// Bank reconciliation records.
819    pub bank_reconciliations: Vec<BankReconciliation>,
820    /// Trial balances at period close (one per period).
821    pub trial_balances: Vec<PeriodTrialBalance>,
822    /// Operating segment reports per IFRS 8 / ASC 280 (one per segment per period).
823    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
824    /// Segment reconciliations per IFRS 8 / ASC 280 (one per period, tying segments to consolidated FS).
825    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
826    /// Notes to the financial statements (IAS 1 / ASC 235); one set per entity.
827    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
828}
829
830/// HR data snapshot (payroll, time entries, expense reports, benefit enrollments, pensions, stock-based compensation).
// NOTE(review): the `*_count` fields presumably mirror the lengths of the
// matching vectors; nothing in this struct enforces that — confirm at the producer.
831#[derive(Debug, Clone, Default)]
832pub struct HrSnapshot {
833    /// Payroll runs (actual data).
834    pub payroll_runs: Vec<PayrollRun>,
835    /// Payroll line items (actual data).
836    pub payroll_line_items: Vec<PayrollLineItem>,
837    /// Time entries (actual data).
838    pub time_entries: Vec<TimeEntry>,
839    /// Expense reports (actual data).
840    pub expense_reports: Vec<ExpenseReport>,
841    /// Benefit enrollments (actual data).
842    pub benefit_enrollments: Vec<BenefitEnrollment>,
843    /// Defined benefit pension plans (IAS 19 / ASC 715).
844    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
845    /// Roll-forwards of the pension obligation (DBO).
846    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
847    /// Roll-forwards of plan assets.
848    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
849    /// Pension disclosure records.
850    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
851    /// Journal entries generated from pension expense and OCI remeasurements.
852    pub pension_journal_entries: Vec<JournalEntry>,
853    /// Stock grants (ASC 718 / IFRS 2).
854    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
855    /// Per-period stock-based compensation expense records.
856    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
857    /// Journal entries generated from stock-based compensation expense.
858    pub stock_comp_journal_entries: Vec<JournalEntry>,
859    /// Payroll run count.
860    pub payroll_run_count: usize,
861    /// Payroll line item count.
862    pub payroll_line_item_count: usize,
863    /// Time entry count.
864    pub time_entry_count: usize,
865    /// Expense report count.
866    pub expense_report_count: usize,
867    /// Benefit enrollment count.
868    pub benefit_enrollment_count: usize,
869    /// Pension plan count.
870    pub pension_plan_count: usize,
871    /// Stock grant count.
872    pub stock_grant_count: usize,
873}
874
875/// Accounting standards data snapshot (revenue recognition, impairment, business combinations, ECL, provisions, currency translation, leases, fair value, framework differences).
// NOTE(review): the `*_count` fields presumably mirror the lengths of the
// matching vectors; nothing in this struct enforces that — confirm at the producer.
876#[derive(Debug, Clone, Default)]
877pub struct AccountingStandardsSnapshot {
878    /// Revenue recognition contracts (actual data).
879    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
880    /// Impairment tests (actual data).
881    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
882    /// Business combinations (IFRS 3 / ASC 805).
883    pub business_combinations:
884        Vec<datasynth_core::models::business_combination::BusinessCombination>,
885    /// Journal entries generated from business combinations (Day 1 + amortization).
886    pub business_combination_journal_entries: Vec<JournalEntry>,
887    /// Expected credit loss (ECL) models (IFRS 9 / ASC 326).
888    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
889    /// Movements of the ECL provision.
890    pub ecl_provision_movements:
891        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
892    /// Journal entries generated from the ECL provision.
893    pub ecl_journal_entries: Vec<JournalEntry>,
894    /// Provisions (IAS 37 / ASC 450).
895    pub provisions: Vec<datasynth_core::models::provision::Provision>,
896    /// Provision movement roll-forwards (IAS 37 / ASC 450).
897    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
898    /// Contingent liabilities (IAS 37 / ASC 450).
899    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
900    /// Journal entries generated from provisions.
901    pub provision_journal_entries: Vec<JournalEntry>,
902    /// IAS 21 functional currency translation results (one per entity per period).
903    pub currency_translation_results:
904        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
905    /// Revenue recognition contract count.
906    pub revenue_contract_count: usize,
907    /// Impairment test count.
908    pub impairment_test_count: usize,
909    /// Business combination count.
910    pub business_combination_count: usize,
911    /// ECL model count.
912    pub ecl_model_count: usize,
913    /// Provision count.
914    pub provision_count: usize,
915    /// Currency translation result count (IAS 21).
916    pub currency_translation_count: usize,
917    // ---- v3.3.1: Lease / FairValue / FrameworkReconciliation ----
918    /// Lease contracts (IFRS 16 / ASC 842). Each entry carries its own
919    /// ROU asset + lease liability details.
920    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
921    /// Fair value measurements (IFRS 13 / ASC 820) across Level 1/2/3.
922    pub fair_value_measurements:
923        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
924    /// Framework difference records (dual-reporting only).
925    pub framework_differences:
926        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
927    /// Per-entity framework reconciliation (dual-reporting only).
928    pub framework_reconciliations:
929        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
930    /// Lease count (this and the two counts below are kept for stats logging).
931    pub lease_count: usize,
    /// Fair value measurement count.
932    pub fair_value_measurement_count: usize,
    /// Framework difference record count.
933    pub framework_difference_count: usize,
934}
935
936/// Snapshot of the compliance regulations framework (standards, cross-references, jurisdictions, procedures, findings, filings, graph).
937#[derive(Debug, Clone, Default)]
938pub struct ComplianceRegulationsSnapshot {
939    /// Standard records, flattened for output.
940    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
941    /// Cross-reference records.
942    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
943    /// Jurisdiction profile records.
944    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
945    /// Audit procedures that were generated.
946    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
947    /// Compliance findings that were generated.
948    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
949    /// Regulatory filings that were generated.
950    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
951    /// Compliance graph; present only if graph integration is enabled.
952    pub compliance_graph: Option<datasynth_graph::Graph>,
953}
954
955/// Manufacturing data snapshot: production orders, quality inspections, cycle counts, BOMs, inventory movements.
// NOTE(review): the `*_count` fields presumably mirror the lengths of the
// matching vectors; nothing in this struct enforces that — confirm at the producer.
956#[derive(Debug, Clone, Default)]
957pub struct ManufacturingSnapshot {
958    /// Production orders (actual data).
959    pub production_orders: Vec<ProductionOrder>,
960    /// Quality inspections (actual data).
961    pub quality_inspections: Vec<QualityInspection>,
962    /// Cycle counts (actual data).
963    pub cycle_counts: Vec<CycleCount>,
964    /// BOM components (actual data).
965    pub bom_components: Vec<BomComponent>,
966    /// Inventory movements (actual data).
967    pub inventory_movements: Vec<InventoryMovement>,
968    /// Number of production orders.
969    pub production_order_count: usize,
970    /// Number of quality inspections.
971    pub quality_inspection_count: usize,
972    /// Number of cycle counts.
973    pub cycle_count_count: usize,
974    /// Number of BOM components.
975    pub bom_component_count: usize,
976    /// Number of inventory movements.
977    pub inventory_movement_count: usize,
978}
979
980/// Snapshot of sales quotes, management KPIs, and budgets, plus their counts.
981#[derive(Debug, Clone, Default)]
982pub struct SalesKpiBudgetsSnapshot {
983    /// Sales quotes (actual data).
984    pub sales_quotes: Vec<SalesQuote>,
985    /// Management KPIs (actual data).
986    pub kpis: Vec<ManagementKpi>,
987    /// Budgets (actual data).
988    pub budgets: Vec<Budget>,
989    /// Number of sales quotes.
990    pub sales_quote_count: usize,
991    /// Number of management KPIs.
992    pub kpi_count: usize,
993    /// Number of budget lines.
994    pub budget_line_count: usize,
995}
996
997/// Labels produced by anomaly injection, with an optional summary and a per-type tally.
998#[derive(Debug, Clone, Default)]
999pub struct AnomalyLabels {
1000    /// Every anomaly label.
1001    pub labels: Vec<LabeledAnomaly>,
1002    /// Summary statistics, when available.
1003    pub summary: Option<AnomalySummary>,
1004    /// Label count keyed by anomaly type.
1005    pub by_type: HashMap<String, usize>,
1006}
1007
1008/// Outcome of balance validation, as reported by the running balance tracker.
1009#[derive(Debug, Clone, Default)]
1010pub struct BalanceValidationResult {
1011    /// `true` when validation was performed.
1012    pub validated: bool,
1013    /// `true` when the balance sheet equation is satisfied.
1014    pub is_balanced: bool,
1015    /// How many entries were processed.
1016    pub entries_processed: u64,
1017    /// Sum of debits across all entries.
1018    pub total_debits: rust_decimal::Decimal,
1019    /// Sum of credits across all entries.
1020    pub total_credits: rust_decimal::Decimal,
1021    /// How many accounts were tracked.
1022    pub accounts_tracked: usize,
1023    /// How many companies were tracked.
1024    pub companies_tracked: usize,
1025    /// Validation errors that were encountered.
1026    pub validation_errors: Vec<ValidationError>,
1027    /// `true` when any unbalanced entries were found.
1028    pub has_unbalanced_entries: bool,
1029}
1030
1031/// Tax data snapshot (jurisdictions, codes, tax lines, returns, provisions, withholding, deferred tax, posting JEs).
1032#[derive(Debug, Clone, Default)]
1033pub struct TaxSnapshot {
1034    /// Tax jurisdiction records.
1035    pub jurisdictions: Vec<TaxJurisdiction>,
1036    /// Tax code records.
1037    pub codes: Vec<TaxCode>,
1038    /// Tax lines computed on documents.
1039    pub tax_lines: Vec<TaxLine>,
1040    /// Tax returns filed per period.
1041    pub tax_returns: Vec<TaxReturn>,
1042    /// Tax provision records.
1043    pub tax_provisions: Vec<TaxProvision>,
1044    /// Withholding tax records.
1045    pub withholding_records: Vec<WithholdingTaxRecord>,
1046    /// Anomaly labels for tax data.
1047    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1048    /// Number of jurisdictions.
1049    pub jurisdiction_count: usize,
1050    /// Number of tax codes.
1051    pub code_count: usize,
1052    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
1053    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1054    /// Journal entries posting tax payable/receivable from computed tax lines.
1055    pub tax_posting_journal_entries: Vec<JournalEntry>,
1056}
1057
1058/// Intercompany (IC) data snapshot: group structure, matched pairs, IC journal entries, eliminations, NCI.
1059#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1060pub struct IntercompanySnapshot {
1061    /// Group ownership structure (parent/subsidiary/associate relationships).
1062    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1063    /// IC matched pairs (transaction pairs between related entities).
1064    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1065    /// Seller-side IC journal entries generated from matched pairs.
1066    pub seller_journal_entries: Vec<JournalEntry>,
1067    /// Buyer-side IC journal entries generated from matched pairs.
1068    pub buyer_journal_entries: Vec<JournalEntry>,
1069    /// Elimination entries used for consolidation.
1070    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1071    /// NCI measurements derived from group structure ownership percentages.
1072    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1073    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
    ///
    /// Marked `#[serde(skip)]`: never serialized, and filled with its
    /// `Default` (`None`) when the snapshot is deserialized.
1074    #[serde(skip)]
1075    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1076    /// Number of IC matched pairs.
1077    pub matched_pair_count: usize,
1078    /// Number of IC elimination entries.
1079    pub elimination_entry_count: usize,
1080    /// IC matching rate, as a fraction (0.0 to 1.0).
1081    pub match_rate: f64,
1082}
1083
1084/// ESG data snapshot: environmental (emissions, energy, water, waste), social, governance, supply chain, disclosures.
1085#[derive(Debug, Clone, Default)]
1086pub struct EsgSnapshot {
1087    /// Emission records (scope 1, 2, 3).
1088    pub emissions: Vec<EmissionRecord>,
1089    /// Records of energy consumption.
1090    pub energy: Vec<EnergyConsumption>,
1091    /// Records of water usage.
1092    pub water: Vec<WaterUsage>,
1093    /// Waste records.
1094    pub waste: Vec<WasteRecord>,
1095    /// Metrics on workforce diversity.
1096    pub diversity: Vec<WorkforceDiversityMetric>,
1097    /// Metrics on pay equity.
1098    pub pay_equity: Vec<PayEquityMetric>,
1099    /// Safety incident records.
1100    pub safety_incidents: Vec<SafetyIncident>,
1101    /// Safety metric records.
1102    pub safety_metrics: Vec<SafetyMetric>,
1103    /// Governance metric records.
1104    pub governance: Vec<GovernanceMetric>,
1105    /// ESG assessments of suppliers.
1106    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1107    /// Materiality assessment records.
1108    pub materiality: Vec<MaterialityAssessment>,
1109    /// ESG disclosure records.
1110    pub disclosures: Vec<EsgDisclosure>,
1111    /// Climate scenario records.
1112    pub climate_scenarios: Vec<ClimateScenario>,
1113    /// Anomaly labels for ESG data.
1114    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1115    /// Total number of emission records.
1116    pub emission_count: usize,
1117    /// Total number of disclosures.
1118    pub disclosure_count: usize,
1119}
1120
1121/// Treasury data snapshot (cash management, pooling, hedging, debt, guarantees, netting, treasury JEs).
1122#[derive(Debug, Clone, Default)]
1123pub struct TreasurySnapshot {
1124    /// Cash positions (daily balances per account).
1125    pub cash_positions: Vec<CashPosition>,
1126    /// Cash forecast records.
1127    pub cash_forecasts: Vec<CashForecast>,
1128    /// Cash pool records.
1129    pub cash_pools: Vec<CashPool>,
1130    /// Sweep transactions of the cash pools.
1131    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1132    /// Hedging instrument records.
1133    pub hedging_instruments: Vec<HedgingInstrument>,
1134    /// Hedge relationships (ASC 815/IFRS 9 designations).
1135    pub hedge_relationships: Vec<HedgeRelationship>,
1136    /// Debt instrument records.
1137    pub debt_instruments: Vec<DebtInstrument>,
1138    /// Bank guarantees and letters of credit.
1139    pub bank_guarantees: Vec<BankGuarantee>,
1140    /// Intercompany netting runs.
1141    pub netting_runs: Vec<NettingRun>,
1142    /// Anomaly labels for treasury data.
1143    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1144    /// Journal entries generated from treasury instruments (debt interest accruals,
1145    /// hedge MTM, cash pool sweeps).
1146    pub journal_entries: Vec<JournalEntry>,
1147}
1148
1149/// Project accounting data snapshot (projects, cost lines, revenue, EVM, change orders, milestones).
1150#[derive(Debug, Clone, Default)]
1151pub struct ProjectAccountingSnapshot {
1152    /// Projects, each with its WBS hierarchy.
1153    pub projects: Vec<Project>,
1154    /// Project cost lines (linked from source documents).
1155    pub cost_lines: Vec<ProjectCostLine>,
1156    /// Project revenue recognition records.
1157    pub revenue_records: Vec<ProjectRevenue>,
1158    /// Earned value (EVM) metrics.
1159    pub earned_value_metrics: Vec<EarnedValueMetric>,
1160    /// Change order records.
1161    pub change_orders: Vec<ChangeOrder>,
1162    /// Project milestone records.
1163    pub milestones: Vec<ProjectMilestone>,
1164}
1165
1166/// Complete result of an enhanced generation run: one field per generated dataset or phase output.
1167#[derive(Debug, Default)]
1168pub struct EnhancedGenerationResult {
1169    /// Generated chart of accounts.
1170    pub chart_of_accounts: ChartOfAccounts,
1171    /// Master data snapshot.
1172    pub master_data: MasterDataSnapshot,
1173    /// Document flow snapshot.
1174    pub document_flows: DocumentFlowSnapshot,
1175    /// Subledger snapshot (linked from document flows).
1176    pub subledger: SubledgerSnapshot,
1177    /// OCPM event log snapshot (if OCPM generation enabled).
1178    pub ocpm: OcpmSnapshot,
1179    /// Audit data snapshot (if audit generation enabled).
1180    pub audit: AuditSnapshot,
1181    /// Banking KYC/AML data snapshot (if banking generation enabled).
1182    pub banking: BankingSnapshot,
1183    /// Graph export snapshot (if graph export enabled).
1184    pub graph_export: GraphExportSnapshot,
1185    /// S2C sourcing data snapshot (if sourcing generation enabled).
1186    pub sourcing: SourcingSnapshot,
1187    /// Financial reporting snapshot (financial statements, bank reconciliations, trial balances, segments, notes).
1188    pub financial_reporting: FinancialReportingSnapshot,
1189    /// HR data snapshot (payroll, time entries, expenses, benefits, pensions, stock compensation).
1190    pub hr: HrSnapshot,
1191    /// Accounting standards snapshot (revenue recognition, impairment, business combinations, ECL, provisions, leases, fair value).
1192    pub accounting_standards: AccountingStandardsSnapshot,
1193    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1194    pub manufacturing: ManufacturingSnapshot,
1195    /// Sales, KPI, and budget snapshot.
1196    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1197    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1198    pub tax: TaxSnapshot,
1199    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1200    pub esg: EsgSnapshot,
1201    /// Treasury data snapshot (cash management, hedging, debt).
1202    pub treasury: TreasurySnapshot,
1203    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1204    pub project_accounting: ProjectAccountingSnapshot,
1205    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1206    pub process_evolution: Vec<ProcessEvolutionEvent>,
1207    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1208    pub organizational_events: Vec<OrganizationalEvent>,
1209    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1210    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1211    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1212    pub intercompany: IntercompanySnapshot,
1213    /// Generated journal entries.
1214    pub journal_entries: Vec<JournalEntry>,
1215    /// Anomaly labels (if injection enabled).
1216    pub anomaly_labels: AnomalyLabels,
1217    /// Balance validation results (if validation enabled).
1218    pub balance_validation: BalanceValidationResult,
1219    /// Data quality statistics (if injection enabled).
1220    pub data_quality_stats: DataQualityStats,
1221    /// Data quality issue records (if injection enabled).
1222    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1223    /// Statistics about the generation run.
1224    pub statistics: EnhancedGenerationStatistics,
1225    /// Data lineage graph (if tracking enabled).
1226    pub lineage: Option<super::lineage::LineageGraph>,
1227    /// Result of the quality gate evaluation, when one is available.
1228    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1229    /// Internal controls (if controls generation enabled).
1230    pub internal_controls: Vec<InternalControl>,
1231    /// SoD (Segregation of Duties) violations identified during control application.
1232    ///
1233    /// Each record corresponds to a journal entry where `sod_violation == true`.
1234    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1235    /// Opening balances (if opening balance generation enabled).
1236    pub opening_balances: Vec<GeneratedOpeningBalance>,
1237    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1238    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1239    /// Counterfactual (original, mutated) JE pairs for ML training.
1240    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1241    /// Fraud red-flag indicators on P2P/O2C documents.
1242    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1243    /// Collusion rings (coordinated fraud networks).
1244    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1245    /// Bi-temporal version chains for vendor entities.
1246    pub temporal_vendor_chains:
1247        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1248    /// Entity relationship graph (nodes + edges with strength scores).
1249    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1250    /// Cross-process links (P2P ↔ O2C via inventory movements).
1251    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1252    /// Industry-specific GL accounts and metadata.
1253    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1254    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1255    pub compliance_regulations: ComplianceRegulationsSnapshot,
1256    /// v3.3.0: analytics-metadata snapshot (prior-year comparatives,
1257    /// industry benchmarks, management reports, drift events). Empty
1258    /// when `analytics_metadata.enabled = false`.
1259    pub analytics_metadata: AnalyticsMetadataSnapshot,
1260    /// v3.5.1+: statistical validation report (Benford, chi-squared,
1261    /// KS) over the generated amount distribution.  `None` when
1262    /// `distributions.validation.enabled = false`.
1263    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1264    /// v4.1.3+: interconnectivity snapshot — vendor tier assignments,
1265    /// customer value-segment labels, and industry-specific metadata
1266    /// populated from the previously-inert `vendor_network`,
1267    /// `customer_segmentation`, and `industry_specific` schema
1268    /// sections. Empty when those sections are disabled.
1269    pub interconnectivity: InterconnectivitySnapshot,
1270}
1271
1272/// v4.1.3+: interconnectivity snapshot. Populated when
1273/// `vendor_network.enabled` / `customer_segmentation.enabled` /
1274/// `industry_specific.enabled` are set. Holds tier / segment / industry
1275/// labels for generated entities so downstream tooling (graph export,
1276/// risk models) can consume them without re-deriving from scratch.
// NOTE(review): cluster, segment, and lifecycle labels are stored as free-form
// `String`s; the allowed values listed on each field are documented, not
// enforced by the type.
1277#[derive(Debug, Clone, Default)]
1278pub struct InterconnectivitySnapshot {
1279    /// `(vendor_id, tier)` pairs. Tier 1 = strategic / primary; Tier 2
1280    /// = sub-tier suppliers to tier 1; Tier 3 = sub-sub-tier.
1281    pub vendor_tiers: Vec<(String, u8)>,
1282    /// `(vendor_id, cluster_label)` pairs where cluster_label is one of
1283    /// `"reliable_strategic" / "standard_operational" / "transactional"
1284    /// / "problematic"`.
1285    pub vendor_clusters: Vec<(String, String)>,
1286    /// `(customer_id, value_segment)` pairs where value_segment is one
1287    /// of `"enterprise" / "mid_market" / "smb" / "consumer"`.
1288    pub customer_value_segments: Vec<(String, String)>,
1289    /// `(customer_id, lifecycle_stage)` pairs where stage is one of
1290    /// `"prospect" / "new" / "growth" / "mature" / "at_risk" /
1291    /// "churned" / "won_back"`.
1292    pub customer_lifecycle_stages: Vec<(String, String)>,
1293    /// Summary of the industry-specific knobs applied, if any (e.g.
1294    /// `"manufacturing.bom_depth=3"`).
1295    pub industry_metadata: Vec<String>,
1296}
1297
/// v3.3.0: snapshot for the analytics-metadata phase.
///
/// Groups the four artefact collections of that phase. All fields are
/// `Vec`s, so the derived `Default` is a snapshot with every collection
/// empty.
#[derive(Debug, Clone, Default)]
pub struct AnalyticsMetadataSnapshot {
    /// Prior-year comparative balances per account, per entity.
    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
    /// Industry benchmarks for the configured industry.
    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
    /// Management-report artefacts (dashboards, MDA sections).
    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
    /// Drift-event labels emitted from the post-generation sweep.
    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
}
1310
/// Enhanced statistics about a generation run.
///
/// A flat, serializable bag of counters and timings grouped by generation
/// area. Fields annotated with `#[serde(default)]` fall back to their
/// `Default` value (zero / `None`) when missing from deserialized input.
/// The three `neural_*` fields are additionally omitted from serialized
/// output when they are `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Total journal entries generated.
    pub total_entries: u64,
    /// Total line items generated.
    pub total_line_items: u64,
    /// Number of accounts in CoA.
    pub accounts_count: usize,
    /// Number of companies.
    pub companies_count: usize,
    /// Period in months.
    pub period_months: u32,
    /// Master data: vendor count.
    pub vendor_count: usize,
    /// Master data: customer count.
    pub customer_count: usize,
    /// Master data: material count.
    pub material_count: usize,
    /// Master data: asset count.
    pub asset_count: usize,
    /// Master data: employee count.
    pub employee_count: usize,
    /// Document flow: P2P chain count.
    pub p2p_chain_count: usize,
    /// Document flow: O2C chain count.
    pub o2c_chain_count: usize,
    /// Subledger: AP invoice count.
    pub ap_invoice_count: usize,
    /// Subledger: AR invoice count.
    pub ar_invoice_count: usize,
    /// OCPM: event count.
    pub ocpm_event_count: usize,
    /// OCPM: object count.
    pub ocpm_object_count: usize,
    /// OCPM: case count.
    pub ocpm_case_count: usize,
    /// Audit: engagement count.
    pub audit_engagement_count: usize,
    /// Audit: workpaper count.
    pub audit_workpaper_count: usize,
    /// Audit: evidence count.
    pub audit_evidence_count: usize,
    /// Audit: risk count.
    pub audit_risk_count: usize,
    /// Audit: finding count.
    pub audit_finding_count: usize,
    /// Audit: judgment count.
    pub audit_judgment_count: usize,
    /// ISA 505: confirmation count.
    #[serde(default)]
    pub audit_confirmation_count: usize,
    /// ISA 505: confirmation response count.
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    /// ISA 330/530: procedure step count.
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    /// ISA 330/530: sample count.
    #[serde(default)]
    pub audit_sample_count: usize,
    /// ISA 520 analytical procedure counts.
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    /// ISA 610 internal audit: function count.
    #[serde(default)]
    pub audit_ia_function_count: usize,
    /// ISA 610 internal audit: report count.
    #[serde(default)]
    pub audit_ia_report_count: usize,
    /// ISA 550: related party count.
    #[serde(default)]
    pub audit_related_party_count: usize,
    /// ISA 550: related party transaction count.
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,
    /// Anomaly counts.
    pub anomalies_injected: usize,
    /// Data quality issue counts.
    pub data_quality_issues: usize,
    /// Banking: customer count.
    pub banking_customer_count: usize,
    /// Banking: account count.
    pub banking_account_count: usize,
    /// Banking: transaction count.
    pub banking_transaction_count: usize,
    /// Banking: suspicious count.
    pub banking_suspicious_count: usize,
    /// Graph export: export count.
    pub graph_export_count: usize,
    /// Graph export: node count.
    pub graph_node_count: usize,
    /// Graph export: edge count.
    pub graph_edge_count: usize,
    /// LLM enrichment timing (milliseconds).
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    /// Number of vendor names enriched by LLM.
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    /// v4.1.1+: number of customer names enriched by LLM.
    #[serde(default)]
    pub llm_customers_enriched: usize,
    /// v4.1.1+: number of material descriptions enriched by LLM.
    #[serde(default)]
    pub llm_materials_enriched: usize,
    /// v4.1.1+: number of audit finding titles enriched by LLM.
    #[serde(default)]
    pub llm_findings_enriched: usize,
    /// Diffusion enhancement timing (milliseconds).
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    /// Number of diffusion samples generated.
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    /// Hybrid-diffusion blend weight actually applied (after clamp to [0,1]).
    /// `None` when the neural/hybrid backend is not active.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    /// Hybrid-diffusion strategy applied (weighted_average / column_select / threshold).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    /// How many columns were routed through the neural backend.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,
    /// Causal generation timing (milliseconds).
    #[serde(default)]
    pub causal_generation_ms: u64,
    /// Number of causal samples generated.
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// Whether causal validation passed. Unlike the `neural_*` options this
    /// field is always serialized, including when it is `None`.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    /// S2C sourcing: sourcing project count.
    #[serde(default)]
    pub sourcing_project_count: usize,
    /// S2C sourcing: RFx event count.
    #[serde(default)]
    pub rfx_event_count: usize,
    /// S2C sourcing: bid count.
    #[serde(default)]
    pub bid_count: usize,
    /// S2C sourcing: contract count.
    #[serde(default)]
    pub contract_count: usize,
    /// S2C sourcing: catalog item count.
    #[serde(default)]
    pub catalog_item_count: usize,
    /// S2C sourcing: scorecard count.
    #[serde(default)]
    pub scorecard_count: usize,
    /// Financial reporting: financial statement count.
    #[serde(default)]
    pub financial_statement_count: usize,
    /// Financial reporting: bank reconciliation count.
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    /// HR: payroll run count.
    #[serde(default)]
    pub payroll_run_count: usize,
    /// HR: time entry count.
    #[serde(default)]
    pub time_entry_count: usize,
    /// HR: expense report count.
    #[serde(default)]
    pub expense_report_count: usize,
    /// HR: benefit enrollment count.
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    /// HR: pension plan count.
    #[serde(default)]
    pub pension_plan_count: usize,
    /// HR: stock grant count.
    #[serde(default)]
    pub stock_grant_count: usize,
    /// Accounting standards: revenue contract count.
    #[serde(default)]
    pub revenue_contract_count: usize,
    /// Accounting standards: impairment test count.
    #[serde(default)]
    pub impairment_test_count: usize,
    /// Accounting standards: business combination count.
    #[serde(default)]
    pub business_combination_count: usize,
    /// Accounting standards: ECL model count.
    #[serde(default)]
    pub ecl_model_count: usize,
    /// Accounting standards: provision count.
    #[serde(default)]
    pub provision_count: usize,
    /// Manufacturing: production order count.
    #[serde(default)]
    pub production_order_count: usize,
    /// Manufacturing: quality inspection count.
    #[serde(default)]
    pub quality_inspection_count: usize,
    /// Manufacturing: cycle count count.
    #[serde(default)]
    pub cycle_count_count: usize,
    /// Manufacturing: BOM component count.
    #[serde(default)]
    pub bom_component_count: usize,
    /// Manufacturing: inventory movement count.
    #[serde(default)]
    pub inventory_movement_count: usize,
    /// Sales & reporting: sales quote count.
    #[serde(default)]
    pub sales_quote_count: usize,
    /// Sales & reporting: KPI count.
    #[serde(default)]
    pub kpi_count: usize,
    /// Sales & reporting: budget line count.
    #[serde(default)]
    pub budget_line_count: usize,
    /// Tax: jurisdiction count.
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    /// Tax: tax code count.
    #[serde(default)]
    pub tax_code_count: usize,
    /// ESG: emission count.
    #[serde(default)]
    pub esg_emission_count: usize,
    /// ESG: disclosure count.
    #[serde(default)]
    pub esg_disclosure_count: usize,
    /// Intercompany: matched pair count.
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    /// Intercompany: elimination count.
    #[serde(default)]
    pub ic_elimination_count: usize,
    /// Number of intercompany journal entries (seller + buyer side).
    #[serde(default)]
    pub ic_transaction_count: usize,
    /// Number of fixed asset subledger records.
    #[serde(default)]
    pub fa_subledger_count: usize,
    /// Number of inventory subledger records.
    #[serde(default)]
    pub inventory_subledger_count: usize,
    /// Treasury debt instrument count.
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    /// Treasury hedging instrument count.
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    /// Project accounting project count.
    #[serde(default)]
    pub project_count: usize,
    /// Project accounting change order count.
    #[serde(default)]
    pub project_change_order_count: usize,
    /// Tax provision count.
    #[serde(default)]
    pub tax_provision_count: usize,
    /// Opening balance count.
    #[serde(default)]
    pub opening_balance_count: usize,
    /// Subledger reconciliation count.
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    /// Tax line count.
    #[serde(default)]
    pub tax_line_count: usize,
    /// Project cost line count.
    #[serde(default)]
    pub project_cost_line_count: usize,
    /// Cash position count.
    #[serde(default)]
    pub cash_position_count: usize,
    /// Cash forecast count.
    #[serde(default)]
    pub cash_forecast_count: usize,
    /// Cash pool count.
    #[serde(default)]
    pub cash_pool_count: usize,
    /// Process evolution event count.
    #[serde(default)]
    pub process_evolution_event_count: usize,
    /// Organizational event count.
    #[serde(default)]
    pub organizational_event_count: usize,
    /// Counterfactual pair count.
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    /// Number of fraud red-flag indicators generated.
    #[serde(default)]
    pub red_flag_count: usize,
    /// Number of collusion rings generated.
    #[serde(default)]
    pub collusion_ring_count: usize,
    /// Number of bi-temporal vendor version chains generated.
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    /// Number of nodes in the entity relationship graph.
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    /// Number of edges in the entity relationship graph.
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    /// Number of cross-process links generated.
    #[serde(default)]
    pub cross_process_link_count: usize,
    /// Number of disruption events generated.
    #[serde(default)]
    pub disruption_event_count: usize,
    /// Number of industry-specific GL accounts generated.
    #[serde(default)]
    pub industry_gl_account_count: usize,
    /// Number of period-close journal entries generated (tax provision + closing entries).
    #[serde(default)]
    pub period_close_je_count: usize,
}
1580
/// Enhanced orchestrator with full feature integration.
///
/// Holds the validated configuration plus the shared state (seed, country
/// packs, template provider, resource guard, optional sinks) that the
/// generation phases draw on.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated when the orchestrator is created.
    config: GeneratorConfig,
    /// Phase configuration, including the `show_progress` flag.
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` at construction time.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts out as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or drawn at random when the
    /// config does not provide one.
    seed: u64,
    /// Progress-bar container; created when progress display is enabled.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint)
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry for localized data generation
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink for phase-by-phase output
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
    /// Shared template provider for user-supplied template packs.
    ///
    /// Constructed from `config.templates.path` at orchestrator creation
    /// time. When the path is `None`, this is still populated with an
    /// embedded-only provider so generators can always call trait methods
    /// without an `Option<…>` guard. v3.2.0+.
    template_provider: datasynth_core::templates::SharedTemplateProvider,
    /// v3.4.1+ temporal context for business-day / holiday awareness.
    ///
    /// Populated only when `temporal_patterns.business_days.enabled`. When
    /// `None`, document-flow / HR / treasury / period-close generators keep
    /// their legacy raw-RNG date-offset behaviour (byte-identical to v3.4.0
    /// for the same seed).
    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
}
1614
1615impl EnhancedOrchestrator {
1616    /// Create a new enhanced orchestrator.
1617    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1618        datasynth_config::validate_config(&config)?;
1619
1620        let seed = config.global.seed.unwrap_or_else(rand::random);
1621
1622        // Build resource guard from config
1623        let resource_guard = Self::build_resource_guard(&config, None);
1624
1625        // Build country pack registry from config
1626        let country_pack_registry = match &config.country_packs {
1627            Some(cp) => {
1628                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1629                    .map_err(|e| SynthError::config(e.to_string()))?
1630            }
1631            None => datasynth_core::CountryPackRegistry::builtin_only()
1632                .map_err(|e| SynthError::config(e.to_string()))?,
1633        };
1634
1635        // Build the shared template provider from config.templates.path.
1636        // `None` → embedded-only provider (byte-identical pre-v3.2.0 output).
1637        // `Some(path)` → load file/dir and honour `merge_strategy`.
1638        let template_provider = Self::build_template_provider(&config)?;
1639
1640        // v3.4.1: build a shared temporal context when
1641        // `temporal_patterns.business_days.enabled`. `None` preserves the
1642        // raw-RNG date-offset behaviour per-generator.
1643        let temporal_context = Self::build_temporal_context(&config)?;
1644
1645        Ok(Self {
1646            config,
1647            phase_config,
1648            coa: None,
1649            master_data: MasterDataSnapshot::default(),
1650            seed,
1651            multi_progress: None,
1652            resource_guard,
1653            output_path: None,
1654            copula_generators: Vec::new(),
1655            country_pack_registry,
1656            phase_sink: None,
1657            template_provider,
1658            temporal_context,
1659        })
1660    }
1661
1662    /// Build the shared [`TemporalContext`] from `config.temporal_patterns`.
1663    ///
1664    /// Returns `Ok(None)` when temporal-pattern features are disabled — the
1665    /// caller keeps its legacy raw-RNG path. Returns `Ok(Some(arc))` when
1666    /// enabled. Returns `Err` only for unrecoverable config errors.
1667    fn build_temporal_context(
1668        config: &GeneratorConfig,
1669    ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1670        use datasynth_core::distributions::{parse_region_code, TemporalContext};
1671
1672        let tp = &config.temporal_patterns;
1673        if !tp.enabled || !tp.business_days.enabled {
1674            return Ok(None);
1675        }
1676
1677        let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1678            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1679        let end_date = start_date + chrono::Months::new(config.global.period_months);
1680
1681        let region_code = tp
1682            .calendars
1683            .regions
1684            .first()
1685            .cloned()
1686            .unwrap_or_else(|| "US".to_string());
1687        let region = parse_region_code(&region_code);
1688
1689        Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1690    }
1691
1692    /// Build the shared template provider from `config.templates`.
1693    ///
1694    /// Always returns a provider — falls back to embedded-only when
1695    /// `config.templates.path` is `None`. The merge-strategy from config
1696    /// maps onto the loader's [`MergeStrategy`] enum. Load failures at
1697    /// orchestrator-construction time are fatal (preferable to silently
1698    /// using embedded pools when the user supplied a bad path).
1699    fn build_template_provider(
1700        config: &GeneratorConfig,
1701    ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1702        use datasynth_core::templates::{
1703            loader::{MergeStrategy, TemplateLoader},
1704            DefaultTemplateProvider,
1705        };
1706        use std::sync::Arc;
1707
1708        let provider = match &config.templates.path {
1709            None => DefaultTemplateProvider::new(),
1710            Some(path) => {
1711                let data = if path.is_dir() {
1712                    TemplateLoader::load_from_directory(path)
1713                } else {
1714                    TemplateLoader::load_from_file(path)
1715                }
1716                .map_err(|e| {
1717                    SynthError::config(format!(
1718                        "Failed to load templates from {}: {e}",
1719                        path.display()
1720                    ))
1721                })?;
1722                let strategy = match config.templates.merge_strategy {
1723                    datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1724                    datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1725                    datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1726                        MergeStrategy::MergePreferFile
1727                    }
1728                };
1729                DefaultTemplateProvider::with_templates(data, strategy)
1730            }
1731        };
1732        Ok(Arc::new(provider))
1733    }
1734
1735    /// Create with default phase config.
1736    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1737        Self::new(config, PhaseConfig::default())
1738    }
1739
1740    /// Set a streaming phase sink for real-time output (builder pattern).
1741    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1742        self.phase_sink = Some(sink);
1743        self
1744    }
1745
1746    /// Set a streaming phase sink on an existing orchestrator.
1747    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1748        self.phase_sink = Some(sink);
1749    }
1750
1751    /// Emit a batch of items to the phase sink (if configured).
1752    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1753        if let Some(ref sink) = self.phase_sink {
1754            for item in items {
1755                if let Ok(value) = serde_json::to_value(item) {
1756                    if let Err(e) = sink.emit(phase, type_name, &value) {
1757                        warn!(
1758                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1759                        );
1760                    }
1761                }
1762            }
1763            if let Err(e) = sink.phase_complete(phase) {
1764                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1765            }
1766        }
1767    }
1768
1769    /// Enable/disable progress bars.
1770    pub fn with_progress(mut self, show: bool) -> Self {
1771        self.phase_config.show_progress = show;
1772        if show {
1773            self.multi_progress = Some(MultiProgress::new());
1774        }
1775        self
1776    }
1777
1778    /// Set the output path for disk space monitoring.
1779    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1780        let path = path.into();
1781        self.output_path = Some(path.clone());
1782        // Rebuild resource guard with the output path
1783        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1784        self
1785    }
1786
1787    /// Access the country pack registry.
1788    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1789        &self.country_pack_registry
1790    }
1791
1792    /// Look up a country pack by country code string.
1793    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1794        self.country_pack_registry.get_by_str(country)
1795    }
1796
1797    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1798    /// company, defaulting to `"US"` if no companies are configured.
1799    fn primary_country_code(&self) -> &str {
1800        self.config
1801            .companies
1802            .first()
1803            .map(|c| c.country.as_str())
1804            .unwrap_or("US")
1805    }
1806
1807    /// Resolve the country pack for the primary (first) company.
1808    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1809        self.country_pack_for(self.primary_country_code())
1810    }
1811
1812    /// Resolve the CoA framework from config/country-pack.
1813    fn resolve_coa_framework(&self) -> CoAFramework {
1814        if self.config.accounting_standards.enabled {
1815            match self.config.accounting_standards.framework {
1816                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1817                    return CoAFramework::FrenchPcg;
1818                }
1819                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1820                    return CoAFramework::GermanSkr04;
1821                }
1822                _ => {}
1823            }
1824        }
1825        // Fallback: derive from country pack
1826        let pack = self.primary_pack();
1827        match pack.accounting.framework.as_str() {
1828            "french_gaap" => CoAFramework::FrenchPcg,
1829            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1830            _ => CoAFramework::UsGaap,
1831        }
1832    }
1833
1834    /// Check if copula generators are available.
1835    ///
1836    /// Returns true if the orchestrator has copula generators for preserving
1837    /// correlations (typically from fingerprint-based generation).
1838    pub fn has_copulas(&self) -> bool {
1839        !self.copula_generators.is_empty()
1840    }
1841
1842    /// Get the copula generators.
1843    ///
1844    /// Returns a reference to the copula generators for use during generation.
1845    /// These can be used to generate correlated samples that preserve the
1846    /// statistical relationships from the source data.
1847    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1848        &self.copula_generators
1849    }
1850
1851    /// Get a mutable reference to the copula generators.
1852    ///
1853    /// Allows generators to sample from copulas during data generation.
1854    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1855        &mut self.copula_generators
1856    }
1857
1858    /// Sample correlated values from a named copula.
1859    ///
1860    /// Returns None if the copula doesn't exist.
1861    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1862        self.copula_generators
1863            .iter_mut()
1864            .find(|c| c.name == copula_name)
1865            .map(|c| c.generator.sample())
1866    }
1867
1868    /// Create an orchestrator from a fingerprint file.
1869    ///
1870    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1871    /// and creates an orchestrator configured to generate data matching
1872    /// the statistical properties of the original data.
1873    ///
1874    /// # Arguments
1875    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1876    /// * `phase_config` - Phase configuration for generation
1877    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1878    ///
1879    /// # Example
1880    /// ```no_run
1881    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1882    /// use std::path::Path;
1883    ///
1884    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1885    ///     Path::new("fingerprint.dsf"),
1886    ///     PhaseConfig::default(),
1887    ///     1.0,
1888    /// ).unwrap();
1889    /// ```
1890    pub fn from_fingerprint(
1891        fingerprint_path: &std::path::Path,
1892        phase_config: PhaseConfig,
1893        scale: f64,
1894    ) -> SynthResult<Self> {
1895        info!("Loading fingerprint from: {}", fingerprint_path.display());
1896
1897        // Read the fingerprint
1898        let reader = FingerprintReader::new();
1899        let fingerprint = reader
1900            .read_from_file(fingerprint_path)
1901            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1902
1903        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1904    }
1905
1906    /// Create an orchestrator from a loaded fingerprint.
1907    ///
1908    /// # Arguments
1909    /// * `fingerprint` - The loaded fingerprint
1910    /// * `phase_config` - Phase configuration for generation
1911    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1912    pub fn from_fingerprint_data(
1913        fingerprint: Fingerprint,
1914        phase_config: PhaseConfig,
1915        scale: f64,
1916    ) -> SynthResult<Self> {
1917        info!(
1918            "Synthesizing config from fingerprint (version: {}, tables: {})",
1919            fingerprint.manifest.version,
1920            fingerprint.schema.tables.len()
1921        );
1922
1923        // Generate a seed for the synthesis
1924        let seed: u64 = rand::random();
1925        info!("Fingerprint synthesis seed: {}", seed);
1926
1927        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1928        let options = SynthesisOptions {
1929            scale,
1930            seed: Some(seed),
1931            preserve_correlations: true,
1932            inject_anomalies: true,
1933        };
1934        let synthesizer = ConfigSynthesizer::with_options(options);
1935
1936        // Synthesize full result including copula generators
1937        let synthesis_result = synthesizer
1938            .synthesize_full(&fingerprint, seed)
1939            .map_err(|e| {
1940                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1941            })?;
1942
1943        // Start with a base config from the fingerprint's industry if available
1944        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1945            Self::base_config_for_industry(industry)
1946        } else {
1947            Self::base_config_for_industry("manufacturing")
1948        };
1949
1950        // Apply the synthesized patches
1951        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1952
1953        // Log synthesis results
1954        info!(
1955            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1956            fingerprint.schema.tables.len(),
1957            scale,
1958            synthesis_result.copula_generators.len()
1959        );
1960
1961        if !synthesis_result.copula_generators.is_empty() {
1962            for spec in &synthesis_result.copula_generators {
1963                info!(
1964                    "  Copula '{}' for table '{}': {} columns",
1965                    spec.name,
1966                    spec.table,
1967                    spec.columns.len()
1968                );
1969            }
1970        }
1971
1972        // Create the orchestrator with the synthesized config
1973        let mut orchestrator = Self::new(config, phase_config)?;
1974
1975        // Store copula generators for use during generation
1976        orchestrator.copula_generators = synthesis_result.copula_generators;
1977
1978        Ok(orchestrator)
1979    }
1980
1981    /// Create a base config for a given industry.
1982    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1983        use datasynth_config::presets::create_preset;
1984        use datasynth_config::TransactionVolume;
1985        use datasynth_core::models::{CoAComplexity, IndustrySector};
1986
1987        let sector = match industry.to_lowercase().as_str() {
1988            "manufacturing" => IndustrySector::Manufacturing,
1989            "retail" => IndustrySector::Retail,
1990            "financial" | "financial_services" => IndustrySector::FinancialServices,
1991            "healthcare" => IndustrySector::Healthcare,
1992            "technology" | "tech" => IndustrySector::Technology,
1993            _ => IndustrySector::Manufacturing,
1994        };
1995
1996        // Create a preset with reasonable defaults
1997        create_preset(
1998            sector,
1999            1,  // company count
2000            12, // period months
2001            CoAComplexity::Medium,
2002            TransactionVolume::TenK,
2003        )
2004    }
2005
2006    /// Apply a config patch to a GeneratorConfig.
2007    fn apply_config_patch(
2008        mut config: GeneratorConfig,
2009        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2010    ) -> GeneratorConfig {
2011        use datasynth_fingerprint::synthesis::ConfigValue;
2012
2013        for (key, value) in patch.values() {
2014            match (key.as_str(), value) {
2015                // Transaction count is handled via TransactionVolume enum on companies
2016                // Log it but cannot directly set it (would need to modify company volumes)
2017                ("transactions.count", ConfigValue::Integer(n)) => {
2018                    info!(
2019                        "Fingerprint suggests {} transactions (apply via company volumes)",
2020                        n
2021                    );
2022                }
2023                ("global.period_months", ConfigValue::Integer(n)) => {
2024                    config.global.period_months = (*n).clamp(1, 120) as u32;
2025                }
2026                ("global.start_date", ConfigValue::String(s)) => {
2027                    config.global.start_date = s.clone();
2028                }
2029                ("global.seed", ConfigValue::Integer(n)) => {
2030                    config.global.seed = Some(*n as u64);
2031                }
2032                ("fraud.enabled", ConfigValue::Bool(b)) => {
2033                    config.fraud.enabled = *b;
2034                }
2035                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2036                    config.fraud.fraud_rate = *f;
2037                }
2038                ("data_quality.enabled", ConfigValue::Bool(b)) => {
2039                    config.data_quality.enabled = *b;
2040                }
2041                // Handle anomaly injection paths (mapped to fraud config)
2042                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2043                    config.fraud.enabled = *b;
2044                }
2045                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2046                    config.fraud.fraud_rate = *f;
2047                }
2048                _ => {
2049                    debug!("Ignoring unknown config patch key: {}", key);
2050                }
2051            }
2052        }
2053
2054        config
2055    }
2056
2057    /// Build a resource guard from the configuration.
2058    fn build_resource_guard(
2059        config: &GeneratorConfig,
2060        output_path: Option<PathBuf>,
2061    ) -> ResourceGuard {
2062        let mut builder = ResourceGuardBuilder::new();
2063
2064        // Configure memory limit if set
2065        if config.global.memory_limit_mb > 0 {
2066            builder = builder.memory_limit(config.global.memory_limit_mb);
2067        }
2068
2069        // Configure disk monitoring for output path
2070        if let Some(path) = output_path {
2071            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
2072        }
2073
2074        // Use conservative degradation settings for production safety
2075        builder = builder.conservative();
2076
2077        builder.build()
2078    }
2079
2080    /// Check resources (memory, disk, CPU) and return degradation level.
2081    ///
2082    /// Returns an error if hard limits are exceeded.
2083    /// Returns Ok(DegradationLevel) indicating current resource state.
2084    fn check_resources(&self) -> SynthResult<DegradationLevel> {
2085        self.resource_guard.check()
2086    }
2087
2088    /// Check resources with logging.
2089    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2090        let level = self.resource_guard.check()?;
2091
2092        if level != DegradationLevel::Normal {
2093            warn!(
2094                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2095                phase,
2096                level,
2097                self.resource_guard.current_memory_mb(),
2098                self.resource_guard.available_disk_mb()
2099            );
2100        }
2101
2102        Ok(level)
2103    }
2104
2105    /// Get current degradation actions based on resource state.
2106    fn get_degradation_actions(&self) -> DegradationActions {
2107        self.resource_guard.get_actions()
2108    }
2109
2110    /// Legacy method for backwards compatibility - now uses ResourceGuard.
2111    fn check_memory_limit(&self) -> SynthResult<()> {
2112        self.check_resources()?;
2113        Ok(())
2114    }
2115
2116    /// Run the complete generation workflow.
2117    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2118        info!("Starting enhanced generation workflow");
2119        info!(
2120            "Config: industry={:?}, period_months={}, companies={}",
2121            self.config.global.industry,
2122            self.config.global.period_months,
2123            self.config.companies.len()
2124        );
2125
2126        // Set decimal serialization mode (thread-local, affects JSON output).
2127        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
2128        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2129        datasynth_core::serde_decimal::set_numeric_native(is_native);
2130        struct NumericModeGuard;
2131        impl Drop for NumericModeGuard {
2132            fn drop(&mut self) {
2133                datasynth_core::serde_decimal::set_numeric_native(false);
2134            }
2135        }
2136        let _numeric_guard = if is_native {
2137            Some(NumericModeGuard)
2138        } else {
2139            None
2140        };
2141
2142        // Initial resource check before starting
2143        let initial_level = self.check_resources_with_log("initial")?;
2144        if initial_level == DegradationLevel::Emergency {
2145            return Err(SynthError::resource(
2146                "Insufficient resources to start generation",
2147            ));
2148        }
2149
2150        let mut stats = EnhancedGenerationStatistics {
2151            companies_count: self.config.companies.len(),
2152            period_months: self.config.global.period_months,
2153            ..Default::default()
2154        };
2155
2156        // Phase 1: Chart of Accounts
2157        let coa = self.phase_chart_of_accounts(&mut stats)?;
2158
2159        // Phase 2: Master Data
2160        self.phase_master_data(&mut stats)?;
2161
2162        // Emit master data to stream sink
2163        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2164        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2165        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2166
2167        // Phase 3: Document Flows + Subledger Linking
2168        let (mut document_flows, mut subledger, fa_journal_entries) =
2169            self.phase_document_flows(&mut stats)?;
2170
2171        // Emit document flows to stream sink
2172        self.emit_phase_items(
2173            "document_flows",
2174            "PurchaseOrder",
2175            &document_flows.purchase_orders,
2176        );
2177        self.emit_phase_items(
2178            "document_flows",
2179            "GoodsReceipt",
2180            &document_flows.goods_receipts,
2181        );
2182        self.emit_phase_items(
2183            "document_flows",
2184            "VendorInvoice",
2185            &document_flows.vendor_invoices,
2186        );
2187        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2188        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2189
2190        // Phase 3b: Opening Balances (before JE generation)
2191        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2192
2193        // Phase 3c: Convert opening balances to journal entries and prepend them.
2194        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
2195        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
2196        // balance map type.
2197        let opening_balance_jes: Vec<JournalEntry> = opening_balances
2198            .iter()
2199            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2200            .collect();
2201        if !opening_balance_jes.is_empty() {
2202            debug!(
2203                "Prepending {} opening balance JEs to entries",
2204                opening_balance_jes.len()
2205            );
2206        }
2207
2208        // Phase 4: Journal Entries
2209        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2210
2211        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
2212        // starts from the correct initial state.
2213        if !opening_balance_jes.is_empty() {
2214            let mut combined = opening_balance_jes;
2215            combined.extend(entries);
2216            entries = combined;
2217        }
2218
2219        // Phase 4c: Append FA acquisition journal entries to main entries
2220        if !fa_journal_entries.is_empty() {
2221            debug!(
2222                "Appending {} FA acquisition JEs to main entries",
2223                fa_journal_entries.len()
2224            );
2225            entries.extend(fa_journal_entries);
2226        }
2227
2228        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
2229        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2230
2231        // Get current degradation actions for optional phases
2232        let actions = self.get_degradation_actions();
2233
2234        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
2235        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2236
2237        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
2238        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
2239        if !sourcing.contracts.is_empty() {
2240            let mut linked_count = 0usize;
2241            // Collect (vendor_id, po_id) pairs from P2P chains
2242            let po_vendor_pairs: Vec<(String, String)> = document_flows
2243                .p2p_chains
2244                .iter()
2245                .map(|chain| {
2246                    (
2247                        chain.purchase_order.vendor_id.clone(),
2248                        chain.purchase_order.header.document_id.clone(),
2249                    )
2250                })
2251                .collect();
2252
2253            for chain in &mut document_flows.p2p_chains {
2254                if chain.purchase_order.contract_id.is_none() {
2255                    if let Some(contract) = sourcing
2256                        .contracts
2257                        .iter()
2258                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2259                    {
2260                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2261                        linked_count += 1;
2262                    }
2263                }
2264            }
2265
2266            // Populate reverse FK: purchase_order_ids on each contract
2267            for contract in &mut sourcing.contracts {
2268                let po_ids: Vec<String> = po_vendor_pairs
2269                    .iter()
2270                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2271                    .map(|(_, po_id)| po_id.clone())
2272                    .collect();
2273                if !po_ids.is_empty() {
2274                    contract.purchase_order_ids = po_ids;
2275                }
2276            }
2277
2278            if linked_count > 0 {
2279                debug!(
2280                    "Linked {} purchase orders to S2C contracts by vendor match",
2281                    linked_count
2282                );
2283            }
2284        }
2285
2286        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2287        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2288
2289        // Phase 5c: Append IC journal entries to main entries
2290        if !intercompany.seller_journal_entries.is_empty()
2291            || !intercompany.buyer_journal_entries.is_empty()
2292        {
2293            let ic_je_count = intercompany.seller_journal_entries.len()
2294                + intercompany.buyer_journal_entries.len();
2295            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2296            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2297            debug!(
2298                "Appended {} IC journal entries to main entries",
2299                ic_je_count
2300            );
2301        }
2302
2303        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2304        if !intercompany.elimination_entries.is_empty() {
2305            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2306                &intercompany.elimination_entries,
2307            );
2308            if !elim_jes.is_empty() {
2309                debug!(
2310                    "Appended {} elimination journal entries to main entries",
2311                    elim_jes.len()
2312                );
2313                // IC elimination net-zero assertion (v2.5 hardening)
2314                let elim_debit: rust_decimal::Decimal =
2315                    elim_jes.iter().map(|je| je.total_debit()).sum();
2316                let elim_credit: rust_decimal::Decimal =
2317                    elim_jes.iter().map(|je| je.total_credit()).sum();
2318                let elim_diff = (elim_debit - elim_credit).abs();
2319                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2320                if elim_diff > tolerance {
2321                    return Err(datasynth_core::error::SynthError::generation(format!(
2322                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2323                        elim_debit, elim_credit, elim_diff, tolerance
2324                    )));
2325                }
2326                debug!(
2327                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2328                    elim_debit, elim_credit, elim_diff
2329                );
2330                entries.extend(elim_jes);
2331            }
2332        }
2333
2334        // Phase 5e: Wire IC source documents into document flow snapshot
2335        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2336            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2337                document_flows
2338                    .customer_invoices
2339                    .extend(ic_docs.seller_invoices.iter().cloned());
2340                document_flows
2341                    .purchase_orders
2342                    .extend(ic_docs.buyer_orders.iter().cloned());
2343                document_flows
2344                    .goods_receipts
2345                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2346                document_flows
2347                    .vendor_invoices
2348                    .extend(ic_docs.buyer_invoices.iter().cloned());
2349                debug!(
2350                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2351                    ic_docs.seller_invoices.len(),
2352                    ic_docs.buyer_orders.len(),
2353                    ic_docs.buyer_goods_receipts.len(),
2354                    ic_docs.buyer_invoices.len(),
2355                );
2356            }
2357        }
2358
2359        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2360        let hr = self.phase_hr_data(&mut stats)?;
2361
2362        // Phase 6b: Generate JEs from payroll runs
2363        if !hr.payroll_runs.is_empty() {
2364            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2365            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2366            entries.extend(payroll_jes);
2367        }
2368
2369        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2370        if !hr.pension_journal_entries.is_empty() {
2371            debug!(
2372                "Generated {} JEs from pension plans",
2373                hr.pension_journal_entries.len()
2374            );
2375            entries.extend(hr.pension_journal_entries.iter().cloned());
2376        }
2377
2378        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2379        if !hr.stock_comp_journal_entries.is_empty() {
2380            debug!(
2381                "Generated {} JEs from stock-based compensation",
2382                hr.stock_comp_journal_entries.len()
2383            );
2384            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2385        }
2386
2387        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2388        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2389
2390        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2391        if !manufacturing_snap.production_orders.is_empty() {
2392            let currency = self
2393                .config
2394                .companies
2395                .first()
2396                .map(|c| c.currency.as_str())
2397                .unwrap_or("USD");
2398            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2399                &manufacturing_snap.production_orders,
2400                &manufacturing_snap.quality_inspections,
2401                currency,
2402            );
2403            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2404            entries.extend(mfg_jes);
2405        }
2406
2407        // Phase 7a-warranty: Generate warranty provisions per company
2408        if !manufacturing_snap.quality_inspections.is_empty() {
2409            let framework = match self.config.accounting_standards.framework {
2410                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2411                _ => "US_GAAP",
2412            };
2413            for company in &self.config.companies {
2414                let company_orders: Vec<_> = manufacturing_snap
2415                    .production_orders
2416                    .iter()
2417                    .filter(|o| o.company_code == company.code)
2418                    .cloned()
2419                    .collect();
2420                let company_inspections: Vec<_> = manufacturing_snap
2421                    .quality_inspections
2422                    .iter()
2423                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2424                    .cloned()
2425                    .collect();
2426                if company_inspections.is_empty() {
2427                    continue;
2428                }
2429                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2430                let warranty_result = warranty_gen.generate(
2431                    &company.code,
2432                    &company_orders,
2433                    &company_inspections,
2434                    &company.currency,
2435                    framework,
2436                );
2437                if !warranty_result.journal_entries.is_empty() {
2438                    debug!(
2439                        "Generated {} warranty provision JEs for {}",
2440                        warranty_result.journal_entries.len(),
2441                        company.code
2442                    );
2443                    entries.extend(warranty_result.journal_entries);
2444                }
2445            }
2446        }
2447
2448        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2449        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2450        {
2451            let cogs_currency = self
2452                .config
2453                .companies
2454                .first()
2455                .map(|c| c.currency.as_str())
2456                .unwrap_or("USD");
2457            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2458                &document_flows.deliveries,
2459                &manufacturing_snap.production_orders,
2460                cogs_currency,
2461            );
2462            if !cogs_jes.is_empty() {
2463                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2464                entries.extend(cogs_jes);
2465            }
2466        }
2467
2468        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2469        //
2470        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2471        // subledger inventory positions.  Here we reconcile them so that position balances
2472        // reflect the actual stock movements within the generation period.
2473        if !manufacturing_snap.inventory_movements.is_empty()
2474            && !subledger.inventory_positions.is_empty()
2475        {
2476            use datasynth_core::models::MovementType as MfgMovementType;
2477            let mut receipt_count = 0usize;
2478            let mut issue_count = 0usize;
2479            for movement in &manufacturing_snap.inventory_movements {
2480                // Find a matching position by material code and company
2481                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2482                    p.material_id == movement.material_code
2483                        && p.company_code == movement.entity_code
2484                }) {
2485                    match movement.movement_type {
2486                        MfgMovementType::GoodsReceipt => {
2487                            // Increase stock and update weighted-average cost
2488                            pos.add_quantity(
2489                                movement.quantity,
2490                                movement.value,
2491                                movement.movement_date,
2492                            );
2493                            receipt_count += 1;
2494                        }
2495                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2496                            // Decrease stock (best-effort; silently skip if insufficient)
2497                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2498                            issue_count += 1;
2499                        }
2500                        _ => {}
2501                    }
2502                }
2503            }
2504            debug!(
2505                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2506                manufacturing_snap.inventory_movements.len(),
2507                receipt_count,
2508                issue_count,
2509            );
2510        }
2511
2512        // Update final entry/line-item stats after all JE-generating phases
2513        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2514        if !entries.is_empty() {
2515            stats.total_entries = entries.len() as u64;
2516            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2517            debug!(
2518                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2519                stats.total_entries, stats.total_line_items
2520            );
2521        }
2522
2523        // Phase 7b: Apply internal controls to journal entries
2524        if self.config.internal_controls.enabled && !entries.is_empty() {
2525            info!("Phase 7b: Applying internal controls to journal entries");
2526            let control_config = ControlGeneratorConfig {
2527                exception_rate: self.config.internal_controls.exception_rate,
2528                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2529                enable_sox_marking: true,
2530                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2531                    self.config.internal_controls.sox_materiality_threshold,
2532                )
2533                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2534                ..Default::default()
2535            };
2536            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2537            for entry in &mut entries {
2538                control_gen.apply_controls(entry, &coa);
2539            }
2540            let with_controls = entries
2541                .iter()
2542                .filter(|e| !e.header.control_ids.is_empty())
2543                .count();
2544            info!(
2545                "Applied controls to {} entries ({} with control IDs assigned)",
2546                entries.len(),
2547                with_controls
2548            );
2549        }
2550
2551        // Phase 7c: Extract SoD violations from annotated journal entries.
2552        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2553        // Here we materialise those flags into standalone SodViolation records.
2554        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2555            .iter()
2556            .filter(|e| e.header.sod_violation)
2557            .filter_map(|e| {
2558                e.header.sod_conflict_type.map(|ct| {
2559                    use datasynth_core::models::{RiskLevel, SodViolation};
2560                    let severity = match ct {
2561                        datasynth_core::models::SodConflictType::PaymentReleaser
2562                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2563                            RiskLevel::Critical
2564                        }
2565                        datasynth_core::models::SodConflictType::PreparerApprover
2566                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2567                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2568                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2569                            RiskLevel::High
2570                        }
2571                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2572                            RiskLevel::Medium
2573                        }
2574                    };
2575                    let action = format!(
2576                        "SoD conflict {:?} on entry {} ({})",
2577                        ct, e.header.document_id, e.header.company_code
2578                    );
2579                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2580                })
2581            })
2582            .collect();
2583        if !sod_violations.is_empty() {
2584            info!(
2585                "Phase 7c: Extracted {} SoD violations from {} entries",
2586                sod_violations.len(),
2587                entries.len()
2588            );
2589        }
2590
2591        // Emit journal entries to stream sink (after all JE-generating phases)
2592        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2593
2594        // Phase 7d: Document-level fraud injection + propagation to derived JEs.
2595        //
2596        // This runs BEFORE line-level anomaly injection so that JEs tagged by
2597        // document-level fraud are exempt from subsequent line-level flag
2598        // overwrites, and so downstream consumers see a coherent picture.
2599        //
2600        // Gated by `fraud.document_fraud_rate` — `None` or `0.0` is a no-op.
2601        {
2602            let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2603            if self.config.fraud.enabled && doc_rate > 0.0 {
2604                use datasynth_core::fraud_propagation::{
2605                    inject_document_fraud, propagate_documents_to_entries,
2606                };
2607                use datasynth_core::utils::weighted_select;
2608                use datasynth_core::FraudType;
2609                use rand_chacha::rand_core::SeedableRng;
2610
2611                let dist = &self.config.fraud.fraud_type_distribution;
2612                let fraud_type_weights: [(FraudType, f64); 8] = [
2613                    (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2614                    (FraudType::FictitiousEntry, dist.fictitious_transaction),
2615                    (FraudType::RevenueManipulation, dist.revenue_manipulation),
2616                    (
2617                        FraudType::ImproperCapitalization,
2618                        dist.expense_capitalization,
2619                    ),
2620                    (FraudType::SplitTransaction, dist.split_transaction),
2621                    (FraudType::TimingAnomaly, dist.timing_anomaly),
2622                    (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2623                    (FraudType::DuplicatePayment, dist.duplicate_payment),
2624                ];
2625                let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2626                let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2627                    if weights_sum <= 0.0 {
2628                        FraudType::FictitiousEntry
2629                    } else {
2630                        *weighted_select(rng, &fraud_type_weights)
2631                    }
2632                };
2633
2634                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2635                let mut doc_tagged = 0usize;
2636                macro_rules! inject_into {
2637                    ($collection:expr) => {{
2638                        let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2639                            $collection.iter_mut().map(|d| &mut d.header).collect();
2640                        doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2641                    }};
2642                }
2643                inject_into!(document_flows.purchase_orders);
2644                inject_into!(document_flows.goods_receipts);
2645                inject_into!(document_flows.vendor_invoices);
2646                inject_into!(document_flows.payments);
2647                inject_into!(document_flows.sales_orders);
2648                inject_into!(document_flows.deliveries);
2649                inject_into!(document_flows.customer_invoices);
2650                if doc_tagged > 0 {
2651                    info!(
2652                        "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2653                    );
2654                }
2655
2656                if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2657                    let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2658                        Vec::new();
2659                    headers.extend(
2660                        document_flows
2661                            .purchase_orders
2662                            .iter()
2663                            .map(|d| d.header.clone()),
2664                    );
2665                    headers.extend(
2666                        document_flows
2667                            .goods_receipts
2668                            .iter()
2669                            .map(|d| d.header.clone()),
2670                    );
2671                    headers.extend(
2672                        document_flows
2673                            .vendor_invoices
2674                            .iter()
2675                            .map(|d| d.header.clone()),
2676                    );
2677                    headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2678                    headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2679                    headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2680                    headers.extend(
2681                        document_flows
2682                            .customer_invoices
2683                            .iter()
2684                            .map(|d| d.header.clone()),
2685                    );
2686                    let propagated = propagate_documents_to_entries(&headers, &mut entries);
2687                    if propagated > 0 {
2688                        info!(
2689                            "Propagated document-level fraud to {propagated} derived journal entries"
2690                        );
2691                    }
2692                }
2693            }
2694        }
2695
2696        // Phase 8: Anomaly Injection (after all JE-generating phases)
2697        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2698
2699        // Phase 8b: Apply behavioral biases to fraud entries that did NOT go
2700        // through the anomaly injector.
2701        //
2702        // Three paths set `is_fraud = true` without touching `is_anomaly`:
2703        //   - je_generator::determine_fraud (intrinsic fraud during JE generation)
2704        //   - fraud_propagation::propagate_documents_to_entries (doc-level cascade)
2705        //   - Any external mutation that sets is_fraud after the fact
2706        //
2707        // The anomaly injector already applies the same bias inline when it
2708        // tags an entry as fraud (and sets is_anomaly=true in the same step),
2709        // so gating this sweep on `!is_anomaly` avoids double-application.
2710        //
2711        // Without this sweep, fraud entries from these paths show 0 lift on
2712        // the canonical forensic signals (is_round_1000, is_off_hours,
2713        // is_weekend, is_post_close), which is exactly what the SDK-side
2714        // evaluator caught in v3.1 — fraud features had worse lift than
2715        // baseline. See DS-3.1 post-deploy feedback.
2716        {
2717            use datasynth_core::fraud_bias::{
2718                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2719            };
2720            use rand_chacha::rand_core::SeedableRng;
2721            let cfg = FraudBehavioralBiasConfig::default();
2722            if cfg.enabled {
2723                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2724                let mut swept = 0usize;
2725                for entry in entries.iter_mut() {
2726                    if entry.header.is_fraud && !entry.header.is_anomaly {
2727                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2728                        swept += 1;
2729                    }
2730                }
2731                if swept > 0 {
2732                    info!(
2733                        "Applied behavioral biases to {swept} non-anomaly fraud entries \
2734                         (doc-propagated + je_generator intrinsic fraud)"
2735                    );
2736                }
2737            }
2738        }
2739
2740        // Emit anomaly labels to stream sink
2741        self.emit_phase_items(
2742            "anomaly_injection",
2743            "LabeledAnomaly",
2744            &anomaly_labels.labels,
2745        );
2746
2747        // Propagate fraud labels from journal entries to source documents.
2748        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2749        // instead of tracing through document_references.json.
2750        //
2751        // Gated by `fraud.propagate_to_document` (default true) — disable when
2752        // downstream consumers want document fraud flags to reflect only
2753        // document-level injection, not line-level.
2754        if self.config.fraud.propagate_to_document {
2755            use std::collections::HashMap;
2756            // Build a map from lookup key -> fraud_type, populated from fraudulent JEs.
2757            //
2758            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2759            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2760            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2761            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2762            // we register BOTH the prefixed form (raw reference) AND the bare form
2763            // (post-colon portion) in the map. Also register the JE's document_id
2764            // UUID so documents that set `journal_entry_id` match via that path.
2765            //
2766            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2767            // looked up "foo", silently producing 0 propagations.
2768            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2769            for je in &entries {
2770                if je.header.is_fraud {
2771                    if let Some(ref fraud_type) = je.header.fraud_type {
2772                        if let Some(ref reference) = je.header.reference {
2773                            // Register the full reference ("GR:PO-2024-000001")
2774                            fraud_map.insert(reference.clone(), *fraud_type);
2775                            // Also register the bare document ID ("PO-2024-000001")
2776                            // by stripping the "PREFIX:" if present.
2777                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2778                                if !bare.is_empty() {
2779                                    fraud_map.insert(bare.to_string(), *fraud_type);
2780                                }
2781                            }
2782                        }
2783                        // Also tag via journal_entry_id on document headers
2784                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2785                    }
2786                }
2787            }
2788            if !fraud_map.is_empty() {
2789                let mut propagated = 0usize;
2790                // Use DocumentHeader::propagate_fraud method for each doc type
2791                macro_rules! propagate_to {
2792                    ($collection:expr) => {
2793                        for doc in &mut $collection {
2794                            if doc.header.propagate_fraud(&fraud_map) {
2795                                propagated += 1;
2796                            }
2797                        }
2798                    };
2799                }
2800                propagate_to!(document_flows.purchase_orders);
2801                propagate_to!(document_flows.goods_receipts);
2802                propagate_to!(document_flows.vendor_invoices);
2803                propagate_to!(document_flows.payments);
2804                propagate_to!(document_flows.sales_orders);
2805                propagate_to!(document_flows.deliveries);
2806                propagate_to!(document_flows.customer_invoices);
2807                if propagated > 0 {
2808                    info!(
2809                        "Propagated fraud labels to {} document flow records",
2810                        propagated
2811                    );
2812                }
2813            }
2814        }
2815
2816        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2817        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2818
2819        // Emit red flags to stream sink
2820        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2821
2822        // Phase 26b: Collusion Ring Generation (after red flags)
2823        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2824
2825        // Emit collusion rings to stream sink
2826        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2827
2828        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2829        let balance_validation = self.phase_balance_validation(&entries)?;
2830
2831        // Phase 9b: GL-to-Subledger Reconciliation
2832        let subledger_reconciliation =
2833            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2834
2835        // Phase 10: Data Quality Injection
2836        let (data_quality_stats, quality_issues) =
2837            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2838
2839        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2840        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2841
2842        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
2843        {
2844            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2845
2846            // Assert 1: Every JE not flagged as fraud or anomaly must individually balance
2847            // (debits = credits). Flagged JEs are skipped: they may be intentionally unbalanced.
2848            let mut unbalanced_clean = 0usize;
2849            for je in &entries {
2850                if je.header.is_fraud || je.header.is_anomaly {
2851                    continue;
2852                }
2853                let diff = (je.total_debit() - je.total_credit()).abs();
2854                if diff > tolerance {
2855                    unbalanced_clean += 1;
2856                    if unbalanced_clean <= 3 {
2857                        warn!(
2858                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2859                            je.header.document_id,
2860                            je.total_debit(),
2861                            je.total_credit(),
2862                            diff
2863                        );
2864                    }
2865                }
2866            }
2867            if unbalanced_clean > 0 {
2868                return Err(datasynth_core::error::SynthError::generation(format!(
2869                    "{} non-anomaly JEs are unbalanced (debits != credits). \
2870                     First few logged above. Tolerance={}",
2871                    unbalanced_clean, tolerance
2872                )));
2873            }
2874            debug!(
2875                "Phase 10c: All {} non-anomaly JEs individually balanced",
2876                entries
2877                    .iter()
2878                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2879                    .count()
2880            );
2881
2882            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
2883            let company_codes: Vec<String> = self
2884                .config
2885                .companies
2886                .iter()
2887                .map(|c| c.code.clone())
2888                .collect();
2889            for company_code in &company_codes {
2890                let mut assets = rust_decimal::Decimal::ZERO;
2891                let mut liab_equity = rust_decimal::Decimal::ZERO;
2892
2893                for entry in &entries {
2894                    if entry.header.company_code != *company_code {
2895                        continue;
2896                    }
2897                    for line in &entry.lines {
2898                        let acct = &line.gl_account;
2899                        let net = line.debit_amount - line.credit_amount;
2900                        // Asset accounts (1xxx): normal debit balance
2901                        if acct.starts_with('1') {
2902                            assets += net;
2903                        }
2904                        // Liability (2xxx) + Equity (3xxx): normal credit balance
2905                        else if acct.starts_with('2') || acct.starts_with('3') {
2906                            liab_equity -= net; // credit-normal, so negate debit-net
2907                        }
2908                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
2909                        // so they net to zero after closing entries
2910                    }
2911                }
2912
2913                let bs_diff = (assets - liab_equity).abs();
2914                if bs_diff > tolerance {
2915                    warn!(
2916                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
2917                         revenue/expense closing entries may not fully offset",
2918                        company_code, assets, liab_equity, bs_diff
2919                    );
2920                    // Warn rather than error: multi-period datasets may have timing
2921                    // differences from accruals/deferrals that resolve in later periods.
2922                    // The per-JE balance check (Assert 1) is the hard gate.
2923                } else {
2924                    debug!(
2925                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
2926                        company_code, assets, liab_equity, bs_diff
2927                    );
2928                }
2929            }
2930
2931            info!("Phase 10c: All generation-time accounting assertions passed");
2932        }
2933
2934        // Phase 11: Audit Data
2935        let audit = self.phase_audit_data(&entries, &mut stats)?;
2936
2937        // Phase 12: Banking KYC/AML Data
2938        let mut banking = self.phase_banking_data(&mut stats)?;
2939
2940        // Phase 12.5: Bridge document-flow Payments → BankTransactions
2941        // Creates coherence between the accounting layer (payments, JEs) and the
2942        // banking layer (bank transactions). A vendor invoice payment now appears
2943        // on both sides with cross-references and fraud labels propagated.
2944        if self.phase_config.generate_banking
2945            && !document_flows.payments.is_empty()
2946            && !banking.accounts.is_empty()
2947        {
2948            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2949            if bridge_rate > 0.0 {
2950                let mut bridge =
2951                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2952                        self.seed,
2953                    );
2954                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2955                    &document_flows.payments,
2956                    &banking.customers,
2957                    &banking.accounts,
2958                    bridge_rate,
2959                );
2960                info!(
2961                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2962                    bridge_stats.bridged_count,
2963                    bridge_stats.transactions_emitted,
2964                    bridge_stats.fraud_propagated,
2965                );
2966                let bridged_count = bridged_txns.len();
2967                banking.transactions.extend(bridged_txns);
2968
2969                // Re-run velocity computation so bridged txns also get features
2970                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
2971                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2972                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
2973                        &mut banking.transactions,
2974                    );
2975                }
2976
2977                // Recompute suspicious count after bridging
2978                banking.suspicious_count = banking
2979                    .transactions
2980                    .iter()
2981                    .filter(|t| t.is_suspicious)
2982                    .count();
2983                stats.banking_transaction_count = banking.transactions.len();
2984                stats.banking_suspicious_count = banking.suspicious_count;
2985            }
2986        }
2987
2988        // Phase 13: Graph Export
2989        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2990
2991        // Phase 14: LLM Enrichment
2992        self.phase_llm_enrichment(&mut stats);
2993
2994        // Phase 15: Diffusion Enhancement
2995        self.phase_diffusion_enhancement(&entries, &mut stats);
2996
2997        // Phase 16: Causal Overlay
2998        self.phase_causal_overlay(&mut stats);
2999
3000        // Phase 17: Bank Reconciliation + Financial Statements
3001        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
3002        // provision data (from accounting_standards / tax snapshots) can be wired in.
3003        let mut financial_reporting = self.phase_financial_reporting(
3004            &document_flows,
3005            &entries,
3006            &coa,
3007            &hr,
3008            &audit,
3009            &mut stats,
3010        )?;
3011
3012        // BS coherence check: assets = liabilities + equity
3013        {
3014            use datasynth_core::models::StatementType;
3015            for stmt in &financial_reporting.consolidated_statements {
3016                if stmt.statement_type == StatementType::BalanceSheet {
3017                    let total_assets: rust_decimal::Decimal = stmt
3018                        .line_items
3019                        .iter()
3020                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
3021                        .map(|li| li.amount)
3022                        .sum();
3023                    let total_le: rust_decimal::Decimal = stmt
3024                        .line_items
3025                        .iter()
3026                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3027                        .map(|li| li.amount)
3028                        .sum();
3029                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3030                        warn!(
3031                            "BS equation imbalance: assets={}, L+E={}",
3032                            total_assets, total_le
3033                        );
3034                    }
3035                }
3036            }
3037        }
3038
3039        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
3040        let accounting_standards =
3041            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3042
3043        // Phase 18a: Merge ECL journal entries into main GL
3044        if !accounting_standards.ecl_journal_entries.is_empty() {
3045            debug!(
3046                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3047                accounting_standards.ecl_journal_entries.len()
3048            );
3049            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3050        }
3051
3052        // Phase 18a: Merge provision journal entries into main GL
3053        if !accounting_standards.provision_journal_entries.is_empty() {
3054            debug!(
3055                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3056                accounting_standards.provision_journal_entries.len()
3057            );
3058            entries.extend(
3059                accounting_standards
3060                    .provision_journal_entries
3061                    .iter()
3062                    .cloned(),
3063            );
3064        }
3065
3066        // Phase 18b: OCPM Events (after all process data is available)
3067        let mut ocpm = self.phase_ocpm_events(
3068            &document_flows,
3069            &sourcing,
3070            &hr,
3071            &manufacturing_snap,
3072            &banking,
3073            &audit,
3074            &financial_reporting,
3075            &mut stats,
3076        )?;
3077
3078        // Emit OCPM events to stream sink
3079        if let Some(ref event_log) = ocpm.event_log {
3080            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3081        }
3082
3083        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
3084        if let Some(ref event_log) = ocpm.event_log {
3085            // Build reverse index: document_ref → indices into `event_log.events`
3086            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3087                std::collections::HashMap::new();
3088            for (idx, event) in event_log.events.iter().enumerate() {
3089                if let Some(ref doc_ref) = event.document_ref {
3090                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3091                }
3092            }
3093
3094            if !doc_index.is_empty() {
3095                let mut annotated = 0usize;
3096                for entry in &mut entries {
3097                    let doc_id_str = entry.header.document_id.to_string();
3098                    // Collect matching event indices from document_id and reference
3099                    let mut matched_indices: Vec<usize> = Vec::new();
3100                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3101                        matched_indices.extend(indices);
3102                    }
3103                    if let Some(ref reference) = entry.header.reference {
3104                        let bare_ref = reference
3105                            .find(':')
3106                            .map(|i| &reference[i + 1..])
3107                            .unwrap_or(reference.as_str());
3108                        if let Some(indices) = doc_index.get(bare_ref) {
3109                            for &idx in indices {
3110                                if !matched_indices.contains(&idx) {
3111                                    matched_indices.push(idx);
3112                                }
3113                            }
3114                        }
3115                    }
3116                    // Apply matches to JE header
3117                    if !matched_indices.is_empty() {
3118                        for &idx in &matched_indices {
3119                            let event = &event_log.events[idx];
3120                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3121                                entry.header.ocpm_event_ids.push(event.event_id);
3122                            }
3123                            for obj_ref in &event.object_refs {
3124                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3125                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
3126                                }
3127                            }
3128                            if entry.header.ocpm_case_id.is_none() {
3129                                entry.header.ocpm_case_id = event.case_id;
3130                            }
3131                        }
3132                        annotated += 1;
3133                    }
3134                }
3135                debug!(
3136                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3137                    annotated
3138                );
3139            }
3140        }
3141
3142        // Phase 18d: Synthesize OCPM events for orphan JEs (period-close,
3143        // IC eliminations, opening balances, standards-driven entries) so
3144        // every JournalEntry carries at least one `ocpm_event_ids` link.
3145        if let Some(ref mut event_log) = ocpm.event_log {
3146            let synthesized =
3147                datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3148            if synthesized > 0 {
3149                info!(
3150                    "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3151                );
3152            }
3153
3154            // Phase 18e: Mirror JE anomaly / fraud flags onto the linked OCEL
3155            // events and their owning CaseTrace. Without this, every exported
3156            // OCEL event has `is_anomaly = false` even when the underlying JE
3157            // was flagged.
3158            let anomaly_events =
3159                datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3160            if anomaly_events > 0 {
3161                info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3162            }
3163
3164            // Phase 18f: Inject process-variant imperfections (rework, skipped
3165            // steps, out-of-order events) so conformance checkers see
3166            // realistic variant counts and fitness < 1.0. Uses the P2P
3167            // process rates as the single source of truth.
3168            let p2p_cfg = &self.config.ocpm.p2p_process;
3169            let any_imperfection = p2p_cfg.rework_probability > 0.0
3170                || p2p_cfg.skip_step_probability > 0.0
3171                || p2p_cfg.out_of_order_probability > 0.0;
3172            if any_imperfection {
3173                use rand_chacha::rand_core::SeedableRng;
3174                let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3175                    rework_rate: p2p_cfg.rework_probability,
3176                    skip_rate: p2p_cfg.skip_step_probability,
3177                    out_of_order_rate: p2p_cfg.out_of_order_probability,
3178                };
3179                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3180                let stats =
3181                    datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3182                if stats.rework + stats.skipped + stats.out_of_order > 0 {
3183                    info!(
3184                        "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3185                        stats.rework, stats.skipped, stats.out_of_order
3186                    );
3187                }
3188            }
3189        }
3190
3191        // Phase 19: Sales Quotes, Management KPIs, Budgets
3192        let sales_kpi_budgets =
3193            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3194
3195        // Phase 22: Treasury Data Generation
3196        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
3197        // are included in the pre-tax income used by phase_tax_generation.
3198        let treasury =
3199            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3200
3201        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
3202        if !treasury.journal_entries.is_empty() {
3203            debug!(
3204                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3205                treasury.journal_entries.len()
3206            );
3207            entries.extend(treasury.journal_entries.iter().cloned());
3208        }
3209
3210        // Phase 20: Tax Generation
3211        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3212
3213        // Phase 20 JEs: Merge tax posting journal entries into main GL
3214        if !tax.tax_posting_journal_entries.is_empty() {
3215            debug!(
3216                "Merging {} tax posting JEs into GL",
3217                tax.tax_posting_journal_entries.len()
3218            );
3219            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3220        }
3221
3222        // Phase 20b: FINAL fraud behavioral bias sweep.
3223        //
3224        // Many phases AFTER Phase 8b (ECL / provisions / treasury / tax /
3225        // period close) extend `entries` with new journal entries that may
3226        // carry `is_fraud = true` (e.g. tax-provision entries derived from
3227        // already-fraudulent transactions). Those late additions miss the
3228        // Phase 8b sweep and ship without bias applied — which is exactly
3229        // why SDK-team production jobs kept reporting `off_hours 0× lift`
3230        // even after v3.1.1 closed the per-phase gap for early-added JEs.
3231        //
3232        // Running the sweep one more time here guarantees every is_fraud
3233        // entry — regardless of which phase added it — has bias applied.
3234        // `!is_anomaly` gates out anomaly-injector entries (which already
3235        // got biased inline); the sweep is otherwise idempotent-ish:
3236        // weekend / off_hours re-fire to another valid weekend / off-hour,
3237        // post_close is guarded by `!is_post_close`, and round-dollar
3238        // rescaling on an already-round amount is a no-op (ratio = 1).
3239        {
3240            use datasynth_core::fraud_bias::{
3241                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3242            };
3243            use rand_chacha::rand_core::SeedableRng;
3244            let cfg = FraudBehavioralBiasConfig::default();
3245            if cfg.enabled {
3246                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3247                let mut swept = 0usize;
3248                for entry in entries.iter_mut() {
3249                    if entry.header.is_fraud && !entry.header.is_anomaly {
3250                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3251                        swept += 1;
3252                    }
3253                }
3254                if swept > 0 {
3255                    info!(
3256                        "Phase 20b: final behavioral-bias sweep applied to {swept} \
3257                         non-anomaly fraud entries (covers late-added JEs from \
3258                         ECL / provisions / treasury / tax / period-close)"
3259                    );
3260                }
3261            }
3262        }
3263
3264        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
3265        // Build supplementary cash flow items from upstream JE data (depreciation,
3266        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
3267        {
3268            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3269
3270            let framework_str = {
3271                use datasynth_config::schema::AccountingFrameworkConfig;
3272                match self
3273                    .config
3274                    .accounting_standards
3275                    .framework
3276                    .unwrap_or_default()
3277                {
3278                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3279                        "IFRS"
3280                    }
3281                    _ => "US_GAAP",
3282                }
3283            };
3284
3285            // Sum depreciation debits (account 6000) from close JEs
3286            let depreciation_total: rust_decimal::Decimal = entries
3287                .iter()
3288                .filter(|je| je.header.document_type == "CL")
3289                .flat_map(|je| je.lines.iter())
3290                .filter(|l| l.gl_account.starts_with("6000"))
3291                .map(|l| l.debit_amount)
3292                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3293
3294            // Sum interest expense debits (account 7100)
3295            let interest_paid: rust_decimal::Decimal = entries
3296                .iter()
3297                .flat_map(|je| je.lines.iter())
3298                .filter(|l| l.gl_account.starts_with("7100"))
3299                .map(|l| l.debit_amount)
3300                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3301
3302            // Sum tax expense debits (account 8000)
3303            let tax_paid: rust_decimal::Decimal = entries
3304                .iter()
3305                .flat_map(|je| je.lines.iter())
3306                .filter(|l| l.gl_account.starts_with("8000"))
3307                .map(|l| l.debit_amount)
3308                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3309
3310            // Sum capex debits on fixed assets (account 1500)
3311            let capex: rust_decimal::Decimal = entries
3312                .iter()
3313                .flat_map(|je| je.lines.iter())
3314                .filter(|l| l.gl_account.starts_with("1500"))
3315                .map(|l| l.debit_amount)
3316                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3317
3318            // Dividends paid: sum debits on dividends payable (account 2170) across all JEs
3319            let dividends_paid: rust_decimal::Decimal = entries
3320                .iter()
3321                .flat_map(|je| je.lines.iter())
3322                .filter(|l| l.gl_account == "2170")
3323                .map(|l| l.debit_amount)
3324                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3325
3326            let cf_data = CashFlowSourceData {
3327                depreciation_total,
3328                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
3329                delta_ar: rust_decimal::Decimal::ZERO,
3330                delta_ap: rust_decimal::Decimal::ZERO,
3331                delta_inventory: rust_decimal::Decimal::ZERO,
3332                capex,
3333                debt_issuance: rust_decimal::Decimal::ZERO,
3334                debt_repayment: rust_decimal::Decimal::ZERO,
3335                interest_paid,
3336                tax_paid,
3337                dividends_paid,
3338                framework: framework_str.to_string(),
3339            };
3340
3341            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3342            if !enhanced_cf_items.is_empty() {
3343                // Merge into ALL cash flow statements (standalone + consolidated)
3344                use datasynth_core::models::StatementType;
3345                let merge_count = enhanced_cf_items.len();
3346                for stmt in financial_reporting
3347                    .financial_statements
3348                    .iter_mut()
3349                    .chain(financial_reporting.consolidated_statements.iter_mut())
3350                    .chain(
3351                        financial_reporting
3352                            .standalone_statements
3353                            .values_mut()
3354                            .flat_map(|v| v.iter_mut()),
3355                    )
3356                {
3357                    if stmt.statement_type == StatementType::CashFlowStatement {
3358                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3359                    }
3360                }
3361                info!(
3362                    "Enhanced cash flow: {} supplementary items merged into CF statements",
3363                    merge_count
3364                );
3365            }
3366        }
3367
3368        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
3369        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
3370        self.generate_notes_to_financial_statements(
3371            &mut financial_reporting,
3372            &accounting_standards,
3373            &tax,
3374            &hr,
3375            &audit,
3376            &treasury,
3377        );
3378
3379        // Phase 20b: Supplement segment reports from real JEs (v2.4)
3380        // When we have 2+ companies, derive segment data from actual journal entries
3381        // to complement or replace the FS-generator-based segments.
3382        if self.config.companies.len() >= 2 && !entries.is_empty() {
3383            let companies: Vec<(String, String)> = self
3384                .config
3385                .companies
3386                .iter()
3387                .map(|c| (c.code.clone(), c.name.clone()))
3388                .collect();
3389            let ic_elim: rust_decimal::Decimal =
3390                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3391            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3392                .unwrap_or(NaiveDate::MIN);
3393            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3394            let period_label = format!(
3395                "{}-{:02}",
3396                end_date.year(),
3397                (end_date - chrono::Days::new(1)).month()
3398            );
3399
3400            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3401            let (je_segments, je_recon) =
3402                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3403            if !je_segments.is_empty() {
3404                info!(
3405                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3406                    je_segments.len(),
3407                    ic_elim,
3408                );
3409                // Replace if existing segment_reports were empty; otherwise supplement
3410                if financial_reporting.segment_reports.is_empty() {
3411                    financial_reporting.segment_reports = je_segments;
3412                    financial_reporting.segment_reconciliations = vec![je_recon];
3413                } else {
3414                    financial_reporting.segment_reports.extend(je_segments);
3415                    financial_reporting.segment_reconciliations.push(je_recon);
3416                }
3417            }
3418        }
3419
3420        // Phase 21: ESG Data Generation
3421        let esg_snap =
3422            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3423
3424        // Phase 23: Project Accounting Data Generation
3425        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3426
3427        // Phase 24: Process Evolution + Organizational Events
3428        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3429
3430        // Phase 24b: Disruption Events
3431        let disruption_events = self.phase_disruption_events(&mut stats)?;
3432
3433        // Phase 27: Bi-Temporal Vendor Version Chains
3434        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3435
3436        // Phase 28: Entity Relationship Graph + Cross-Process Links
3437        let (entity_relationship_graph, cross_process_links) =
3438            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3439
3440        // Phase 29: Industry-specific GL accounts
3441        let industry_output = self.phase_industry_data(&mut stats);
3442
3443        // Phase: Compliance regulations (must run before hypergraph so it can be included)
3444        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3445
3446        // Phase: Neural enhancement (config-acknowledged-only in v4.0).
3447        //
3448        // The neural / hybrid diffusion path was a documented L2 stub
3449        // in v3.x; actual neural-network training requires ML
3450        // infrastructure (PyTorch / candle bindings, GPU access,
3451        // training loops) that was never wired through the
3452        // orchestrator. Rather than keep a silently-no-op block that
3453        // misleads users into thinking neural training happens, v4.0
3454        // acknowledges the config — exposing stats so downstream
3455        // tooling can see the request — but emits a clear warning
3456        // when a non-statistical backend is requested. The statistical
3457        // diffusion backend continues to run via
3458        // `phase_diffusion_enhancement`.
3459        //
3460        // Users who need real neural diffusion: track the roadmap item
3461        // in the v4.x backlog and consider contributing the backend
3462        // (the `DiffusionBackend` trait is the integration point).
3463        if self.config.diffusion.enabled
3464            && (self.config.diffusion.backend == "neural"
3465                || self.config.diffusion.backend == "hybrid")
3466        {
3467            let neural = &self.config.diffusion.neural;
3468            let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3469            stats.neural_hybrid_weight = Some(weight);
3470            stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3471            stats.neural_routed_column_count = Some(neural.neural_columns.len());
3472            warn!(
3473                "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3474                 the neural/hybrid training path is not yet shipped. Config \
3475                 is captured in stats (weight={weight:.2}, strategy={}, \
3476                 columns={}) but no neural training runs. Statistical \
3477                 diffusion (backend='statistical') continues to work.",
3478                self.config.diffusion.backend,
3479                neural.hybrid_strategy,
3480                neural.neural_columns.len(),
3481            );
3482        }
3483
3484        // Phase 19b: Hypergraph Export (after all data is available)
3485        self.phase_hypergraph_export(
3486            &coa,
3487            &entries,
3488            &document_flows,
3489            &sourcing,
3490            &hr,
3491            &manufacturing_snap,
3492            &banking,
3493            &audit,
3494            &financial_reporting,
3495            &ocpm,
3496            &compliance_regulations,
3497            &mut stats,
3498        )?;
3499
3500        // Phase 10c: Additional graph builders (approval, entity, banking)
3501        // These run after all data is available since they need banking/IC data.
3502        if self.phase_config.generate_graph_export {
3503            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3504        }
3505
3506        // Log informational messages for config sections not yet fully wired
3507        if self.config.streaming.enabled {
3508            info!("Note: streaming config is enabled but batch mode does not use it");
3509        }
3510        if self.config.vendor_network.enabled {
3511            debug!("Vendor network config available; relationship graph generation is partial");
3512        }
3513        if self.config.customer_segmentation.enabled {
3514            debug!("Customer segmentation config available; segment-aware generation is partial");
3515        }
3516
3517        // Log final resource statistics
3518        let resource_stats = self.resource_guard.stats();
3519        info!(
3520            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3521            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3522            resource_stats.disk.estimated_bytes_written,
3523            resource_stats.degradation_level
3524        );
3525
3526        // Flush any remaining stream sink data
3527        if let Some(ref sink) = self.phase_sink {
3528            if let Err(e) = sink.flush() {
3529                warn!("Stream sink flush failed: {e}");
3530            }
3531        }
3532
3533        // Build data lineage graph
3534        let lineage = self.build_lineage_graph();
3535
3536        // Evaluate quality gates if enabled in config
3537        let gate_result = if self.config.quality_gates.enabled {
3538            let profile_name = &self.config.quality_gates.profile;
3539            match datasynth_eval::gates::get_profile(profile_name) {
3540                Some(profile) => {
3541                    // Build an evaluation populated with actual generation metrics.
3542                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3543
3544                    // Populate balance sheet evaluation from balance validation results
3545                    if balance_validation.validated {
3546                        eval.coherence.balance =
3547                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3548                                equation_balanced: balance_validation.is_balanced,
3549                                max_imbalance: (balance_validation.total_debits
3550                                    - balance_validation.total_credits)
3551                                    .abs(),
3552                                periods_evaluated: 1,
3553                                periods_imbalanced: if balance_validation.is_balanced {
3554                                    0
3555                                } else {
3556                                    1
3557                                },
3558                                period_results: Vec::new(),
3559                                companies_evaluated: self.config.companies.len(),
3560                            });
3561                    }
3562
3563                    // Set coherence passes based on balance validation
3564                    eval.coherence.passes = balance_validation.is_balanced;
3565                    if !balance_validation.is_balanced {
3566                        eval.coherence
3567                            .failures
3568                            .push("Balance sheet equation not satisfied".to_string());
3569                    }
3570
3571                    // Set statistical score based on entry count (basic sanity)
3572                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3573                    eval.statistical.passes = !entries.is_empty();
3574
3575                    // Set quality score from data quality stats
3576                    eval.quality.overall_score = 0.9; // Default high for generated data
3577                    eval.quality.passes = true;
3578
3579                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3580                    info!(
3581                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3582                        profile_name, result.gates_passed, result.gates_total, result.summary
3583                    );
3584                    Some(result)
3585                }
3586                None => {
3587                    warn!(
3588                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3589                        profile_name
3590                    );
3591                    None
3592                }
3593            }
3594        } else {
3595            None
3596        };
3597
3598        // Generate internal controls if enabled
3599        let internal_controls = if self.config.internal_controls.enabled {
3600            InternalControl::standard_controls()
3601        } else {
3602            Vec::new()
3603        };
3604
3605        // v3.3.0: analytics-metadata phase. Runs AFTER all JE-adding
3606        // phases (including fraud-bias sweep at Phase 20b) so derived
3607        // outputs reflect final data.
3608        let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3609
3610        // v3.5.1: statistical validation over the final amount
3611        // distribution. Runs *after* all JE-adding phases so the report
3612        // reflects everything the user will see in the output. Returns
3613        // `None` unless `distributions.validation.enabled = true`.
3614        let statistical_validation = self.phase_statistical_validation(&entries)?;
3615
3616        // v4.1.3+: interconnectivity snapshot — tier assignments,
3617        // value-segment labels, industry-specific metadata. Runs after
3618        // master data is settled so it can index stable IDs.
3619        let interconnectivity = self.phase_interconnectivity();
3620
3621        Ok(EnhancedGenerationResult {
3622            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3623            master_data: std::mem::take(&mut self.master_data),
3624            document_flows,
3625            subledger,
3626            ocpm,
3627            audit,
3628            banking,
3629            graph_export,
3630            sourcing,
3631            financial_reporting,
3632            hr,
3633            accounting_standards,
3634            manufacturing: manufacturing_snap,
3635            sales_kpi_budgets,
3636            tax,
3637            esg: esg_snap,
3638            treasury,
3639            project_accounting,
3640            process_evolution,
3641            organizational_events,
3642            disruption_events,
3643            intercompany,
3644            journal_entries: entries,
3645            anomaly_labels,
3646            balance_validation,
3647            data_quality_stats,
3648            quality_issues,
3649            statistics: stats,
3650            lineage: Some(lineage),
3651            gate_result,
3652            internal_controls,
3653            sod_violations,
3654            opening_balances,
3655            subledger_reconciliation,
3656            counterfactual_pairs,
3657            red_flags,
3658            collusion_rings,
3659            temporal_vendor_chains,
3660            entity_relationship_graph,
3661            cross_process_links,
3662            industry_output,
3663            compliance_regulations,
3664            analytics_metadata,
3665            statistical_validation,
3666            interconnectivity,
3667        })
3668    }
3669
3670    /// v4.1.3+: populate the interconnectivity snapshot from
3671    /// previously-inert schema sections. Empty when all sections are
3672    /// disabled.
3673    fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3674        use rand::{RngExt, SeedableRng};
3675        use rand_chacha::ChaCha8Rng;
3676
3677        let mut snap = InterconnectivitySnapshot::default();
3678        let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3679
3680        // --- Vendor network ---
3681        let vn = &self.config.vendor_network;
3682        if vn.enabled {
3683            let total = self.master_data.vendors.len();
3684            if total > 0 {
3685                let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3686                let remaining_after_t1 = total.saturating_sub(tier1_count);
3687                let depth = vn.depth.clamp(1, 3);
3688                let tier2_count = if depth >= 2 {
3689                    let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3690                    (tier1_count * avg).min(remaining_after_t1)
3691                } else {
3692                    0
3693                };
3694                let tier3_count = total
3695                    .saturating_sub(tier1_count)
3696                    .saturating_sub(tier2_count);
3697
3698                for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3699                    let tier = if idx < tier1_count {
3700                        1
3701                    } else if idx < tier1_count + tier2_count {
3702                        2
3703                    } else {
3704                        3
3705                    };
3706                    snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3707
3708                    // Cluster assignment via configured ratios.
3709                    let cl = &vn.clusters;
3710                    let roll: f64 = rng.random();
3711                    let cluster = if roll < cl.reliable_strategic {
3712                        "reliable_strategic"
3713                    } else if roll < cl.reliable_strategic + cl.standard_operational {
3714                        "standard_operational"
3715                    } else if roll
3716                        < cl.reliable_strategic + cl.standard_operational + cl.transactional
3717                    {
3718                        "transactional"
3719                    } else {
3720                        "problematic"
3721                    };
3722                    snap.vendor_clusters
3723                        .push((vendor.vendor_id.clone(), cluster.to_string()));
3724                }
3725                let _ = tier3_count; // retained for clarity; tier 3 bucket is the remainder
3726            }
3727        }
3728
3729        // --- Customer segmentation ---
3730        let cs = &self.config.customer_segmentation;
3731        if cs.enabled {
3732            let seg = &cs.value_segments;
3733            for customer in &self.master_data.customers {
3734                let roll: f64 = rng.random();
3735                let value_segment = if roll < seg.enterprise.customer_share {
3736                    "enterprise"
3737                } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3738                    "mid_market"
3739                } else if roll
3740                    < seg.enterprise.customer_share
3741                        + seg.mid_market.customer_share
3742                        + seg.smb.customer_share
3743                {
3744                    "smb"
3745                } else {
3746                    "consumer"
3747                };
3748                snap.customer_value_segments
3749                    .push((customer.customer_id.clone(), value_segment.to_string()));
3750
3751                let roll2: f64 = rng.random();
3752                let life = &cs.lifecycle;
3753                let lifecycle = if roll2 < life.prospect_rate {
3754                    "prospect"
3755                } else if roll2 < life.prospect_rate + life.new_rate {
3756                    "new"
3757                } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3758                    "growth"
3759                } else if roll2
3760                    < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3761                {
3762                    "mature"
3763                } else if roll2
3764                    < life.prospect_rate
3765                        + life.new_rate
3766                        + life.growth_rate
3767                        + life.mature_rate
3768                        + life.at_risk_rate
3769                {
3770                    "at_risk"
3771                } else if roll2
3772                    < life.prospect_rate
3773                        + life.new_rate
3774                        + life.growth_rate
3775                        + life.mature_rate
3776                        + life.at_risk_rate
3777                        + life.churned_rate
3778                {
3779                    "churned"
3780                } else {
3781                    "won_back"
3782                };
3783                snap.customer_lifecycle_stages
3784                    .push((customer.customer_id.clone(), lifecycle.to_string()));
3785            }
3786        }
3787
3788        // --- Industry-specific metadata (minimal) ---
3789        let is = &self.config.industry_specific;
3790        if is.enabled {
3791            snap.industry_metadata.push(format!(
3792                "industry_specific.enabled=true (industry={:?})",
3793                self.config.global.industry
3794            ));
3795        }
3796
3797        snap
3798    }
3799
3800    // ========================================================================
3801    // Generation Phase Methods
3802    // ========================================================================
3803
3804    /// Phase 1: Generate Chart of Accounts and update statistics.
3805    fn phase_chart_of_accounts(
3806        &mut self,
3807        stats: &mut EnhancedGenerationStatistics,
3808    ) -> SynthResult<Arc<ChartOfAccounts>> {
3809        info!("Phase 1: Generating Chart of Accounts");
3810        let coa = self.generate_coa()?;
3811        stats.accounts_count = coa.account_count();
3812        info!(
3813            "Chart of Accounts generated: {} accounts",
3814            stats.accounts_count
3815        );
3816        self.check_resources_with_log("post-coa")?;
3817        Ok(coa)
3818    }
3819
3820    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
3821    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3822        if self.phase_config.generate_master_data {
3823            info!("Phase 2: Generating Master Data");
3824            self.generate_master_data()?;
3825            stats.vendor_count = self.master_data.vendors.len();
3826            stats.customer_count = self.master_data.customers.len();
3827            stats.material_count = self.master_data.materials.len();
3828            stats.asset_count = self.master_data.assets.len();
3829            stats.employee_count = self.master_data.employees.len();
3830            info!(
3831                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3832                stats.vendor_count, stats.customer_count, stats.material_count,
3833                stats.asset_count, stats.employee_count
3834            );
3835            self.check_resources_with_log("post-master-data")?;
3836        } else {
3837            debug!("Phase 2: Skipped (master data generation disabled)");
3838        }
3839        Ok(())
3840    }
3841
3842    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
3843    fn phase_document_flows(
3844        &mut self,
3845        stats: &mut EnhancedGenerationStatistics,
3846    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3847        let mut document_flows = DocumentFlowSnapshot::default();
3848        let mut subledger = SubledgerSnapshot::default();
3849        // Dunning JEs (interest + charges) accumulated here and merged into the
3850        // main FA-JE list below so they appear in the GL.
3851        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3852
3853        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3854            info!("Phase 3: Generating Document Flows");
3855            self.generate_document_flows(&mut document_flows)?;
3856            stats.p2p_chain_count = document_flows.p2p_chains.len();
3857            stats.o2c_chain_count = document_flows.o2c_chains.len();
3858            info!(
3859                "Document flows generated: {} P2P chains, {} O2C chains",
3860                stats.p2p_chain_count, stats.o2c_chain_count
3861            );
3862
3863            // Phase 3b: Link document flows to subledgers (for data coherence)
3864            debug!("Phase 3b: Linking document flows to subledgers");
3865            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3866            stats.ap_invoice_count = subledger.ap_invoices.len();
3867            stats.ar_invoice_count = subledger.ar_invoices.len();
3868            debug!(
3869                "Subledgers linked: {} AP invoices, {} AR invoices",
3870                stats.ap_invoice_count, stats.ar_invoice_count
3871            );
3872
3873            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
3874            // Without this step the subledger is systematically overstated because
3875            // amount_remaining is set at invoice creation and never reduced by
3876            // the payments that were generated in the document-flow phase.
3877            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3878            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3879            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3880            debug!("Payment settlements applied to AP and AR subledgers");
3881
3882            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
3883            // The as-of date is the last day of the configured period.
3884            if let Ok(start_date) =
3885                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3886            {
3887                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3888                    - chrono::Days::new(1);
3889                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3890                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
3891                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
3892                // derived from JE-level aggregation and will typically differ. This is a known
3893                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
3894                // generated independently. A future reconciliation phase should align them by
3895                // using subledger totals as the authoritative source for BS Receivables.
3896                for company in &self.config.companies {
3897                    let ar_report = ARAgingReport::from_invoices(
3898                        company.code.clone(),
3899                        &subledger.ar_invoices,
3900                        as_of_date,
3901                    );
3902                    subledger.ar_aging_reports.push(ar_report);
3903
3904                    let ap_report = APAgingReport::from_invoices(
3905                        company.code.clone(),
3906                        &subledger.ap_invoices,
3907                        as_of_date,
3908                    );
3909                    subledger.ap_aging_reports.push(ap_report);
3910                }
3911                debug!(
3912                    "AR/AP aging reports built: {} AR, {} AP",
3913                    subledger.ar_aging_reports.len(),
3914                    subledger.ap_aging_reports.len()
3915                );
3916
3917                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
3918                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3919                {
3920                    use datasynth_generators::DunningGenerator;
3921                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3922                    for company in &self.config.companies {
3923                        let currency = company.currency.as_str();
3924                        // Collect mutable references to AR invoices for this company
3925                        // (dunning generator updates dunning_info on invoices in-place).
3926                        let mut company_invoices: Vec<
3927                            datasynth_core::models::subledger::ar::ARInvoice,
3928                        > = subledger
3929                            .ar_invoices
3930                            .iter()
3931                            .filter(|inv| inv.company_code == company.code)
3932                            .cloned()
3933                            .collect();
3934
3935                        if company_invoices.is_empty() {
3936                            continue;
3937                        }
3938
3939                        let result = dunning_gen.execute_dunning_run(
3940                            &company.code,
3941                            as_of_date,
3942                            &mut company_invoices,
3943                            currency,
3944                        );
3945
3946                        // Write back updated dunning info to the main AR invoice list
3947                        for updated in &company_invoices {
3948                            if let Some(orig) = subledger
3949                                .ar_invoices
3950                                .iter_mut()
3951                                .find(|i| i.invoice_number == updated.invoice_number)
3952                            {
3953                                orig.dunning_info = updated.dunning_info.clone();
3954                            }
3955                        }
3956
3957                        subledger.dunning_runs.push(result.dunning_run);
3958                        subledger.dunning_letters.extend(result.letters);
3959                        // Dunning JEs (interest + charges) collected into local buffer.
3960                        dunning_journal_entries.extend(result.journal_entries);
3961                    }
3962                    debug!(
3963                        "Dunning runs complete: {} runs, {} letters",
3964                        subledger.dunning_runs.len(),
3965                        subledger.dunning_letters.len()
3966                    );
3967                }
3968            }
3969
3970            self.check_resources_with_log("post-document-flows")?;
3971        } else {
3972            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3973        }
3974
3975        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
3976        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3977        if !self.master_data.assets.is_empty() {
3978            debug!("Generating FA subledger records");
3979            let company_code = self
3980                .config
3981                .companies
3982                .first()
3983                .map(|c| c.code.as_str())
3984                .unwrap_or("1000");
3985            let currency = self
3986                .config
3987                .companies
3988                .first()
3989                .map(|c| c.currency.as_str())
3990                .unwrap_or("USD");
3991
3992            let mut fa_gen = datasynth_generators::FAGenerator::new(
3993                datasynth_generators::FAGeneratorConfig::default(),
3994                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3995            );
3996
3997            for asset in &self.master_data.assets {
3998                let (record, je) = fa_gen.generate_asset_acquisition(
3999                    company_code,
4000                    &format!("{:?}", asset.asset_class),
4001                    &asset.description,
4002                    asset.acquisition_date,
4003                    currency,
4004                    asset.cost_center.as_deref(),
4005                );
4006                subledger.fa_records.push(record);
4007                fa_journal_entries.push(je);
4008            }
4009
4010            stats.fa_subledger_count = subledger.fa_records.len();
4011            debug!(
4012                "FA subledger records generated: {} (with {} acquisition JEs)",
4013                stats.fa_subledger_count,
4014                fa_journal_entries.len()
4015            );
4016        }
4017
4018        // Generate Inventory subledger records from master data materials
4019        if !self.master_data.materials.is_empty() {
4020            debug!("Generating Inventory subledger records");
4021            let first_company = self.config.companies.first();
4022            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4023            let inv_currency = first_company
4024                .map(|c| c.currency.clone())
4025                .unwrap_or_else(|| "USD".to_string());
4026
4027            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4028                datasynth_generators::InventoryGeneratorConfig::default(),
4029                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4030                inv_currency.clone(),
4031            );
4032
4033            for (i, material) in self.master_data.materials.iter().enumerate() {
4034                let plant = format!("PLANT{:02}", (i % 3) + 1);
4035                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4036                let initial_qty = rust_decimal::Decimal::from(
4037                    material
4038                        .safety_stock
4039                        .to_string()
4040                        .parse::<i64>()
4041                        .unwrap_or(100),
4042                );
4043
4044                let position = inv_gen.generate_position(
4045                    company_code,
4046                    &plant,
4047                    &storage_loc,
4048                    &material.material_id,
4049                    &material.description,
4050                    initial_qty,
4051                    Some(material.standard_cost),
4052                    &inv_currency,
4053                );
4054                subledger.inventory_positions.push(position);
4055            }
4056
4057            stats.inventory_subledger_count = subledger.inventory_positions.len();
4058            debug!(
4059                "Inventory subledger records generated: {}",
4060                stats.inventory_subledger_count
4061            );
4062        }
4063
4064        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
4065        if !subledger.fa_records.is_empty() {
4066            if let Ok(start_date) =
4067                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4068            {
4069                let company_code = self
4070                    .config
4071                    .companies
4072                    .first()
4073                    .map(|c| c.code.as_str())
4074                    .unwrap_or("1000");
4075                let fiscal_year = start_date.year();
4076                let start_period = start_date.month();
4077                let end_period =
4078                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4079
4080                let depr_cfg = FaDepreciationScheduleConfig {
4081                    fiscal_year,
4082                    start_period,
4083                    end_period,
4084                    seed_offset: 800,
4085                };
4086                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4087                let runs = depr_gen.generate(company_code, &subledger.fa_records);
4088                let run_count = runs.len();
4089                subledger.depreciation_runs = runs;
4090                debug!(
4091                    "Depreciation runs generated: {} runs for {} periods",
4092                    run_count, self.config.global.period_months
4093                );
4094            }
4095        }
4096
4097        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
4098        if !subledger.inventory_positions.is_empty() {
4099            if let Ok(start_date) =
4100                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4101            {
4102                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4103                    - chrono::Days::new(1);
4104
4105                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4106                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4107
4108                for company in &self.config.companies {
4109                    let result = inv_val_gen.generate(
4110                        &company.code,
4111                        &subledger.inventory_positions,
4112                        as_of_date,
4113                    );
4114                    subledger.inventory_valuations.push(result);
4115                }
4116                debug!(
4117                    "Inventory valuations generated: {} company reports",
4118                    subledger.inventory_valuations.len()
4119                );
4120            }
4121        }
4122
4123        Ok((document_flows, subledger, fa_journal_entries))
4124    }
4125
4126    /// Phase 3c: Generate OCPM events from document flows.
4127    #[allow(clippy::too_many_arguments)]
4128    fn phase_ocpm_events(
4129        &mut self,
4130        document_flows: &DocumentFlowSnapshot,
4131        sourcing: &SourcingSnapshot,
4132        hr: &HrSnapshot,
4133        manufacturing: &ManufacturingSnapshot,
4134        banking: &BankingSnapshot,
4135        audit: &AuditSnapshot,
4136        financial_reporting: &FinancialReportingSnapshot,
4137        stats: &mut EnhancedGenerationStatistics,
4138    ) -> SynthResult<OcpmSnapshot> {
4139        let degradation = self.check_resources()?;
4140        if degradation >= DegradationLevel::Reduced {
4141            debug!(
4142                "Phase skipped due to resource pressure (degradation: {:?})",
4143                degradation
4144            );
4145            return Ok(OcpmSnapshot::default());
4146        }
4147        if self.phase_config.generate_ocpm_events {
4148            info!("Phase 3c: Generating OCPM Events");
4149            let ocpm_snapshot = self.generate_ocpm_events(
4150                document_flows,
4151                sourcing,
4152                hr,
4153                manufacturing,
4154                banking,
4155                audit,
4156                financial_reporting,
4157            )?;
4158            stats.ocpm_event_count = ocpm_snapshot.event_count;
4159            stats.ocpm_object_count = ocpm_snapshot.object_count;
4160            stats.ocpm_case_count = ocpm_snapshot.case_count;
4161            info!(
4162                "OCPM events generated: {} events, {} objects, {} cases",
4163                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4164            );
4165            self.check_resources_with_log("post-ocpm")?;
4166            Ok(ocpm_snapshot)
4167        } else {
4168            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4169            Ok(OcpmSnapshot::default())
4170        }
4171    }
4172
4173    /// Phase 4: Generate journal entries from document flows and standalone generation.
4174    fn phase_journal_entries(
4175        &mut self,
4176        coa: &Arc<ChartOfAccounts>,
4177        document_flows: &DocumentFlowSnapshot,
4178        _stats: &mut EnhancedGenerationStatistics,
4179    ) -> SynthResult<Vec<JournalEntry>> {
4180        let mut entries = Vec::new();
4181
4182        // Phase 4a: Generate JEs from document flows (for data coherence)
4183        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4184            debug!("Phase 4a: Generating JEs from document flows");
4185            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4186            debug!("Generated {} JEs from document flows", flow_entries.len());
4187            entries.extend(flow_entries);
4188        }
4189
4190        // Phase 4b: Generate standalone journal entries
4191        if self.phase_config.generate_journal_entries {
4192            info!("Phase 4: Generating Journal Entries");
4193            let je_entries = self.generate_journal_entries(coa)?;
4194            info!("Generated {} standalone journal entries", je_entries.len());
4195            entries.extend(je_entries);
4196        } else {
4197            debug!("Phase 4: Skipped (journal entry generation disabled)");
4198        }
4199
4200        if !entries.is_empty() {
4201            // Note: stats.total_entries/total_line_items are set in generate()
4202            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
4203            self.check_resources_with_log("post-journal-entries")?;
4204        }
4205
4206        Ok(entries)
4207    }
4208
4209    /// Phase 5: Inject anomalies into journal entries.
4210    fn phase_anomaly_injection(
4211        &mut self,
4212        entries: &mut [JournalEntry],
4213        actions: &DegradationActions,
4214        stats: &mut EnhancedGenerationStatistics,
4215    ) -> SynthResult<AnomalyLabels> {
4216        if self.phase_config.inject_anomalies
4217            && !entries.is_empty()
4218            && !actions.skip_anomaly_injection
4219        {
4220            info!("Phase 5: Injecting Anomalies");
4221            let result = self.inject_anomalies(entries)?;
4222            stats.anomalies_injected = result.labels.len();
4223            info!("Injected {} anomalies", stats.anomalies_injected);
4224            self.check_resources_with_log("post-anomaly-injection")?;
4225            Ok(result)
4226        } else if actions.skip_anomaly_injection {
4227            warn!("Phase 5: Skipped due to resource degradation");
4228            Ok(AnomalyLabels::default())
4229        } else {
4230            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4231            Ok(AnomalyLabels::default())
4232        }
4233    }
4234
4235    /// Phase 6: Validate balance sheet equation on journal entries.
4236    fn phase_balance_validation(
4237        &mut self,
4238        entries: &[JournalEntry],
4239    ) -> SynthResult<BalanceValidationResult> {
4240        if self.phase_config.validate_balances && !entries.is_empty() {
4241            debug!("Phase 6: Validating Balances");
4242            let balance_validation = self.validate_journal_entries(entries)?;
4243            if balance_validation.is_balanced {
4244                debug!("Balance validation passed");
4245            } else {
4246                warn!(
4247                    "Balance validation found {} errors",
4248                    balance_validation.validation_errors.len()
4249                );
4250            }
4251            Ok(balance_validation)
4252        } else {
4253            Ok(BalanceValidationResult::default())
4254        }
4255    }
4256
4257    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
4258    fn phase_data_quality_injection(
4259        &mut self,
4260        entries: &mut [JournalEntry],
4261        actions: &DegradationActions,
4262        stats: &mut EnhancedGenerationStatistics,
4263    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4264        if self.phase_config.inject_data_quality
4265            && !entries.is_empty()
4266            && !actions.skip_data_quality
4267        {
4268            info!("Phase 7: Injecting Data Quality Variations");
4269            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4270            stats.data_quality_issues = dq_stats.records_with_issues;
4271            info!("Injected {} data quality issues", stats.data_quality_issues);
4272            self.check_resources_with_log("post-data-quality")?;
4273            Ok((dq_stats, quality_issues))
4274        } else if actions.skip_data_quality {
4275            warn!("Phase 7: Skipped due to resource degradation");
4276            // v4.4.1: report the denominator (entries seen) even when
4277            // injection is skipped, so downstream consumers can tell
4278            // "skipped, 0/N" apart from "ran but found nothing".
4279            Ok((stats_with_denominator(entries.len()), Vec::new()))
4280        } else {
4281            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4282            Ok((stats_with_denominator(entries.len()), Vec::new()))
4283        }
4284    }
4285
4286    /// Phase 10b: Generate period-close journal entries.
4287    ///
4288    /// Generates:
4289    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
4290    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
4291    ///    for the configured period.
4292    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4293    /// 3. Income statement closing JE per company: transfer net income after tax to retained
4294    ///    earnings via the Income Summary (3600) clearing account.
4295    fn phase_period_close(
4296        &mut self,
4297        entries: &mut Vec<JournalEntry>,
4298        subledger: &SubledgerSnapshot,
4299        stats: &mut EnhancedGenerationStatistics,
4300    ) -> SynthResult<()> {
4301        if !self.phase_config.generate_period_close || entries.is_empty() {
4302            debug!("Phase 10b: Skipped (period close disabled or no entries)");
4303            return Ok(());
4304        }
4305
4306        info!("Phase 10b: Generating period-close journal entries");
4307
4308        use datasynth_core::accounts::{
4309            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4310        };
4311        use rust_decimal::Decimal;
4312
4313        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4314            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4315        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4316        // Posting date for close entries is the last day of the period
4317        let close_date = end_date - chrono::Days::new(1);
4318
4319        // Statutory tax rate (21% — configurable rates come in later tiers)
4320        let tax_rate = Decimal::new(21, 2); // 0.21
4321
4322        // Collect company codes from config
4323        let company_codes: Vec<String> = self
4324            .config
4325            .companies
4326            .iter()
4327            .map(|c| c.code.clone())
4328            .collect();
4329
4330        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
4331        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4332        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4333
4334        // --- Depreciation JEs (per asset) ---
4335        // Compute period depreciation for each active fixed asset using straight-line method.
4336        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
4337        let period_months = self.config.global.period_months;
4338        for asset in &subledger.fa_records {
4339            // Skip assets that are inactive / fully depreciated / non-depreciable
4340            use datasynth_core::models::subledger::fa::AssetStatus;
4341            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4342                continue;
4343            }
4344            let useful_life_months = asset.useful_life_months();
4345            if useful_life_months == 0 {
4346                // Land or CIP — not depreciated
4347                continue;
4348            }
4349            let salvage_value = asset.salvage_value();
4350            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4351            if depreciable_base == Decimal::ZERO {
4352                continue;
4353            }
4354            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4355                * Decimal::from(period_months))
4356            .round_dp(2);
4357            if period_depr <= Decimal::ZERO {
4358                continue;
4359            }
4360
4361            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4362            depr_header.document_type = "CL".to_string();
4363            depr_header.header_text = Some(format!(
4364                "Depreciation - {} {}",
4365                asset.asset_number, asset.description
4366            ));
4367            depr_header.created_by = "CLOSE_ENGINE".to_string();
4368            depr_header.source = TransactionSource::Automated;
4369            depr_header.business_process = Some(BusinessProcess::R2R);
4370
4371            let doc_id = depr_header.document_id;
4372            let mut depr_je = JournalEntry::new(depr_header);
4373
4374            // DR Depreciation Expense (6000)
4375            depr_je.add_line(JournalEntryLine::debit(
4376                doc_id,
4377                1,
4378                expense_accounts::DEPRECIATION.to_string(),
4379                period_depr,
4380            ));
4381            // CR Accumulated Depreciation (1510)
4382            depr_je.add_line(JournalEntryLine::credit(
4383                doc_id,
4384                2,
4385                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4386                period_depr,
4387            ));
4388
4389            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4390            close_jes.push(depr_je);
4391        }
4392
4393        if !subledger.fa_records.is_empty() {
4394            debug!(
4395                "Generated {} depreciation JEs from {} FA records",
4396                close_jes.len(),
4397                subledger.fa_records.len()
4398            );
4399        }
4400
4401        // --- Accrual entries (standard period-end accruals per company) ---
4402        // Generate standard accrued expense entries (utilities, rent, interest) using
4403        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
4404        {
4405            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4406            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4407            // v3.4.3: snap reversal dates to business days. No-op when
4408            // temporal_patterns.business_days is disabled.
4409            if let Some(ctx) = &self.temporal_context {
4410                accrual_gen.set_temporal_context(Arc::clone(ctx));
4411            }
4412
4413            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
4414            let accrual_items: &[(&str, &str, &str)] = &[
4415                ("Accrued Utilities", "6200", "2100"),
4416                ("Accrued Rent", "6300", "2100"),
4417                ("Accrued Interest", "6100", "2150"),
4418            ];
4419
4420            for company_code in &company_codes {
4421                // Estimate company revenue from existing JEs
4422                let company_revenue: Decimal = entries
4423                    .iter()
4424                    .filter(|e| e.header.company_code == *company_code)
4425                    .flat_map(|e| e.lines.iter())
4426                    .filter(|l| l.gl_account.starts_with('4'))
4427                    .map(|l| l.credit_amount - l.debit_amount)
4428                    .fold(Decimal::ZERO, |acc, v| acc + v);
4429
4430                if company_revenue <= Decimal::ZERO {
4431                    continue;
4432                }
4433
4434                // Use 0.5% of period revenue per accrual item as a proxy
4435                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4436                if accrual_base <= Decimal::ZERO {
4437                    continue;
4438                }
4439
4440                for (description, expense_acct, liability_acct) in accrual_items {
4441                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4442                        company_code,
4443                        description,
4444                        accrual_base,
4445                        expense_acct,
4446                        liability_acct,
4447                        close_date,
4448                        None,
4449                    );
4450                    close_jes.push(accrual_je);
4451                    if let Some(rev_je) = reversal_je {
4452                        close_jes.push(rev_je);
4453                    }
4454                }
4455            }
4456
4457            debug!(
4458                "Generated accrual entries for {} companies",
4459                company_codes.len()
4460            );
4461        }
4462
4463        for company_code in &company_codes {
4464            // Calculate net income for this company from existing JEs:
4465            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
4466            // Revenue (4xxx): credit-normal, so net = credits - debits
4467            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
4468            let mut total_revenue = Decimal::ZERO;
4469            let mut total_expenses = Decimal::ZERO;
4470
4471            for entry in entries.iter() {
4472                if entry.header.company_code != *company_code {
4473                    continue;
4474                }
4475                for line in &entry.lines {
4476                    let category = AccountCategory::from_account(&line.gl_account);
4477                    match category {
4478                        AccountCategory::Revenue => {
4479                            // Revenue is credit-normal: net revenue = credits - debits
4480                            total_revenue += line.credit_amount - line.debit_amount;
4481                        }
4482                        AccountCategory::Cogs
4483                        | AccountCategory::OperatingExpense
4484                        | AccountCategory::OtherIncomeExpense
4485                        | AccountCategory::Tax => {
4486                            // Expenses are debit-normal: net expense = debits - credits
4487                            total_expenses += line.debit_amount - line.credit_amount;
4488                        }
4489                        _ => {}
4490                    }
4491                }
4492            }
4493
4494            let pre_tax_income = total_revenue - total_expenses;
4495
4496            // Skip if no income statement activity
4497            if pre_tax_income == Decimal::ZERO {
4498                debug!(
4499                    "Company {}: no pre-tax income, skipping period close",
4500                    company_code
4501                );
4502                continue;
4503            }
4504
4505            // --- Tax provision / DTA JE ---
4506            if pre_tax_income > Decimal::ZERO {
4507                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4508                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4509
4510                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4511                tax_header.document_type = "CL".to_string();
4512                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4513                tax_header.created_by = "CLOSE_ENGINE".to_string();
4514                tax_header.source = TransactionSource::Automated;
4515                tax_header.business_process = Some(BusinessProcess::R2R);
4516
4517                let doc_id = tax_header.document_id;
4518                let mut tax_je = JournalEntry::new(tax_header);
4519
4520                // DR Tax Expense (8000)
4521                tax_je.add_line(JournalEntryLine::debit(
4522                    doc_id,
4523                    1,
4524                    tax_accounts::TAX_EXPENSE.to_string(),
4525                    tax_amount,
4526                ));
4527                // CR Income Tax Payable (2130)
4528                tax_je.add_line(JournalEntryLine::credit(
4529                    doc_id,
4530                    2,
4531                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4532                    tax_amount,
4533                ));
4534
4535                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4536                close_jes.push(tax_je);
4537            } else {
4538                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
4539                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
4540                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4541                if dta_amount > Decimal::ZERO {
4542                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4543                    dta_header.document_type = "CL".to_string();
4544                    dta_header.header_text =
4545                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
4546                    dta_header.created_by = "CLOSE_ENGINE".to_string();
4547                    dta_header.source = TransactionSource::Automated;
4548                    dta_header.business_process = Some(BusinessProcess::R2R);
4549
4550                    let doc_id = dta_header.document_id;
4551                    let mut dta_je = JournalEntry::new(dta_header);
4552
4553                    // DR Deferred Tax Asset (1600)
4554                    dta_je.add_line(JournalEntryLine::debit(
4555                        doc_id,
4556                        1,
4557                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4558                        dta_amount,
4559                    ));
4560                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
4561                    // reflecting the benefit of the future deductible temporary difference.
4562                    dta_je.add_line(JournalEntryLine::credit(
4563                        doc_id,
4564                        2,
4565                        tax_accounts::TAX_EXPENSE.to_string(),
4566                        dta_amount,
4567                    ));
4568
4569                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4570                    close_jes.push(dta_je);
4571                    debug!(
4572                        "Company {}: loss year — recognised DTA of {}",
4573                        company_code, dta_amount
4574                    );
4575                }
4576            }
4577
4578            // --- Dividend JEs (v2.4) ---
4579            // If the entity is profitable after tax, declare a 10% dividend payout.
4580            // This runs AFTER tax provision so the dividend is based on post-tax income
4581            // but BEFORE the retained earnings close so the RE transfer reflects the
4582            // reduced balance.
4583            let tax_provision = if pre_tax_income > Decimal::ZERO {
4584                (pre_tax_income * tax_rate).round_dp(2)
4585            } else {
4586                Decimal::ZERO
4587            };
4588            let net_income = pre_tax_income - tax_provision;
4589
4590            if net_income > Decimal::ZERO {
4591                use datasynth_generators::DividendGenerator;
4592                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
4593                let mut div_gen = DividendGenerator::new(self.seed + 460);
4594                let currency_str = self
4595                    .config
4596                    .companies
4597                    .iter()
4598                    .find(|c| c.code == *company_code)
4599                    .map(|c| c.currency.as_str())
4600                    .unwrap_or("USD");
4601                let div_result = div_gen.generate(
4602                    company_code,
4603                    close_date,
4604                    Decimal::new(1, 0), // $1 per share placeholder
4605                    dividend_amount,
4606                    currency_str,
4607                );
4608                let div_je_count = div_result.journal_entries.len();
4609                close_jes.extend(div_result.journal_entries);
4610                debug!(
4611                    "Company {}: declared dividend of {} ({} JEs)",
4612                    company_code, dividend_amount, div_je_count
4613                );
4614            }
4615
4616            // --- Income statement closing JE ---
4617            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
4618            // For a loss year the DTA JE above already recognises the deferred benefit; here we
4619            // close the pre-tax loss into Retained Earnings as-is.
4620            if net_income != Decimal::ZERO {
4621                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4622                close_header.document_type = "CL".to_string();
4623                close_header.header_text =
4624                    Some(format!("Income statement close - {}", company_code));
4625                close_header.created_by = "CLOSE_ENGINE".to_string();
4626                close_header.source = TransactionSource::Automated;
4627                close_header.business_process = Some(BusinessProcess::R2R);
4628
4629                let doc_id = close_header.document_id;
4630                let mut close_je = JournalEntry::new(close_header);
4631
4632                let abs_net_income = net_income.abs();
4633
4634                if net_income > Decimal::ZERO {
4635                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
4636                    close_je.add_line(JournalEntryLine::debit(
4637                        doc_id,
4638                        1,
4639                        equity_accounts::INCOME_SUMMARY.to_string(),
4640                        abs_net_income,
4641                    ));
4642                    close_je.add_line(JournalEntryLine::credit(
4643                        doc_id,
4644                        2,
4645                        equity_accounts::RETAINED_EARNINGS.to_string(),
4646                        abs_net_income,
4647                    ));
4648                } else {
4649                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
4650                    close_je.add_line(JournalEntryLine::debit(
4651                        doc_id,
4652                        1,
4653                        equity_accounts::RETAINED_EARNINGS.to_string(),
4654                        abs_net_income,
4655                    ));
4656                    close_je.add_line(JournalEntryLine::credit(
4657                        doc_id,
4658                        2,
4659                        equity_accounts::INCOME_SUMMARY.to_string(),
4660                        abs_net_income,
4661                    ));
4662                }
4663
4664                debug_assert!(
4665                    close_je.is_balanced(),
4666                    "Income statement closing JE must be balanced"
4667                );
4668                close_jes.push(close_je);
4669            }
4670        }
4671
4672        let close_count = close_jes.len();
4673        if close_count > 0 {
4674            info!("Generated {} period-close journal entries", close_count);
4675            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4676            entries.extend(close_jes);
4677            stats.period_close_je_count = close_count;
4678
4679            // Update total entry/line-item stats
4680            stats.total_entries = entries.len() as u64;
4681            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4682        } else {
4683            debug!("No period-close entries generated (no income statement activity)");
4684        }
4685
4686        Ok(())
4687    }
4688
4689    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
4690    fn phase_audit_data(
4691        &mut self,
4692        entries: &[JournalEntry],
4693        stats: &mut EnhancedGenerationStatistics,
4694    ) -> SynthResult<AuditSnapshot> {
4695        if self.phase_config.generate_audit {
4696            info!("Phase 8: Generating Audit Data");
4697            let audit_snapshot = self.generate_audit_data(entries)?;
4698            stats.audit_engagement_count = audit_snapshot.engagements.len();
4699            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4700            stats.audit_evidence_count = audit_snapshot.evidence.len();
4701            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4702            stats.audit_finding_count = audit_snapshot.findings.len();
4703            stats.audit_judgment_count = audit_snapshot.judgments.len();
4704            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4705            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4706            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4707            stats.audit_sample_count = audit_snapshot.samples.len();
4708            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4709            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4710            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4711            stats.audit_related_party_count = audit_snapshot.related_parties.len();
4712            stats.audit_related_party_transaction_count =
4713                audit_snapshot.related_party_transactions.len();
4714            info!(
4715                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4716                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4717                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4718                 {} RP transactions",
4719                stats.audit_engagement_count,
4720                stats.audit_workpaper_count,
4721                stats.audit_evidence_count,
4722                stats.audit_risk_count,
4723                stats.audit_finding_count,
4724                stats.audit_judgment_count,
4725                stats.audit_confirmation_count,
4726                stats.audit_procedure_step_count,
4727                stats.audit_sample_count,
4728                stats.audit_analytical_result_count,
4729                stats.audit_ia_function_count,
4730                stats.audit_ia_report_count,
4731                stats.audit_related_party_count,
4732                stats.audit_related_party_transaction_count,
4733            );
4734            self.check_resources_with_log("post-audit")?;
4735            Ok(audit_snapshot)
4736        } else {
4737            debug!("Phase 8: Skipped (audit generation disabled)");
4738            Ok(AuditSnapshot::default())
4739        }
4740    }
4741
4742    /// Phase 9: Generate banking KYC/AML data.
4743    fn phase_banking_data(
4744        &mut self,
4745        stats: &mut EnhancedGenerationStatistics,
4746    ) -> SynthResult<BankingSnapshot> {
4747        if self.phase_config.generate_banking {
4748            info!("Phase 9: Generating Banking KYC/AML Data");
4749            let banking_snapshot = self.generate_banking_data()?;
4750            stats.banking_customer_count = banking_snapshot.customers.len();
4751            stats.banking_account_count = banking_snapshot.accounts.len();
4752            stats.banking_transaction_count = banking_snapshot.transactions.len();
4753            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4754            info!(
4755                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4756                stats.banking_customer_count, stats.banking_account_count,
4757                stats.banking_transaction_count, stats.banking_suspicious_count
4758            );
4759            self.check_resources_with_log("post-banking")?;
4760            Ok(banking_snapshot)
4761        } else {
4762            debug!("Phase 9: Skipped (banking generation disabled)");
4763            Ok(BankingSnapshot::default())
4764        }
4765    }
4766
4767    /// Phase 10: Export accounting network graphs for ML training.
4768    fn phase_graph_export(
4769        &mut self,
4770        entries: &[JournalEntry],
4771        coa: &Arc<ChartOfAccounts>,
4772        stats: &mut EnhancedGenerationStatistics,
4773    ) -> SynthResult<GraphExportSnapshot> {
4774        if self.phase_config.generate_graph_export && !entries.is_empty() {
4775            info!("Phase 10: Exporting Accounting Network Graphs");
4776            match self.export_graphs(entries, coa, stats) {
4777                Ok(snapshot) => {
4778                    info!(
4779                        "Graph export complete: {} graphs ({} nodes, {} edges)",
4780                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4781                    );
4782                    Ok(snapshot)
4783                }
4784                Err(e) => {
4785                    warn!("Phase 10: Graph export failed: {}", e);
4786                    Ok(GraphExportSnapshot::default())
4787                }
4788            }
4789        } else {
4790            debug!("Phase 10: Skipped (graph export disabled or no entries)");
4791            Ok(GraphExportSnapshot::default())
4792        }
4793    }
4794
4795    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
4796    #[allow(clippy::too_many_arguments)]
4797    fn phase_hypergraph_export(
4798        &self,
4799        coa: &Arc<ChartOfAccounts>,
4800        entries: &[JournalEntry],
4801        document_flows: &DocumentFlowSnapshot,
4802        sourcing: &SourcingSnapshot,
4803        hr: &HrSnapshot,
4804        manufacturing: &ManufacturingSnapshot,
4805        banking: &BankingSnapshot,
4806        audit: &AuditSnapshot,
4807        financial_reporting: &FinancialReportingSnapshot,
4808        ocpm: &OcpmSnapshot,
4809        compliance: &ComplianceRegulationsSnapshot,
4810        stats: &mut EnhancedGenerationStatistics,
4811    ) -> SynthResult<()> {
4812        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4813            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4814            match self.export_hypergraph(
4815                coa,
4816                entries,
4817                document_flows,
4818                sourcing,
4819                hr,
4820                manufacturing,
4821                banking,
4822                audit,
4823                financial_reporting,
4824                ocpm,
4825                compliance,
4826                stats,
4827            ) {
4828                Ok(info) => {
4829                    info!(
4830                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4831                        info.node_count, info.edge_count, info.hyperedge_count
4832                    );
4833                }
4834                Err(e) => {
4835                    warn!("Phase 10b: Hypergraph export failed: {}", e);
4836                }
4837            }
4838        } else {
4839            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4840        }
4841        Ok(())
4842    }
4843
4844    /// Phase 11: LLM Enrichment.
4845    ///
4846    /// Uses an LLM provider (mock by default) to enrich vendor names with
4847    /// realistic, context-aware names. This phase is non-blocking: failures
4848    /// log a warning but do not stop the generation pipeline.
4849    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4850        if !self.config.llm.enabled {
4851            debug!("Phase 11: Skipped (LLM enrichment disabled)");
4852            return;
4853        }
4854
4855        info!("Phase 11: Starting LLM Enrichment");
4856        let start = std::time::Instant::now();
4857
4858        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4859            // Select provider: use HttpLlmProvider when a non-mock provider is configured
4860            // and the corresponding API key environment variable is present.
4861            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4862                let schema_provider = &self.config.llm.provider;
4863                let api_key_env = match schema_provider.as_str() {
4864                    "openai" => Some("OPENAI_API_KEY"),
4865                    "anthropic" => Some("ANTHROPIC_API_KEY"),
4866                    "custom" => Some("LLM_API_KEY"),
4867                    _ => None,
4868                };
4869                if let Some(key_env) = api_key_env {
4870                    if std::env::var(key_env).is_ok() {
4871                        let llm_config = datasynth_core::llm::LlmConfig {
4872                            model: self.config.llm.model.clone(),
4873                            api_key_env: key_env.to_string(),
4874                            ..datasynth_core::llm::LlmConfig::default()
4875                        };
4876                        match HttpLlmProvider::new(llm_config) {
4877                            Ok(p) => Arc::new(p),
4878                            Err(e) => {
4879                                warn!(
4880                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
4881                                    e
4882                                );
4883                                Arc::new(MockLlmProvider::new(self.seed))
4884                            }
4885                        }
4886                    } else {
4887                        Arc::new(MockLlmProvider::new(self.seed))
4888                    }
4889                } else {
4890                    Arc::new(MockLlmProvider::new(self.seed))
4891                }
4892            };
4893            // v4.1.1+: multi-category enrichment. Vendors remain the
4894            // default path; customers and materials opt in via
4895            // `llm.enrich_customers` / `llm.enrich_materials` flags.
4896            let industry = format!("{:?}", self.config.global.industry);
4897
4898            let vendor_enricher =
4899                datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
4900            let max_vendors = self
4901                .config
4902                .llm
4903                .max_vendor_enrichments
4904                .min(self.master_data.vendors.len());
4905            let mut vendors_enriched = 0usize;
4906            for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
4907                match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4908                    Ok(name) => {
4909                        vendor.name = name;
4910                        vendors_enriched += 1;
4911                    }
4912                    Err(e) => warn!(
4913                        "LLM vendor enrichment failed for {}: {}",
4914                        vendor.vendor_id, e
4915                    ),
4916                }
4917            }
4918
4919            let mut customers_enriched = 0usize;
4920            if self.config.llm.enrich_customers {
4921                let customer_enricher =
4922                    datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
4923                        &provider,
4924                    ));
4925                let max_customers = self
4926                    .config
4927                    .llm
4928                    .max_customer_enrichments
4929                    .min(self.master_data.customers.len());
4930                for customer in self.master_data.customers.iter_mut().take(max_customers) {
4931                    match customer_enricher.enrich_customer_name(
4932                        &industry,
4933                        "general",
4934                        &customer.country,
4935                    ) {
4936                        Ok(name) => {
4937                            customer.name = name;
4938                            customers_enriched += 1;
4939                        }
4940                        Err(e) => warn!(
4941                            "LLM customer enrichment failed for {}: {}",
4942                            customer.customer_id, e
4943                        ),
4944                    }
4945                }
4946            }
4947
4948            let mut materials_enriched = 0usize;
4949            if self.config.llm.enrich_materials {
4950                let material_enricher =
4951                    datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
4952                        &provider,
4953                    ));
4954                let max_materials = self
4955                    .config
4956                    .llm
4957                    .max_material_enrichments
4958                    .min(self.master_data.materials.len());
4959                for material in self.master_data.materials.iter_mut().take(max_materials) {
4960                    let material_type = format!("{:?}", material.material_type);
4961                    match material_enricher.enrich_material_description(&material_type, &industry) {
4962                        Ok(desc) => {
4963                            material.description = desc;
4964                            materials_enriched += 1;
4965                        }
4966                        Err(e) => warn!(
4967                            "LLM material enrichment failed for {}: {}",
4968                            material.material_id, e
4969                        ),
4970                    }
4971                }
4972            }
4973
4974            (vendors_enriched, customers_enriched, materials_enriched)
4975        }));
4976
4977        match result {
4978            Ok((v, c, m)) => {
4979                stats.llm_vendors_enriched = v;
4980                stats.llm_customers_enriched = c;
4981                stats.llm_materials_enriched = m;
4982                let elapsed = start.elapsed();
4983                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4984                info!(
4985                    "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
4986                    v, c, m, stats.llm_enrichment_ms
4987                );
4988            }
4989            Err(_) => {
4990                let elapsed = start.elapsed();
4991                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4992                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4993            }
4994        }
4995    }
4996
4997    /// Phase 12: Diffusion Enhancement.
4998    ///
4999    /// Generates a sample set matching distribution properties from the
5000    /// generated data. v4.4.0+ honours `config.diffusion.backend`:
5001    /// - `"statistical"` (default) — moment-matching backend, always fast.
5002    /// - `"neural"` / `"hybrid"` — candle-based score network. Requires
5003    ///   the `neural` Cargo feature; falls back to statistical when the
5004    ///   feature isn't compiled in, with a loud warning.
5005    ///
5006    /// This phase is non-blocking: failures log a warning but do not
5007    /// stop the pipeline.
5008    fn phase_diffusion_enhancement(
5009        &self,
5010        #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5011        stats: &mut EnhancedGenerationStatistics,
5012    ) {
5013        if !self.config.diffusion.enabled {
5014            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5015            return;
5016        }
5017
5018        info!("Phase 12: Starting Diffusion Enhancement");
5019        let start = std::time::Instant::now();
5020
5021        let backend_choice = self.config.diffusion.backend.as_str();
5022        let use_neural = matches!(backend_choice, "neural" | "hybrid");
5023
5024        if use_neural {
5025            #[cfg(feature = "neural")]
5026            {
5027                match self.run_neural_diffusion_phase(entries) {
5028                    Ok(sample_count) => {
5029                        stats.diffusion_samples_generated = sample_count;
5030                        let elapsed = start.elapsed();
5031                        stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5032                        info!(
5033                            "Phase 12 complete ({}): {} samples in {}ms",
5034                            backend_choice, sample_count, stats.diffusion_enhancement_ms
5035                        );
5036                        return;
5037                    }
5038                    Err(e) => {
5039                        warn!(
5040                            "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5041                        );
5042                        // Fall through to statistical path below.
5043                    }
5044                }
5045            }
5046            #[cfg(not(feature = "neural"))]
5047            {
5048                warn!(
5049                    "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5050                     not compiled in — falling back to statistical. Rebuild with \
5051                     `--features neural` (or `neural-cuda` for GPU) to enable.",
5052                    backend_choice
5053                );
5054            }
5055        } else if !matches!(backend_choice, "statistical" | "") {
5056            warn!(
5057                "Phase 12: unknown backend '{}', falling back to statistical",
5058                backend_choice
5059            );
5060        }
5061
5062        // Statistical path (default + fallback).
5063        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5064            let means = vec![5000.0, 3.0, 2.0];
5065            let stds = vec![2000.0, 1.5, 1.0];
5066
5067            let diffusion_config = DiffusionConfig {
5068                n_steps: self.config.diffusion.n_steps,
5069                seed: self.seed,
5070                ..Default::default()
5071            };
5072
5073            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5074            let n_samples = self.config.diffusion.sample_size;
5075            let n_features = 3;
5076            backend.generate(n_samples, n_features, self.seed).len()
5077        }));
5078
5079        match result {
5080            Ok(sample_count) => {
5081                stats.diffusion_samples_generated = sample_count;
5082                let elapsed = start.elapsed();
5083                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5084                info!(
5085                    "Phase 12 complete (statistical): {} samples in {}ms",
5086                    sample_count, stats.diffusion_enhancement_ms
5087                );
5088            }
5089            Err(_) => {
5090                let elapsed = start.elapsed();
5091                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5092                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5093            }
5094        }
5095    }
5096
/// Neural-backend execution for Phase 12.
///
/// Either loads a pre-trained checkpoint (when
/// `config.diffusion.neural.checkpoint_path` is set) or trains a score
/// network on feature rows built from at most the first 5000 journal
/// entries. Each row is `[sum of positive debit amounts, line count,
/// required approval levels (1 when no workflow is present)]`.
///
/// Returns the number of samples produced.
///
/// # Errors
///
/// Fails when `entries` is empty, when the checkpoint cannot be loaded, or
/// when training fails; the caller falls back to the statistical backend.
#[cfg(feature = "neural")]
fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
    use datasynth_core::diffusion::{
        DiffusionBackend, NeuralDiffusionBackend, NeuralDiffusionTrainer, NeuralTrainingConfig,
    };
    use rust_decimal::prelude::ToPrimitive;

    if entries.is_empty() {
        return Err(SynthError::generation(
            "neural diffusion: no journal entries available as training data",
        ));
    }

    // Turns one journal entry into its three-element feature row.
    let feature_row = |je: &JournalEntry| -> Vec<f64> {
        let debit_total: f64 = je
            .lines
            .iter()
            .filter_map(|line| {
                (line.debit_amount > rust_decimal::Decimal::ZERO)
                    .then(|| line.debit_amount.to_f64().unwrap_or(0.0))
            })
            .sum();
        // Approval-workflow depth serves as the third feature
        // (proxy for complexity / risk). `None` → 1.
        let approval_depth = je
            .header
            .approval_workflow
            .as_ref()
            .map_or(1.0, |workflow| workflow.required_levels as f64);
        vec![debit_total, je.lines.len() as f64, approval_depth]
    };

    let training_data: Vec<Vec<f64>> = entries.iter().take(5000).map(feature_row).collect();
    let n_features = training_data.first().map_or(3, Vec::len);

    let cfg = &self.config.diffusion;
    let neural_cfg = &cfg.neural;

    let backend: NeuralDiffusionBackend = match neural_cfg.checkpoint_path.as_ref() {
        Some(ckpt_path) => {
            let path = std::path::Path::new(ckpt_path);
            info!(
                "  Neural diffusion: loading checkpoint from {}",
                path.display()
            );
            NeuralDiffusionBackend::load(path)
                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
        }
        None => {
            info!(
                "  Neural diffusion: training score network on {} rows × {} features, \
                 {} epochs, hidden_dims={:?}",
                training_data.len(),
                n_features,
                neural_cfg.training_epochs,
                neural_cfg.hidden_dims
            );
            let training_config = NeuralTrainingConfig {
                n_steps: cfg.n_steps,
                schedule: cfg.schedule.clone(),
                hidden_dims: neural_cfg.hidden_dims.clone(),
                timestep_embed_dim: neural_cfg.timestep_embed_dim,
                learning_rate: neural_cfg.learning_rate,
                epochs: neural_cfg.training_epochs,
                batch_size: neural_cfg.batch_size,
            };
            let (trained, report) =
                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
            info!(
                "  Neural diffusion: training done — {} epochs, final_loss={:.4}",
                report.epochs_completed, report.final_loss
            );
            trained
        }
    };

    Ok(backend
        .generate(cfg.sample_size, n_features, self.seed)
        .len())
}
5184
5185    /// Phase 13: Causal Overlay.
5186    ///
5187    /// Builds a structural causal model from a built-in template (e.g.,
5188    /// fraud_detection) and generates causal samples. Optionally validates
5189    /// that the output respects the causal structure. This phase is
5190    /// non-blocking: failures log a warning but do not stop the pipeline.
5191    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5192        if !self.config.causal.enabled {
5193            debug!("Phase 13: Skipped (causal generation disabled)");
5194            return;
5195        }
5196
5197        info!("Phase 13: Starting Causal Overlay");
5198        let start = std::time::Instant::now();
5199
5200        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5201            // Select template based on config
5202            let graph = match self.config.causal.template.as_str() {
5203                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5204                _ => CausalGraph::fraud_detection_template(),
5205            };
5206
5207            let scm = StructuralCausalModel::new(graph.clone())
5208                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5209
5210            let n_samples = self.config.causal.sample_size;
5211            let samples = scm
5212                .generate(n_samples, self.seed)
5213                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5214
5215            // Optionally validate causal structure
5216            let validation_passed = if self.config.causal.validate {
5217                let report = CausalValidator::validate_causal_structure(&samples, &graph);
5218                if report.valid {
5219                    info!(
5220                        "Causal validation passed: all {} checks OK",
5221                        report.checks.len()
5222                    );
5223                } else {
5224                    warn!(
5225                        "Causal validation: {} violations detected: {:?}",
5226                        report.violations.len(),
5227                        report.violations
5228                    );
5229                }
5230                Some(report.valid)
5231            } else {
5232                None
5233            };
5234
5235            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5236        }));
5237
5238        match result {
5239            Ok(Ok((sample_count, validation_passed))) => {
5240                stats.causal_samples_generated = sample_count;
5241                stats.causal_validation_passed = validation_passed;
5242                let elapsed = start.elapsed();
5243                stats.causal_generation_ms = elapsed.as_millis() as u64;
5244                info!(
5245                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5246                    sample_count, stats.causal_generation_ms, validation_passed,
5247                );
5248            }
5249            Ok(Err(e)) => {
5250                let elapsed = start.elapsed();
5251                stats.causal_generation_ms = elapsed.as_millis() as u64;
5252                warn!("Phase 13: Causal generation failed: {}", e);
5253            }
5254            Err(_) => {
5255                let elapsed = start.elapsed();
5256                stats.causal_generation_ms = elapsed.as_millis() as u64;
5257                warn!("Phase 13: Causal generation failed (panic caught), continuing");
5258            }
5259        }
5260    }
5261
5262    /// Phase 14: Generate S2C sourcing data.
5263    fn phase_sourcing_data(
5264        &mut self,
5265        stats: &mut EnhancedGenerationStatistics,
5266    ) -> SynthResult<SourcingSnapshot> {
5267        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5268            debug!("Phase 14: Skipped (sourcing generation disabled)");
5269            return Ok(SourcingSnapshot::default());
5270        }
5271        let degradation = self.check_resources()?;
5272        if degradation >= DegradationLevel::Reduced {
5273            debug!(
5274                "Phase skipped due to resource pressure (degradation: {:?})",
5275                degradation
5276            );
5277            return Ok(SourcingSnapshot::default());
5278        }
5279
5280        info!("Phase 14: Generating S2C Sourcing Data");
5281        let seed = self.seed;
5282
5283        // Gather vendor data from master data
5284        let vendor_ids: Vec<String> = self
5285            .master_data
5286            .vendors
5287            .iter()
5288            .map(|v| v.vendor_id.clone())
5289            .collect();
5290        if vendor_ids.is_empty() {
5291            debug!("Phase 14: Skipped (no vendors available)");
5292            return Ok(SourcingSnapshot::default());
5293        }
5294
5295        let categories: Vec<(String, String)> = vec![
5296            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5297            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5298            ("CAT-IT".to_string(), "IT Equipment".to_string()),
5299            ("CAT-SVC".to_string(), "Professional Services".to_string()),
5300            ("CAT-LOG".to_string(), "Logistics".to_string()),
5301        ];
5302        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5303            .iter()
5304            .map(|(id, name)| {
5305                (
5306                    id.clone(),
5307                    name.clone(),
5308                    rust_decimal::Decimal::from(100_000),
5309                )
5310            })
5311            .collect();
5312
5313        let company_code = self
5314            .config
5315            .companies
5316            .first()
5317            .map(|c| c.code.as_str())
5318            .unwrap_or("1000");
5319        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5320            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5321        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5322        let fiscal_year = start_date.year() as u16;
5323        let owner_ids: Vec<String> = self
5324            .master_data
5325            .employees
5326            .iter()
5327            .take(5)
5328            .map(|e| e.employee_id.clone())
5329            .collect();
5330        let owner_id = owner_ids
5331            .first()
5332            .map(std::string::String::as_str)
5333            .unwrap_or("BUYER-001");
5334
5335        // Step 1: Spend Analysis
5336        let mut spend_gen = SpendAnalysisGenerator::new(seed);
5337        let spend_analyses =
5338            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5339
5340        // Step 2: Sourcing Projects
5341        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5342        let sourcing_projects = if owner_ids.is_empty() {
5343            Vec::new()
5344        } else {
5345            project_gen.generate(
5346                company_code,
5347                &categories_with_spend,
5348                &owner_ids,
5349                start_date,
5350                self.config.global.period_months,
5351            )
5352        };
5353        stats.sourcing_project_count = sourcing_projects.len();
5354
5355        // Step 3: Qualifications
5356        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5357        let mut qual_gen = QualificationGenerator::new(seed + 2);
5358        let qualifications = qual_gen.generate(
5359            company_code,
5360            &qual_vendor_ids,
5361            sourcing_projects.first().map(|p| p.project_id.as_str()),
5362            owner_id,
5363            start_date,
5364        );
5365
5366        // Step 4: RFx Events
5367        let mut rfx_gen = RfxGenerator::new(seed + 3);
5368        let rfx_events: Vec<RfxEvent> = sourcing_projects
5369            .iter()
5370            .map(|proj| {
5371                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5372                rfx_gen.generate(
5373                    company_code,
5374                    &proj.project_id,
5375                    &proj.category_id,
5376                    &qualified_vids,
5377                    owner_id,
5378                    start_date,
5379                    50000.0,
5380                )
5381            })
5382            .collect();
5383        stats.rfx_event_count = rfx_events.len();
5384
5385        // Step 5: Bids
5386        let mut bid_gen = BidGenerator::new(seed + 4);
5387        let mut all_bids = Vec::new();
5388        for rfx in &rfx_events {
5389            let bidder_count = vendor_ids.len().clamp(2, 5);
5390            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5391            let bids = bid_gen.generate(rfx, &responding, start_date);
5392            all_bids.extend(bids);
5393        }
5394        stats.bid_count = all_bids.len();
5395
5396        // Step 6: Bid Evaluations
5397        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5398        let bid_evaluations: Vec<BidEvaluation> = rfx_events
5399            .iter()
5400            .map(|rfx| {
5401                let rfx_bids: Vec<SupplierBid> = all_bids
5402                    .iter()
5403                    .filter(|b| b.rfx_id == rfx.rfx_id)
5404                    .cloned()
5405                    .collect();
5406                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5407            })
5408            .collect();
5409
5410        // Step 7: Contracts from winning bids
5411        let mut contract_gen = ContractGenerator::new(seed + 6);
5412        let contracts: Vec<ProcurementContract> = bid_evaluations
5413            .iter()
5414            .zip(rfx_events.iter())
5415            .filter_map(|(eval, rfx)| {
5416                eval.ranked_bids.first().and_then(|winner| {
5417                    all_bids
5418                        .iter()
5419                        .find(|b| b.bid_id == winner.bid_id)
5420                        .map(|winning_bid| {
5421                            contract_gen.generate_from_bid(
5422                                winning_bid,
5423                                Some(&rfx.sourcing_project_id),
5424                                &rfx.category_id,
5425                                owner_id,
5426                                start_date,
5427                            )
5428                        })
5429                })
5430            })
5431            .collect();
5432        stats.contract_count = contracts.len();
5433
5434        // Step 8: Catalog Items
5435        let mut catalog_gen = CatalogGenerator::new(seed + 7);
5436        let catalog_items = catalog_gen.generate(&contracts);
5437        stats.catalog_item_count = catalog_items.len();
5438
5439        // Step 9: Scorecards
5440        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5441        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5442            .iter()
5443            .fold(
5444                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5445                |mut acc, c| {
5446                    acc.entry(c.vendor_id.clone()).or_default().push(c);
5447                    acc
5448                },
5449            )
5450            .into_iter()
5451            .collect();
5452        let scorecards = scorecard_gen.generate(
5453            company_code,
5454            &vendor_contracts,
5455            start_date,
5456            end_date,
5457            owner_id,
5458        );
5459        stats.scorecard_count = scorecards.len();
5460
5461        // Back-populate cross-references on sourcing projects (Task 35)
5462        // Link each project to its RFx events, contracts, and spend analyses
5463        let mut sourcing_projects = sourcing_projects;
5464        for project in &mut sourcing_projects {
5465            // Link RFx events generated for this project
5466            project.rfx_ids = rfx_events
5467                .iter()
5468                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5469                .map(|rfx| rfx.rfx_id.clone())
5470                .collect();
5471
5472            // Link contract awarded from this project's RFx
5473            project.contract_id = contracts
5474                .iter()
5475                .find(|c| {
5476                    c.sourcing_project_id
5477                        .as_deref()
5478                        .is_some_and(|sp| sp == project.project_id)
5479                })
5480                .map(|c| c.contract_id.clone());
5481
5482            // Link spend analysis for matching category (use category_id as the reference)
5483            project.spend_analysis_id = spend_analyses
5484                .iter()
5485                .find(|sa| sa.category_id == project.category_id)
5486                .map(|sa| sa.category_id.clone());
5487        }
5488
5489        info!(
5490            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5491            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5492            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5493        );
5494        self.check_resources_with_log("post-sourcing")?;
5495
5496        Ok(SourcingSnapshot {
5497            spend_analyses,
5498            sourcing_projects,
5499            qualifications,
5500            rfx_events,
5501            bids: all_bids,
5502            bid_evaluations,
5503            contracts,
5504            catalog_items,
5505            scorecards,
5506        })
5507    }
5508
5509    /// Build a [`GroupStructure`] from the current company configuration.
5510    ///
5511    /// The first company in the configuration is treated as the ultimate parent.
5512    /// All remaining companies become wholly-owned (100 %) subsidiaries with
5513    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
5514    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5515        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5516
5517        let parent_code = self
5518            .config
5519            .companies
5520            .first()
5521            .map(|c| c.code.clone())
5522            .unwrap_or_else(|| "PARENT".to_string());
5523
5524        let mut group = GroupStructure::new(parent_code);
5525
5526        for company in self.config.companies.iter().skip(1) {
5527            let sub =
5528                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5529            group.add_subsidiary(sub);
5530        }
5531
5532        group
5533    }
5534
5535    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
5536    fn phase_intercompany(
5537        &mut self,
5538        journal_entries: &[JournalEntry],
5539        stats: &mut EnhancedGenerationStatistics,
5540    ) -> SynthResult<IntercompanySnapshot> {
5541        // Skip if intercompany is disabled in config
5542        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5543            debug!("Phase 14b: Skipped (intercompany generation disabled)");
5544            return Ok(IntercompanySnapshot::default());
5545        }
5546
5547        // Intercompany requires at least 2 companies
5548        if self.config.companies.len() < 2 {
5549            debug!(
5550                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5551                self.config.companies.len()
5552            );
5553            return Ok(IntercompanySnapshot::default());
5554        }
5555
5556        info!("Phase 14b: Generating Intercompany Transactions");
5557
5558        // Build the group structure early — used by ISA 600 component auditor scope
5559        // and consolidated financial statement generators downstream.
5560        let group_structure = self.build_group_structure();
5561        debug!(
5562            "Group structure built: parent={}, subsidiaries={}",
5563            group_structure.parent_entity,
5564            group_structure.subsidiaries.len()
5565        );
5566
5567        let seed = self.seed;
5568        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5569            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5570        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5571
5572        // Build ownership structure from company configs
5573        // First company is treated as the parent, remaining are subsidiaries
5574        let parent_code = self.config.companies[0].code.clone();
5575        let mut ownership_structure =
5576            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5577
5578        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5579            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5580                format!("REL{:03}", i + 1),
5581                parent_code.clone(),
5582                company.code.clone(),
5583                rust_decimal::Decimal::from(100), // Default 100% ownership
5584                start_date,
5585            );
5586            ownership_structure.add_relationship(relationship);
5587        }
5588
5589        // Convert config transfer pricing method to core model enum
5590        let tp_method = match self.config.intercompany.transfer_pricing_method {
5591            datasynth_config::schema::TransferPricingMethod::CostPlus => {
5592                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5593            }
5594            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5595                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5596            }
5597            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5598                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5599            }
5600            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5601                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5602            }
5603            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5604                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5605            }
5606        };
5607
5608        // Build IC generator config from schema config
5609        let ic_currency = self
5610            .config
5611            .companies
5612            .first()
5613            .map(|c| c.currency.clone())
5614            .unwrap_or_else(|| "USD".to_string());
5615        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5616            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5617            transfer_pricing_method: tp_method,
5618            markup_percent: rust_decimal::Decimal::from_f64_retain(
5619                self.config.intercompany.markup_percent,
5620            )
5621            .unwrap_or(rust_decimal::Decimal::from(5)),
5622            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5623            default_currency: ic_currency,
5624            ..Default::default()
5625        };
5626
5627        // Create IC generator
5628        let mut ic_generator = datasynth_generators::ICGenerator::new(
5629            ic_gen_config,
5630            ownership_structure.clone(),
5631            seed + 50,
5632        );
5633
5634        // Generate IC transactions for the period
5635        // Use ~3 transactions per day as a reasonable default
5636        let transactions_per_day = 3;
5637        let matched_pairs = ic_generator.generate_transactions_for_period(
5638            start_date,
5639            end_date,
5640            transactions_per_day,
5641        );
5642
5643        // Generate IC source P2P/O2C documents
5644        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5645        debug!(
5646            "Generated {} IC seller invoices, {} IC buyer POs",
5647            ic_doc_chains.seller_invoices.len(),
5648            ic_doc_chains.buyer_orders.len()
5649        );
5650
5651        // Generate journal entries from matched pairs
5652        let mut seller_entries = Vec::new();
5653        let mut buyer_entries = Vec::new();
5654        let fiscal_year = start_date.year();
5655
5656        for pair in &matched_pairs {
5657            let fiscal_period = pair.posting_date.month();
5658            let (seller_je, buyer_je) =
5659                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5660            seller_entries.push(seller_je);
5661            buyer_entries.push(buyer_je);
5662        }
5663
5664        // Run matching engine
5665        let matching_config = datasynth_generators::ICMatchingConfig {
5666            base_currency: self
5667                .config
5668                .companies
5669                .first()
5670                .map(|c| c.currency.clone())
5671                .unwrap_or_else(|| "USD".to_string()),
5672            ..Default::default()
5673        };
5674        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5675        matching_engine.load_matched_pairs(&matched_pairs);
5676        let matching_result = matching_engine.run_matching(end_date);
5677
5678        // Generate elimination entries if configured
5679        let mut elimination_entries = Vec::new();
5680        if self.config.intercompany.generate_eliminations {
5681            let elim_config = datasynth_generators::EliminationConfig {
5682                consolidation_entity: "GROUP".to_string(),
5683                base_currency: self
5684                    .config
5685                    .companies
5686                    .first()
5687                    .map(|c| c.currency.clone())
5688                    .unwrap_or_else(|| "USD".to_string()),
5689                ..Default::default()
5690            };
5691
5692            let mut elim_generator =
5693                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5694
5695            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5696            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5697                matching_result
5698                    .matched_balances
5699                    .iter()
5700                    .chain(matching_result.unmatched_balances.iter())
5701                    .cloned()
5702                    .collect();
5703
5704            // Build investment and equity maps from the group structure so that the
5705            // elimination generator can produce equity-investment elimination entries
5706            // (parent's investment in subsidiary vs. subsidiary's equity capital).
5707            //
5708            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
5709            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
5710            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
5711            //
5712            // Net assets are derived from the journal entries using account-range heuristics:
5713            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
5714            // no JE data is available (IC phase runs early in the generation pipeline).
5715            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5716                std::collections::HashMap::new();
5717            let mut equity_amounts: std::collections::HashMap<
5718                String,
5719                std::collections::HashMap<String, rust_decimal::Decimal>,
5720            > = std::collections::HashMap::new();
5721            {
5722                use rust_decimal::Decimal;
5723                let hundred = Decimal::from(100u32);
5724                let ten_pct = Decimal::new(10, 2); // 0.10
5725                let thirty_pct = Decimal::new(30, 2); // 0.30
5726                let sixty_pct = Decimal::new(60, 2); // 0.60
5727                let parent_code = &group_structure.parent_entity;
5728                for sub in &group_structure.subsidiaries {
5729                    let net_assets = {
5730                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5731                        if na > Decimal::ZERO {
5732                            na
5733                        } else {
5734                            Decimal::from(1_000_000u64)
5735                        }
5736                    };
5737                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
5738                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5739                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5740
5741                    // Split subsidiary equity into conventional components:
5742                    // 10 % share capital / 30 % APIC / 60 % retained earnings
5743                    let mut eq_map = std::collections::HashMap::new();
5744                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5745                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5746                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5747                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
5748                }
5749            }
5750
5751            let journal = elim_generator.generate_eliminations(
5752                &fiscal_period,
5753                end_date,
5754                &all_balances,
5755                &matched_pairs,
5756                &investment_amounts,
5757                &equity_amounts,
5758            );
5759
5760            elimination_entries = journal.entries.clone();
5761        }
5762
5763        let matched_pair_count = matched_pairs.len();
5764        let elimination_entry_count = elimination_entries.len();
5765        let match_rate = matching_result.match_rate;
5766
5767        stats.ic_matched_pair_count = matched_pair_count;
5768        stats.ic_elimination_count = elimination_entry_count;
5769        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5770
5771        info!(
5772            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5773            matched_pair_count,
5774            stats.ic_transaction_count,
5775            seller_entries.len(),
5776            buyer_entries.len(),
5777            elimination_entry_count,
5778            match_rate * 100.0
5779        );
5780        self.check_resources_with_log("post-intercompany")?;
5781
5782        // ----------------------------------------------------------------
5783        // NCI measurements: derive from group structure ownership percentages
5784        // ----------------------------------------------------------------
5785        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5786            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5787            use rust_decimal::Decimal;
5788
5789            let eight_pct = Decimal::new(8, 2); // 0.08
5790
5791            group_structure
5792                .subsidiaries
5793                .iter()
5794                .filter(|sub| {
5795                    sub.nci_percentage > Decimal::ZERO
5796                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5797                })
5798                .map(|sub| {
5799                    // Compute net assets from actual journal entries for this subsidiary.
5800                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
5801                    // IC phase runs before the main JE batch has been populated).
5802                    let net_assets_from_jes =
5803                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5804
5805                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
5806                        net_assets_from_jes.round_dp(2)
5807                    } else {
5808                        // Fallback: use a plausible base amount
5809                        Decimal::from(1_000_000u64)
5810                    };
5811
5812                    // Net income approximated as 8% of net assets
5813                    let net_income = (net_assets * eight_pct).round_dp(2);
5814
5815                    NciMeasurement::compute(
5816                        sub.entity_code.clone(),
5817                        sub.nci_percentage,
5818                        net_assets,
5819                        net_income,
5820                    )
5821                })
5822                .collect()
5823        };
5824
5825        if !nci_measurements.is_empty() {
5826            info!(
5827                "NCI measurements: {} subsidiaries with non-controlling interests",
5828                nci_measurements.len()
5829            );
5830        }
5831
5832        Ok(IntercompanySnapshot {
5833            group_structure: Some(group_structure),
5834            matched_pairs,
5835            seller_journal_entries: seller_entries,
5836            buyer_journal_entries: buyer_entries,
5837            elimination_entries,
5838            nci_measurements,
5839            ic_document_chains: Some(ic_doc_chains),
5840            matched_pair_count,
5841            elimination_entry_count,
5842            match_rate,
5843        })
5844    }
5845
5846    /// Phase 15: Generate bank reconciliations and financial statements.
5847    fn phase_financial_reporting(
5848        &mut self,
5849        document_flows: &DocumentFlowSnapshot,
5850        journal_entries: &[JournalEntry],
5851        coa: &Arc<ChartOfAccounts>,
5852        _hr: &HrSnapshot,
5853        _audit: &AuditSnapshot,
5854        stats: &mut EnhancedGenerationStatistics,
5855    ) -> SynthResult<FinancialReportingSnapshot> {
5856        let fs_enabled = self.phase_config.generate_financial_statements
5857            || self.config.financial_reporting.enabled;
5858        let br_enabled = self.phase_config.generate_bank_reconciliation;
5859
5860        if !fs_enabled && !br_enabled {
5861            debug!("Phase 15: Skipped (financial reporting disabled)");
5862            return Ok(FinancialReportingSnapshot::default());
5863        }
5864
5865        info!("Phase 15: Generating Financial Reporting Data");
5866
5867        let seed = self.seed;
5868        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5869            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5870
5871        let mut financial_statements = Vec::new();
5872        let mut bank_reconciliations = Vec::new();
5873        let mut trial_balances = Vec::new();
5874        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
5875        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
5876            Vec::new();
5877        // Standalone statements keyed by entity code
5878        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5879            std::collections::HashMap::new();
5880        // Consolidated statements (one per period)
5881        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5882        // Consolidation schedules (one per period)
5883        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5884
5885        // Generate financial statements from JE-derived trial balances.
5886        //
5887        // When journal entries are available, we use cumulative trial balances for
5888        // balance sheet accounts and current-period trial balances for income
5889        // statement accounts. We also track prior-period trial balances so the
5890        // generator can produce comparative amounts, and we build a proper
5891        // cash flow statement from working capital changes rather than random data.
5892        if fs_enabled {
5893            let has_journal_entries = !journal_entries.is_empty();
5894
5895            // Use FinancialStatementGenerator for balance sheet and income statement,
5896            // but build cash flow ourselves from TB data when JEs are available.
5897            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
5898            // Separate generator for consolidated statements (different seed offset)
5899            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
5900
5901            // Collect elimination JEs once (reused across periods)
5902            let elimination_entries: Vec<&JournalEntry> = journal_entries
5903                .iter()
5904                .filter(|je| je.header.is_elimination)
5905                .collect();
5906
5907            // Generate one set of statements per period, per entity
5908            for period in 0..self.config.global.period_months {
5909                let period_start = start_date + chrono::Months::new(period);
5910                let period_end =
5911                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5912                let fiscal_year = period_end.year() as u16;
5913                let fiscal_period = period_end.month() as u8;
5914                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5915
5916                // Build per-entity trial balances for this period (non-elimination JEs)
5917                // We accumulate them for the consolidation step.
5918                let mut entity_tb_map: std::collections::HashMap<
5919                    String,
5920                    std::collections::HashMap<String, rust_decimal::Decimal>,
5921                > = std::collections::HashMap::new();
5922
5923                // --- Standalone: one set of statements per company ---
5924                for (company_idx, company) in self.config.companies.iter().enumerate() {
5925                    let company_code = company.code.as_str();
5926                    let currency = company.currency.as_str();
5927                    // Use a unique seed offset per company to keep statements deterministic
5928                    // and distinct across companies
5929                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
5930                    let mut company_fs_gen =
5931                        FinancialStatementGenerator::new(seed + company_seed_offset);
5932
5933                    if has_journal_entries {
5934                        let tb_entries = Self::build_cumulative_trial_balance(
5935                            journal_entries,
5936                            coa,
5937                            company_code,
5938                            start_date,
5939                            period_end,
5940                            fiscal_year,
5941                            fiscal_period,
5942                        );
5943
5944                        // Accumulate per-entity category balances for consolidation
5945                        let entity_cat_map =
5946                            entity_tb_map.entry(company_code.to_string()).or_default();
5947                        for tb_entry in &tb_entries {
5948                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
5949                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
5950                        }
5951
5952                        let stmts = company_fs_gen.generate(
5953                            company_code,
5954                            currency,
5955                            &tb_entries,
5956                            period_start,
5957                            period_end,
5958                            fiscal_year,
5959                            fiscal_period,
5960                            None,
5961                            "SYS-AUTOCLOSE",
5962                        );
5963
5964                        let mut entity_stmts = Vec::new();
5965                        for stmt in stmts {
5966                            if stmt.statement_type == StatementType::CashFlowStatement {
5967                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
5968                                let cf_items = Self::build_cash_flow_from_trial_balances(
5969                                    &tb_entries,
5970                                    None,
5971                                    net_income,
5972                                );
5973                                entity_stmts.push(FinancialStatement {
5974                                    cash_flow_items: cf_items,
5975                                    ..stmt
5976                                });
5977                            } else {
5978                                entity_stmts.push(stmt);
5979                            }
5980                        }
5981
5982                        // Add to the flat financial_statements list (used by KPI/budget)
5983                        financial_statements.extend(entity_stmts.clone());
5984
5985                        // Store standalone per-entity
5986                        standalone_statements
5987                            .entry(company_code.to_string())
5988                            .or_default()
5989                            .extend(entity_stmts);
5990
5991                        // Only store trial balance for the first company in the period
5992                        // to avoid duplicates in the trial_balances list
5993                        if company_idx == 0 {
5994                            trial_balances.push(PeriodTrialBalance {
5995                                fiscal_year,
5996                                fiscal_period,
5997                                period_start,
5998                                period_end,
5999                                entries: tb_entries,
6000                            });
6001                        }
6002                    } else {
6003                        // Fallback: no JEs available
6004                        let tb_entries = Self::build_trial_balance_from_entries(
6005                            journal_entries,
6006                            coa,
6007                            company_code,
6008                            fiscal_year,
6009                            fiscal_period,
6010                        );
6011
6012                        let stmts = company_fs_gen.generate(
6013                            company_code,
6014                            currency,
6015                            &tb_entries,
6016                            period_start,
6017                            period_end,
6018                            fiscal_year,
6019                            fiscal_period,
6020                            None,
6021                            "SYS-AUTOCLOSE",
6022                        );
6023                        financial_statements.extend(stmts.clone());
6024                        standalone_statements
6025                            .entry(company_code.to_string())
6026                            .or_default()
6027                            .extend(stmts);
6028
6029                        if company_idx == 0 && !tb_entries.is_empty() {
6030                            trial_balances.push(PeriodTrialBalance {
6031                                fiscal_year,
6032                                fiscal_period,
6033                                period_start,
6034                                period_end,
6035                                entries: tb_entries,
6036                            });
6037                        }
6038                    }
6039                }
6040
6041                // --- Consolidated: aggregate all entities + apply eliminations ---
6042                // Use the primary (first) company's currency for the consolidated statement
6043                let group_currency = self
6044                    .config
6045                    .companies
6046                    .first()
6047                    .map(|c| c.currency.as_str())
6048                    .unwrap_or("USD");
6049
6050                // Build owned elimination entries for this period
6051                let period_eliminations: Vec<JournalEntry> = elimination_entries
6052                    .iter()
6053                    .filter(|je| {
6054                        je.header.fiscal_year == fiscal_year
6055                            && je.header.fiscal_period == fiscal_period
6056                    })
6057                    .map(|je| (*je).clone())
6058                    .collect();
6059
6060                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6061                    &entity_tb_map,
6062                    &period_eliminations,
6063                    &period_label,
6064                );
6065
6066                // Build a pseudo trial balance from consolidated line items for the
6067                // FinancialStatementGenerator to use (only for cash flow direction).
6068                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6069                    .line_items
6070                    .iter()
6071                    .map(|li| {
6072                        let net = li.post_elimination_total;
6073                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6074                            (net, rust_decimal::Decimal::ZERO)
6075                        } else {
6076                            (rust_decimal::Decimal::ZERO, -net)
6077                        };
6078                        datasynth_generators::TrialBalanceEntry {
6079                            account_code: li.account_category.clone(),
6080                            account_name: li.account_category.clone(),
6081                            category: li.account_category.clone(),
6082                            debit_balance: debit,
6083                            credit_balance: credit,
6084                        }
6085                    })
6086                    .collect();
6087
6088                let mut cons_stmts = cons_gen.generate(
6089                    "GROUP",
6090                    group_currency,
6091                    &cons_tb,
6092                    period_start,
6093                    period_end,
6094                    fiscal_year,
6095                    fiscal_period,
6096                    None,
6097                    "SYS-AUTOCLOSE",
6098                );
6099
6100                // Split consolidated line items by statement type.
6101                // The consolidation generator returns BS items first, then IS items,
6102                // identified by their CONS- prefix and category.
6103                let bs_categories: &[&str] = &[
6104                    "CASH",
6105                    "RECEIVABLES",
6106                    "INVENTORY",
6107                    "FIXEDASSETS",
6108                    "PAYABLES",
6109                    "ACCRUEDLIABILITIES",
6110                    "LONGTERMDEBT",
6111                    "EQUITY",
6112                ];
6113                let (bs_items, is_items): (Vec<_>, Vec<_>) =
6114                    cons_line_items.into_iter().partition(|li| {
6115                        let upper = li.label.to_uppercase();
6116                        bs_categories.iter().any(|c| upper == *c)
6117                    });
6118
6119                for stmt in &mut cons_stmts {
6120                    stmt.is_consolidated = true;
6121                    match stmt.statement_type {
6122                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6123                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6124                        _ => {} // CF and equity change statements keep generator output
6125                    }
6126                }
6127
6128                consolidated_statements.extend(cons_stmts);
6129                consolidation_schedules.push(schedule);
6130            }
6131
6132            // Backward compat: if only 1 company, use existing code path logic
6133            // (prior_cumulative_tb for comparative amounts). Already handled above;
6134            // the prior_ref is omitted to keep this change minimal.
6135            let _ = &mut fs_gen; // suppress unused warning
6136
6137            stats.financial_statement_count = financial_statements.len();
6138            info!(
6139                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6140                stats.financial_statement_count,
6141                consolidated_statements.len(),
6142                has_journal_entries
6143            );
6144
6145            // ----------------------------------------------------------------
6146            // IFRS 8 / ASC 280: Operating Segment Reporting
6147            // ----------------------------------------------------------------
6148            // Build entity seeds from the company configuration.
6149            let entity_seeds: Vec<SegmentSeed> = self
6150                .config
6151                .companies
6152                .iter()
6153                .map(|c| SegmentSeed {
6154                    code: c.code.clone(),
6155                    name: c.name.clone(),
6156                    currency: c.currency.clone(),
6157                })
6158                .collect();
6159
6160            let mut seg_gen = SegmentGenerator::new(seed + 30);
6161
6162            // Generate one set of segment reports per period.
6163            // We extract consolidated revenue / profit / assets from the consolidated
6164            // financial statements produced above, falling back to simple sums when
6165            // no consolidated statements were generated (single-entity path).
6166            for period in 0..self.config.global.period_months {
6167                let period_end =
6168                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6169                let fiscal_year = period_end.year() as u16;
6170                let fiscal_period = period_end.month() as u8;
6171                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6172
6173                use datasynth_core::models::StatementType;
6174
6175                // Try to find consolidated income statement for this period
6176                let cons_is = consolidated_statements.iter().find(|s| {
6177                    s.fiscal_year == fiscal_year
6178                        && s.fiscal_period == fiscal_period
6179                        && s.statement_type == StatementType::IncomeStatement
6180                });
6181                let cons_bs = consolidated_statements.iter().find(|s| {
6182                    s.fiscal_year == fiscal_year
6183                        && s.fiscal_period == fiscal_period
6184                        && s.statement_type == StatementType::BalanceSheet
6185                });
6186
6187                // If consolidated statements not available fall back to the flat list
6188                let is_stmt = cons_is.or_else(|| {
6189                    financial_statements.iter().find(|s| {
6190                        s.fiscal_year == fiscal_year
6191                            && s.fiscal_period == fiscal_period
6192                            && s.statement_type == StatementType::IncomeStatement
6193                    })
6194                });
6195                let bs_stmt = cons_bs.or_else(|| {
6196                    financial_statements.iter().find(|s| {
6197                        s.fiscal_year == fiscal_year
6198                            && s.fiscal_period == fiscal_period
6199                            && s.statement_type == StatementType::BalanceSheet
6200                    })
6201                });
6202
6203                let consolidated_revenue = is_stmt
6204                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6205                    .map(|li| -li.amount) // revenue is stored as negative in IS
6206                    .unwrap_or(rust_decimal::Decimal::ZERO);
6207
6208                let consolidated_profit = is_stmt
6209                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6210                    .map(|li| li.amount)
6211                    .unwrap_or(rust_decimal::Decimal::ZERO);
6212
6213                let consolidated_assets = bs_stmt
6214                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6215                    .map(|li| li.amount)
6216                    .unwrap_or(rust_decimal::Decimal::ZERO);
6217
6218                // Skip periods where we have no financial data
6219                if consolidated_revenue == rust_decimal::Decimal::ZERO
6220                    && consolidated_assets == rust_decimal::Decimal::ZERO
6221                {
6222                    continue;
6223                }
6224
6225                let group_code = self
6226                    .config
6227                    .companies
6228                    .first()
6229                    .map(|c| c.code.as_str())
6230                    .unwrap_or("GROUP");
6231
6232                // Compute period depreciation from JEs with document type "CL" hitting account
6233                // 6000 (depreciation expense).  These are generated by phase_period_close.
6234                let total_depr: rust_decimal::Decimal = journal_entries
6235                    .iter()
6236                    .filter(|je| je.header.document_type == "CL")
6237                    .flat_map(|je| je.lines.iter())
6238                    .filter(|l| l.gl_account.starts_with("6000"))
6239                    .map(|l| l.debit_amount)
6240                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6241                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6242                    Some(total_depr)
6243                } else {
6244                    None
6245                };
6246
6247                let (segs, recon) = seg_gen.generate(
6248                    group_code,
6249                    &period_label,
6250                    consolidated_revenue,
6251                    consolidated_profit,
6252                    consolidated_assets,
6253                    &entity_seeds,
6254                    depr_param,
6255                );
6256                segment_reports.extend(segs);
6257                segment_reconciliations.push(recon);
6258            }
6259
6260            info!(
6261                "Segment reports generated: {} segments, {} reconciliations",
6262                segment_reports.len(),
6263                segment_reconciliations.len()
6264            );
6265        }
6266
6267        // Generate bank reconciliations from payment data
6268        if br_enabled && !document_flows.payments.is_empty() {
6269            let employee_ids: Vec<String> = self
6270                .master_data
6271                .employees
6272                .iter()
6273                .map(|e| e.employee_id.clone())
6274                .collect();
6275            let mut br_gen =
6276                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6277
6278            // Group payments by company code and period
6279            for company in &self.config.companies {
6280                let company_payments: Vec<PaymentReference> = document_flows
6281                    .payments
6282                    .iter()
6283                    .filter(|p| p.header.company_code == company.code)
6284                    .map(|p| PaymentReference {
6285                        id: p.header.document_id.clone(),
6286                        amount: if p.is_vendor { p.amount } else { -p.amount },
6287                        date: p.header.document_date,
6288                        reference: p
6289                            .check_number
6290                            .clone()
6291                            .or_else(|| p.wire_reference.clone())
6292                            .unwrap_or_else(|| p.header.document_id.clone()),
6293                    })
6294                    .collect();
6295
6296                if company_payments.is_empty() {
6297                    continue;
6298                }
6299
6300                let bank_account_id = format!("{}-MAIN", company.code);
6301
6302                // Generate one reconciliation per period
6303                for period in 0..self.config.global.period_months {
6304                    let period_start = start_date + chrono::Months::new(period);
6305                    let period_end =
6306                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6307
6308                    let period_payments: Vec<PaymentReference> = company_payments
6309                        .iter()
6310                        .filter(|p| p.date >= period_start && p.date <= period_end)
6311                        .cloned()
6312                        .collect();
6313
6314                    let recon = br_gen.generate(
6315                        &company.code,
6316                        &bank_account_id,
6317                        period_start,
6318                        period_end,
6319                        &company.currency,
6320                        &period_payments,
6321                    );
6322                    bank_reconciliations.push(recon);
6323                }
6324            }
6325            info!(
6326                "Bank reconciliations generated: {} reconciliations",
6327                bank_reconciliations.len()
6328            );
6329        }
6330
6331        stats.bank_reconciliation_count = bank_reconciliations.len();
6332        self.check_resources_with_log("post-financial-reporting")?;
6333
6334        if !trial_balances.is_empty() {
6335            info!(
6336                "Period-close trial balances captured: {} periods",
6337                trial_balances.len()
6338            );
6339        }
6340
6341        // Notes to financial statements are generated in a separate post-processing step
6342        // (generate_notes_to_financial_statements) called after accounting_standards and tax
6343        // phases have completed, so that deferred tax and provision data can be wired in.
6344        let notes_to_financial_statements = Vec::new();
6345
6346        Ok(FinancialReportingSnapshot {
6347            financial_statements,
6348            standalone_statements,
6349            consolidated_statements,
6350            consolidation_schedules,
6351            bank_reconciliations,
6352            trial_balances,
6353            segment_reports,
6354            segment_reconciliations,
6355            notes_to_financial_statements,
6356        })
6357    }
6358
6359    /// Populate notes to financial statements using fully-resolved snapshots.
6360    ///
6361    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
6362    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
6363    /// can be wired into the notes context.  The method mutates
6364    /// `financial_reporting.notes_to_financial_statements` in-place.
6365    fn generate_notes_to_financial_statements(
6366        &self,
6367        financial_reporting: &mut FinancialReportingSnapshot,
6368        accounting_standards: &AccountingStandardsSnapshot,
6369        tax: &TaxSnapshot,
6370        hr: &HrSnapshot,
6371        audit: &AuditSnapshot,
6372        treasury: &TreasurySnapshot,
6373    ) {
6374        use datasynth_config::schema::AccountingFrameworkConfig;
6375        use datasynth_core::models::StatementType;
6376        use datasynth_generators::period_close::notes_generator::{
6377            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6378        };
6379
6380        let seed = self.seed;
6381        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6382        {
6383            Ok(d) => d,
6384            Err(_) => return,
6385        };
6386
6387        let mut notes_gen = NotesGenerator::new(seed + 4235);
6388
6389        for company in &self.config.companies {
6390            let last_period_end = start_date
6391                + chrono::Months::new(self.config.global.period_months)
6392                - chrono::Days::new(1);
6393            let fiscal_year = last_period_end.year() as u16;
6394
6395            // Extract relevant amounts from the already-generated financial statements
6396            let entity_is = financial_reporting
6397                .standalone_statements
6398                .get(&company.code)
6399                .and_then(|stmts| {
6400                    stmts.iter().find(|s| {
6401                        s.fiscal_year == fiscal_year
6402                            && s.statement_type == StatementType::IncomeStatement
6403                    })
6404                });
6405            let entity_bs = financial_reporting
6406                .standalone_statements
6407                .get(&company.code)
6408                .and_then(|stmts| {
6409                    stmts.iter().find(|s| {
6410                        s.fiscal_year == fiscal_year
6411                            && s.statement_type == StatementType::BalanceSheet
6412                    })
6413                });
6414
6415            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
6416            let revenue_amount = entity_is
6417                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6418                .map(|li| li.amount);
6419            let ppe_gross = entity_bs
6420                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6421                .map(|li| li.amount);
6422
6423            let framework = match self
6424                .config
6425                .accounting_standards
6426                .framework
6427                .unwrap_or_default()
6428            {
6429                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6430                    "IFRS".to_string()
6431                }
6432                _ => "US GAAP".to_string(),
6433            };
6434
6435            // ---- Deferred tax (IAS 12 / ASC 740) ----
6436            // Sum closing DTA and DTL from rollforward entries for this entity.
6437            let (entity_dta, entity_dtl) = {
6438                let mut dta = rust_decimal::Decimal::ZERO;
6439                let mut dtl = rust_decimal::Decimal::ZERO;
6440                for rf in &tax.deferred_tax.rollforwards {
6441                    if rf.entity_code == company.code {
6442                        dta += rf.closing_dta;
6443                        dtl += rf.closing_dtl;
6444                    }
6445                }
6446                (
6447                    if dta > rust_decimal::Decimal::ZERO {
6448                        Some(dta)
6449                    } else {
6450                        None
6451                    },
6452                    if dtl > rust_decimal::Decimal::ZERO {
6453                        Some(dtl)
6454                    } else {
6455                        None
6456                    },
6457                )
6458            };
6459
6460            // ---- Provisions (IAS 37 / ASC 450) ----
6461            // Filter provisions to this entity; sum best_estimate amounts.
6462            let entity_provisions: Vec<_> = accounting_standards
6463                .provisions
6464                .iter()
6465                .filter(|p| p.entity_code == company.code)
6466                .collect();
6467            let provision_count = entity_provisions.len();
6468            let total_provisions = if provision_count > 0 {
6469                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6470            } else {
6471                None
6472            };
6473
6474            // ---- Pension data from HR snapshot ----
6475            let entity_pension_plan_count = hr
6476                .pension_plans
6477                .iter()
6478                .filter(|p| p.entity_code == company.code)
6479                .count();
6480            let entity_total_dbo: Option<rust_decimal::Decimal> = {
6481                let sum: rust_decimal::Decimal = hr
6482                    .pension_disclosures
6483                    .iter()
6484                    .filter(|d| {
6485                        hr.pension_plans
6486                            .iter()
6487                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6488                    })
6489                    .map(|d| d.net_pension_liability)
6490                    .sum();
6491                let plan_assets_sum: rust_decimal::Decimal = hr
6492                    .pension_plan_assets
6493                    .iter()
6494                    .filter(|a| {
6495                        hr.pension_plans
6496                            .iter()
6497                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6498                    })
6499                    .map(|a| a.fair_value_closing)
6500                    .sum();
6501                if entity_pension_plan_count > 0 {
6502                    Some(sum + plan_assets_sum)
6503                } else {
6504                    None
6505                }
6506            };
6507            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6508                let sum: rust_decimal::Decimal = hr
6509                    .pension_plan_assets
6510                    .iter()
6511                    .filter(|a| {
6512                        hr.pension_plans
6513                            .iter()
6514                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6515                    })
6516                    .map(|a| a.fair_value_closing)
6517                    .sum();
6518                if entity_pension_plan_count > 0 {
6519                    Some(sum)
6520                } else {
6521                    None
6522                }
6523            };
6524
6525            // ---- Audit data: related parties + subsequent events ----
6526            // Audit snapshot covers all entities; use total counts (common case = single entity).
6527            let rp_count = audit.related_party_transactions.len();
6528            let se_count = audit.subsequent_events.len();
6529            let adjusting_count = audit
6530                .subsequent_events
6531                .iter()
6532                .filter(|e| {
6533                    matches!(
6534                        e.classification,
6535                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6536                    )
6537                })
6538                .count();
6539
6540            let ctx = NotesGeneratorContext {
6541                entity_code: company.code.clone(),
6542                framework,
6543                period: format!("FY{}", fiscal_year),
6544                period_end: last_period_end,
6545                currency: company.currency.clone(),
6546                revenue_amount,
6547                total_ppe_gross: ppe_gross,
6548                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6549                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
6550                deferred_tax_asset: entity_dta,
6551                deferred_tax_liability: entity_dtl,
6552                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
6553                provision_count,
6554                total_provisions,
6555                // Pension data from HR snapshot
6556                pension_plan_count: entity_pension_plan_count,
6557                total_dbo: entity_total_dbo,
6558                total_plan_assets: entity_total_plan_assets,
6559                // Audit data
6560                related_party_transaction_count: rp_count,
6561                subsequent_event_count: se_count,
6562                adjusting_event_count: adjusting_count,
6563                ..NotesGeneratorContext::default()
6564            };
6565
6566            let entity_notes = notes_gen.generate(&ctx);
6567            let standard_note_count = entity_notes.len() as u32;
6568            info!(
6569                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6570                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6571            );
6572            financial_reporting
6573                .notes_to_financial_statements
6574                .extend(entity_notes);
6575
6576            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
6577            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6578                .debt_instruments
6579                .iter()
6580                .filter(|d| d.entity_id == company.code)
6581                .map(|d| {
6582                    (
6583                        format!("{:?}", d.instrument_type),
6584                        d.principal,
6585                        d.maturity_date.to_string(),
6586                    )
6587                })
6588                .collect();
6589
6590            let hedge_count = treasury.hedge_relationships.len();
6591            let effective_hedges = treasury
6592                .hedge_relationships
6593                .iter()
6594                .filter(|h| h.is_effective)
6595                .count();
6596            let total_notional: rust_decimal::Decimal = treasury
6597                .hedging_instruments
6598                .iter()
6599                .map(|h| h.notional_amount)
6600                .sum();
6601            let total_fair_value: rust_decimal::Decimal = treasury
6602                .hedging_instruments
6603                .iter()
6604                .map(|h| h.fair_value)
6605                .sum();
6606
6607            // Join provision_movements with provisions to get entity/type info
6608            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6609                .provisions
6610                .iter()
6611                .filter(|p| p.entity_code == company.code)
6612                .map(|p| p.id.as_str())
6613                .collect();
6614            let provision_movements: Vec<(
6615                String,
6616                rust_decimal::Decimal,
6617                rust_decimal::Decimal,
6618                rust_decimal::Decimal,
6619            )> = accounting_standards
6620                .provision_movements
6621                .iter()
6622                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6623                .map(|m| {
6624                    let prov_type = accounting_standards
6625                        .provisions
6626                        .iter()
6627                        .find(|p| p.id == m.provision_id)
6628                        .map(|p| format!("{:?}", p.provision_type))
6629                        .unwrap_or_else(|| "Unknown".to_string());
6630                    (prov_type, m.opening, m.additions, m.closing)
6631                })
6632                .collect();
6633
6634            let enhanced_ctx = EnhancedNotesContext {
6635                entity_code: company.code.clone(),
6636                period: format!("FY{}", fiscal_year),
6637                currency: company.currency.clone(),
6638                // Inventory breakdown: best-effort using zero (would need balance tracker)
6639                finished_goods_value: rust_decimal::Decimal::ZERO,
6640                wip_value: rust_decimal::Decimal::ZERO,
6641                raw_materials_value: rust_decimal::Decimal::ZERO,
6642                debt_instruments,
6643                hedge_count,
6644                effective_hedges,
6645                total_notional,
6646                total_fair_value,
6647                provision_movements,
6648            };
6649
6650            let enhanced_notes =
6651                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6652            if !enhanced_notes.is_empty() {
6653                info!(
6654                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6655                    company.code,
6656                    enhanced_notes.len(),
6657                    enhanced_ctx.debt_instruments.len(),
6658                    hedge_count,
6659                    enhanced_ctx.provision_movements.len(),
6660                );
6661                financial_reporting
6662                    .notes_to_financial_statements
6663                    .extend(enhanced_notes);
6664            }
6665        }
6666    }
6667
6668    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
6669    ///
6670    /// This ensures the trial balance is coherent with the JEs: every debit and credit
6671    /// posted in the journal entries flows through to the trial balance, using the real
6672    /// GL account numbers from the CoA.
6673    fn build_trial_balance_from_entries(
6674        journal_entries: &[JournalEntry],
6675        coa: &ChartOfAccounts,
6676        company_code: &str,
6677        fiscal_year: u16,
6678        fiscal_period: u8,
6679    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6680        use rust_decimal::Decimal;
6681
6682        // Accumulate total debits and credits per GL account
6683        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6684        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6685
6686        for je in journal_entries {
6687            // Filter to matching company, fiscal year, and period
6688            if je.header.company_code != company_code
6689                || je.header.fiscal_year != fiscal_year
6690                || je.header.fiscal_period != fiscal_period
6691            {
6692                continue;
6693            }
6694
6695            for line in &je.lines {
6696                let acct = &line.gl_account;
6697                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6698                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6699            }
6700        }
6701
6702        // Build a TrialBalanceEntry for each account that had activity
6703        let mut all_accounts: Vec<&String> = account_debits
6704            .keys()
6705            .chain(account_credits.keys())
6706            .collect::<std::collections::HashSet<_>>()
6707            .into_iter()
6708            .collect();
6709        all_accounts.sort();
6710
6711        let mut entries = Vec::new();
6712
6713        for acct_number in all_accounts {
6714            let debit = account_debits
6715                .get(acct_number)
6716                .copied()
6717                .unwrap_or(Decimal::ZERO);
6718            let credit = account_credits
6719                .get(acct_number)
6720                .copied()
6721                .unwrap_or(Decimal::ZERO);
6722
6723            if debit.is_zero() && credit.is_zero() {
6724                continue;
6725            }
6726
6727            // Look up account name from CoA, fall back to "Account {code}"
6728            let account_name = coa
6729                .get_account(acct_number)
6730                .map(|gl| gl.short_description.clone())
6731                .unwrap_or_else(|| format!("Account {acct_number}"));
6732
6733            // Map account code prefix to the category strings expected by
6734            // FinancialStatementGenerator (Cash, Receivables, Inventory,
6735            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
6736            // OperatingExpenses).
6737            let category = Self::category_from_account_code(acct_number);
6738
6739            entries.push(datasynth_generators::TrialBalanceEntry {
6740                account_code: acct_number.clone(),
6741                account_name,
6742                category,
6743                debit_balance: debit,
6744                credit_balance: credit,
6745            });
6746        }
6747
6748        entries
6749    }
6750
6751    /// Build a cumulative trial balance by aggregating all JEs from the start up to
6752    /// (and including) the given period end date.
6753    ///
6754    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
6755    /// while income statement accounts (revenue, expenses) show only the current period.
6756    /// The two are merged into a single Vec for the FinancialStatementGenerator.
6757    fn build_cumulative_trial_balance(
6758        journal_entries: &[JournalEntry],
6759        coa: &ChartOfAccounts,
6760        company_code: &str,
6761        start_date: NaiveDate,
6762        period_end: NaiveDate,
6763        fiscal_year: u16,
6764        fiscal_period: u8,
6765    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6766        use rust_decimal::Decimal;
6767
6768        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
6769        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6770        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6771
6772        // Accumulate debits/credits for income statement accounts (current period only)
6773        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6774        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6775
6776        for je in journal_entries {
6777            if je.header.company_code != company_code {
6778                continue;
6779            }
6780
6781            for line in &je.lines {
6782                let acct = &line.gl_account;
6783                let category = Self::category_from_account_code(acct);
6784                let is_bs_account = matches!(
6785                    category.as_str(),
6786                    "Cash"
6787                        | "Receivables"
6788                        | "Inventory"
6789                        | "FixedAssets"
6790                        | "Payables"
6791                        | "AccruedLiabilities"
6792                        | "LongTermDebt"
6793                        | "Equity"
6794                );
6795
6796                if is_bs_account {
6797                    // Balance sheet: accumulate from start through period_end
6798                    if je.header.document_date <= period_end
6799                        && je.header.document_date >= start_date
6800                    {
6801                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6802                            line.debit_amount;
6803                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6804                            line.credit_amount;
6805                    }
6806                } else {
6807                    // Income statement: current period only
6808                    if je.header.fiscal_year == fiscal_year
6809                        && je.header.fiscal_period == fiscal_period
6810                    {
6811                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6812                            line.debit_amount;
6813                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6814                            line.credit_amount;
6815                    }
6816                }
6817            }
6818        }
6819
6820        // Merge all accounts
6821        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6822        all_accounts.extend(bs_debits.keys().cloned());
6823        all_accounts.extend(bs_credits.keys().cloned());
6824        all_accounts.extend(is_debits.keys().cloned());
6825        all_accounts.extend(is_credits.keys().cloned());
6826
6827        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6828        sorted_accounts.sort();
6829
6830        let mut entries = Vec::new();
6831
6832        for acct_number in &sorted_accounts {
6833            let category = Self::category_from_account_code(acct_number);
6834            let is_bs_account = matches!(
6835                category.as_str(),
6836                "Cash"
6837                    | "Receivables"
6838                    | "Inventory"
6839                    | "FixedAssets"
6840                    | "Payables"
6841                    | "AccruedLiabilities"
6842                    | "LongTermDebt"
6843                    | "Equity"
6844            );
6845
6846            let (debit, credit) = if is_bs_account {
6847                (
6848                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6849                    bs_credits
6850                        .get(acct_number)
6851                        .copied()
6852                        .unwrap_or(Decimal::ZERO),
6853                )
6854            } else {
6855                (
6856                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6857                    is_credits
6858                        .get(acct_number)
6859                        .copied()
6860                        .unwrap_or(Decimal::ZERO),
6861                )
6862            };
6863
6864            if debit.is_zero() && credit.is_zero() {
6865                continue;
6866            }
6867
6868            let account_name = coa
6869                .get_account(acct_number)
6870                .map(|gl| gl.short_description.clone())
6871                .unwrap_or_else(|| format!("Account {acct_number}"));
6872
6873            entries.push(datasynth_generators::TrialBalanceEntry {
6874                account_code: acct_number.clone(),
6875                account_name,
6876                category,
6877                debit_balance: debit,
6878                credit_balance: credit,
6879            });
6880        }
6881
6882        entries
6883    }
6884
6885    /// Build a JE-derived cash flow statement using the indirect method.
6886    ///
6887    /// Compares current and prior cumulative trial balances to derive working capital
6888    /// changes, producing a coherent cash flow statement tied to actual journal entries.
6889    fn build_cash_flow_from_trial_balances(
6890        current_tb: &[datasynth_generators::TrialBalanceEntry],
6891        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
6892        net_income: rust_decimal::Decimal,
6893    ) -> Vec<CashFlowItem> {
6894        use rust_decimal::Decimal;
6895
6896        // Helper: aggregate a TB by category and return net (debit - credit)
6897        let aggregate =
6898            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
6899                let mut map: HashMap<String, Decimal> = HashMap::new();
6900                for entry in tb {
6901                    let net = entry.debit_balance - entry.credit_balance;
6902                    *map.entry(entry.category.clone()).or_default() += net;
6903                }
6904                map
6905            };
6906
6907        let current = aggregate(current_tb);
6908        let prior = prior_tb.map(aggregate);
6909
6910        // Get balance for a category, defaulting to zero
6911        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
6912            *map.get(key).unwrap_or(&Decimal::ZERO)
6913        };
6914
6915        // Compute change: current - prior (or current if no prior)
6916        let change = |key: &str| -> Decimal {
6917            let curr = get(&current, key);
6918            match &prior {
6919                Some(p) => curr - get(p, key),
6920                None => curr,
6921            }
6922        };
6923
6924        // Operating activities (indirect method)
6925        // Depreciation add-back: approximate from FixedAssets decrease
6926        let fixed_asset_change = change("FixedAssets");
6927        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
6928            -fixed_asset_change
6929        } else {
6930            Decimal::ZERO
6931        };
6932
6933        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
6934        let ar_change = change("Receivables");
6935        let inventory_change = change("Inventory");
6936        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
6937        let ap_change = change("Payables");
6938        let accrued_change = change("AccruedLiabilities");
6939
6940        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
6941            + (-ap_change)
6942            + (-accrued_change);
6943
6944        // Investing activities
6945        let capex = if fixed_asset_change > Decimal::ZERO {
6946            -fixed_asset_change
6947        } else {
6948            Decimal::ZERO
6949        };
6950        let investing_cf = capex;
6951
6952        // Financing activities
6953        let debt_change = -change("LongTermDebt");
6954        let equity_change = -change("Equity");
6955        let financing_cf = debt_change + equity_change;
6956
6957        let net_change = operating_cf + investing_cf + financing_cf;
6958
6959        vec![
6960            CashFlowItem {
6961                item_code: "CF-NI".to_string(),
6962                label: "Net Income".to_string(),
6963                category: CashFlowCategory::Operating,
6964                amount: net_income,
6965                amount_prior: None,
6966                sort_order: 1,
6967                is_total: false,
6968            },
6969            CashFlowItem {
6970                item_code: "CF-DEP".to_string(),
6971                label: "Depreciation & Amortization".to_string(),
6972                category: CashFlowCategory::Operating,
6973                amount: depreciation_addback,
6974                amount_prior: None,
6975                sort_order: 2,
6976                is_total: false,
6977            },
6978            CashFlowItem {
6979                item_code: "CF-AR".to_string(),
6980                label: "Change in Accounts Receivable".to_string(),
6981                category: CashFlowCategory::Operating,
6982                amount: -ar_change,
6983                amount_prior: None,
6984                sort_order: 3,
6985                is_total: false,
6986            },
6987            CashFlowItem {
6988                item_code: "CF-AP".to_string(),
6989                label: "Change in Accounts Payable".to_string(),
6990                category: CashFlowCategory::Operating,
6991                amount: -ap_change,
6992                amount_prior: None,
6993                sort_order: 4,
6994                is_total: false,
6995            },
6996            CashFlowItem {
6997                item_code: "CF-INV".to_string(),
6998                label: "Change in Inventory".to_string(),
6999                category: CashFlowCategory::Operating,
7000                amount: -inventory_change,
7001                amount_prior: None,
7002                sort_order: 5,
7003                is_total: false,
7004            },
7005            CashFlowItem {
7006                item_code: "CF-OP".to_string(),
7007                label: "Net Cash from Operating Activities".to_string(),
7008                category: CashFlowCategory::Operating,
7009                amount: operating_cf,
7010                amount_prior: None,
7011                sort_order: 6,
7012                is_total: true,
7013            },
7014            CashFlowItem {
7015                item_code: "CF-CAPEX".to_string(),
7016                label: "Capital Expenditures".to_string(),
7017                category: CashFlowCategory::Investing,
7018                amount: capex,
7019                amount_prior: None,
7020                sort_order: 7,
7021                is_total: false,
7022            },
7023            CashFlowItem {
7024                item_code: "CF-INV-T".to_string(),
7025                label: "Net Cash from Investing Activities".to_string(),
7026                category: CashFlowCategory::Investing,
7027                amount: investing_cf,
7028                amount_prior: None,
7029                sort_order: 8,
7030                is_total: true,
7031            },
7032            CashFlowItem {
7033                item_code: "CF-DEBT".to_string(),
7034                label: "Net Borrowings / (Repayments)".to_string(),
7035                category: CashFlowCategory::Financing,
7036                amount: debt_change,
7037                amount_prior: None,
7038                sort_order: 9,
7039                is_total: false,
7040            },
7041            CashFlowItem {
7042                item_code: "CF-EQ".to_string(),
7043                label: "Equity Changes".to_string(),
7044                category: CashFlowCategory::Financing,
7045                amount: equity_change,
7046                amount_prior: None,
7047                sort_order: 10,
7048                is_total: false,
7049            },
7050            CashFlowItem {
7051                item_code: "CF-FIN-T".to_string(),
7052                label: "Net Cash from Financing Activities".to_string(),
7053                category: CashFlowCategory::Financing,
7054                amount: financing_cf,
7055                amount_prior: None,
7056                sort_order: 11,
7057                is_total: true,
7058            },
7059            CashFlowItem {
7060                item_code: "CF-NET".to_string(),
7061                label: "Net Change in Cash".to_string(),
7062                category: CashFlowCategory::Operating,
7063                amount: net_change,
7064                amount_prior: None,
7065                sort_order: 12,
7066                is_total: true,
7067            },
7068        ]
7069    }
7070
7071    /// Calculate net income from a set of trial balance entries.
7072    ///
7073    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
7074    fn calculate_net_income_from_tb(
7075        tb: &[datasynth_generators::TrialBalanceEntry],
7076    ) -> rust_decimal::Decimal {
7077        use rust_decimal::Decimal;
7078
7079        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7080        for entry in tb {
7081            let net = entry.debit_balance - entry.credit_balance;
7082            *aggregated.entry(entry.category.clone()).or_default() += net;
7083        }
7084
7085        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7086        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7087        let opex = *aggregated
7088            .get("OperatingExpenses")
7089            .unwrap_or(&Decimal::ZERO);
7090        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7091        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7092
7093        // revenue is negative (credit-normal), expenses are positive (debit-normal)
7094        // other_income is typically negative (credit), other_expenses is typically positive
7095        let operating_income = revenue - cogs - opex - other_expenses - other_income;
7096        let tax_rate = Decimal::new(25, 2); // 0.25
7097        let tax = operating_income * tax_rate;
7098        operating_income - tax
7099    }
7100
7101    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
7102    ///
7103    /// Uses the first two digits of the account code to classify into the categories
7104    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
7105    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
7106    /// OperatingExpenses, OtherIncome, OtherExpenses.
7107    fn category_from_account_code(code: &str) -> String {
7108        let prefix: String = code.chars().take(2).collect();
7109        match prefix.as_str() {
7110            "10" => "Cash",
7111            "11" => "Receivables",
7112            "12" | "13" | "14" => "Inventory",
7113            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
7114            "20" => "Payables",
7115            "21" | "22" | "23" | "24" => "AccruedLiabilities",
7116            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
7117            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
7118            "40" | "41" | "42" | "43" | "44" => "Revenue",
7119            "50" | "51" | "52" => "CostOfSales",
7120            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
7121                "OperatingExpenses"
7122            }
7123            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
7124            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
7125            _ => "OperatingExpenses",
7126        }
7127        .to_string()
7128    }
7129
7130    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
7131    fn phase_hr_data(
7132        &mut self,
7133        stats: &mut EnhancedGenerationStatistics,
7134    ) -> SynthResult<HrSnapshot> {
7135        if !self.phase_config.generate_hr {
7136            debug!("Phase 16: Skipped (HR generation disabled)");
7137            return Ok(HrSnapshot::default());
7138        }
7139
7140        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7141
7142        let seed = self.seed;
7143        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7144            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7145        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7146        let company_code = self
7147            .config
7148            .companies
7149            .first()
7150            .map(|c| c.code.as_str())
7151            .unwrap_or("1000");
7152        let currency = self
7153            .config
7154            .companies
7155            .first()
7156            .map(|c| c.currency.as_str())
7157            .unwrap_or("USD");
7158
7159        let employee_ids: Vec<String> = self
7160            .master_data
7161            .employees
7162            .iter()
7163            .map(|e| e.employee_id.clone())
7164            .collect();
7165
7166        if employee_ids.is_empty() {
7167            debug!("Phase 16: Skipped (no employees available)");
7168            return Ok(HrSnapshot::default());
7169        }
7170
7171        // Extract cost-center pool from master data employees for cross-reference
7172        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
7173        let cost_center_ids: Vec<String> = self
7174            .master_data
7175            .employees
7176            .iter()
7177            .filter_map(|e| e.cost_center.clone())
7178            .collect::<std::collections::HashSet<_>>()
7179            .into_iter()
7180            .collect();
7181
7182        let mut snapshot = HrSnapshot::default();
7183
7184        // Generate payroll runs (one per month)
7185        if self.config.hr.payroll.enabled {
7186            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7187                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7188
7189            // Look up country pack for payroll deductions and labels
7190            let payroll_pack = self.primary_pack();
7191
7192            // Store the pack on the generator so generate() resolves
7193            // localized deduction rates and labels from it.
7194            payroll_gen.set_country_pack(payroll_pack.clone());
7195
7196            let employees_with_salary: Vec<(
7197                String,
7198                rust_decimal::Decimal,
7199                Option<String>,
7200                Option<String>,
7201            )> = self
7202                .master_data
7203                .employees
7204                .iter()
7205                .map(|e| {
7206                    // Use the employee's actual annual base salary.
7207                    // Fall back to $60,000 / yr if somehow zero.
7208                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7209                        e.base_salary
7210                    } else {
7211                        rust_decimal::Decimal::from(60_000)
7212                    };
7213                    (
7214                        e.employee_id.clone(),
7215                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
7216                        e.cost_center.clone(),
7217                        e.department_id.clone(),
7218                    )
7219                })
7220                .collect();
7221
7222            // Use generate_with_changes when employee change history is available
7223            // so that salary adjustments, transfers, etc. are reflected in payroll.
7224            let change_history = &self.master_data.employee_change_history;
7225            let has_changes = !change_history.is_empty();
7226            if has_changes {
7227                debug!(
7228                    "Payroll will incorporate {} employee change events",
7229                    change_history.len()
7230                );
7231            }
7232
7233            for month in 0..self.config.global.period_months {
7234                let period_start = start_date + chrono::Months::new(month);
7235                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7236                let (run, items) = if has_changes {
7237                    payroll_gen.generate_with_changes(
7238                        company_code,
7239                        &employees_with_salary,
7240                        period_start,
7241                        period_end,
7242                        currency,
7243                        change_history,
7244                    )
7245                } else {
7246                    payroll_gen.generate(
7247                        company_code,
7248                        &employees_with_salary,
7249                        period_start,
7250                        period_end,
7251                        currency,
7252                    )
7253                };
7254                snapshot.payroll_runs.push(run);
7255                snapshot.payroll_run_count += 1;
7256                snapshot.payroll_line_item_count += items.len();
7257                snapshot.payroll_line_items.extend(items);
7258            }
7259        }
7260
7261        // Generate time entries
7262        if self.config.hr.time_attendance.enabled {
7263            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7264                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7265            // v3.4.2: when a temporal context is configured, time entries
7266            // respect holidays (not just weekends) and submitted_at lag
7267            // snaps to business days.
7268            if let Some(ctx) = &self.temporal_context {
7269                time_gen.set_temporal_context(Arc::clone(ctx));
7270            }
7271            let entries = time_gen.generate(
7272                &employee_ids,
7273                start_date,
7274                end_date,
7275                &self.config.hr.time_attendance,
7276            );
7277            snapshot.time_entry_count = entries.len();
7278            snapshot.time_entries = entries;
7279        }
7280
7281        // Generate expense reports
7282        if self.config.hr.expenses.enabled {
7283            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7284                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7285            expense_gen.set_country_pack(self.primary_pack().clone());
7286            // v3.4.2: snap submission / approval / paid / line-item dates
7287            // to business days when temporal_context is present.
7288            if let Some(ctx) = &self.temporal_context {
7289                expense_gen.set_temporal_context(Arc::clone(ctx));
7290            }
7291            let company_currency = self
7292                .config
7293                .companies
7294                .first()
7295                .map(|c| c.currency.as_str())
7296                .unwrap_or("USD");
7297            let reports = expense_gen.generate_with_currency(
7298                &employee_ids,
7299                start_date,
7300                end_date,
7301                &self.config.hr.expenses,
7302                company_currency,
7303            );
7304            snapshot.expense_report_count = reports.len();
7305            snapshot.expense_reports = reports;
7306        }
7307
7308        // Generate benefit enrollments (gated on payroll, since benefits require employees)
7309        if self.config.hr.payroll.enabled {
7310            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7311            let employee_pairs: Vec<(String, String)> = self
7312                .master_data
7313                .employees
7314                .iter()
7315                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7316                .collect();
7317            let enrollments =
7318                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7319            snapshot.benefit_enrollment_count = enrollments.len();
7320            snapshot.benefit_enrollments = enrollments;
7321        }
7322
7323        // Generate defined benefit pension plans (IAS 19 / ASC 715)
7324        if self.phase_config.generate_hr {
7325            let entity_name = self
7326                .config
7327                .companies
7328                .first()
7329                .map(|c| c.name.as_str())
7330                .unwrap_or("Entity");
7331            let period_months = self.config.global.period_months;
7332            let period_label = {
7333                let y = start_date.year();
7334                let m = start_date.month();
7335                if period_months >= 12 {
7336                    format!("FY{y}")
7337                } else {
7338                    format!("{y}-{m:02}")
7339                }
7340            };
7341            let reporting_date =
7342                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7343
7344            // Compute average annual salary from actual payroll data when available.
7345            // PayrollRun.total_gross covers all employees for one pay period; we sum
7346            // across all runs and divide by employee_count to get per-employee total,
7347            // then annualise for sub-annual periods.
7348            let avg_salary: Option<rust_decimal::Decimal> = {
7349                let employee_count = employee_ids.len();
7350                if self.config.hr.payroll.enabled
7351                    && employee_count > 0
7352                    && !snapshot.payroll_runs.is_empty()
7353                {
7354                    // Sum total gross pay across all payroll runs for this company
7355                    let total_gross: rust_decimal::Decimal = snapshot
7356                        .payroll_runs
7357                        .iter()
7358                        .filter(|r| r.company_code == company_code)
7359                        .map(|r| r.total_gross)
7360                        .sum();
7361                    if total_gross > rust_decimal::Decimal::ZERO {
7362                        // Annualise: total_gross covers `period_months` months of pay
7363                        let annual_total = if period_months > 0 && period_months < 12 {
7364                            total_gross * rust_decimal::Decimal::from(12u32)
7365                                / rust_decimal::Decimal::from(period_months)
7366                        } else {
7367                            total_gross
7368                        };
7369                        Some(
7370                            (annual_total / rust_decimal::Decimal::from(employee_count))
7371                                .round_dp(2),
7372                        )
7373                    } else {
7374                        None
7375                    }
7376                } else {
7377                    None
7378                }
7379            };
7380
7381            let mut pension_gen =
7382                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7383            let pension_snap = pension_gen.generate(
7384                company_code,
7385                entity_name,
7386                &period_label,
7387                reporting_date,
7388                employee_ids.len(),
7389                currency,
7390                avg_salary,
7391                period_months,
7392            );
7393            snapshot.pension_plan_count = pension_snap.plans.len();
7394            snapshot.pension_plans = pension_snap.plans;
7395            snapshot.pension_obligations = pension_snap.obligations;
7396            snapshot.pension_plan_assets = pension_snap.plan_assets;
7397            snapshot.pension_disclosures = pension_snap.disclosures;
7398            // Pension JEs are returned here so they can be added to entries
7399            // in the caller (stored temporarily on snapshot for transfer).
7400            // We embed them in the hr snapshot for simplicity; the orchestrator
7401            // will extract and extend `entries`.
7402            snapshot.pension_journal_entries = pension_snap.journal_entries;
7403        }
7404
7405        // Generate stock-based compensation (ASC 718 / IFRS 2)
7406        if self.phase_config.generate_hr && !employee_ids.is_empty() {
7407            let period_months = self.config.global.period_months;
7408            let period_label = {
7409                let y = start_date.year();
7410                let m = start_date.month();
7411                if period_months >= 12 {
7412                    format!("FY{y}")
7413                } else {
7414                    format!("{y}-{m:02}")
7415                }
7416            };
7417            let reporting_date =
7418                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7419
7420            let mut stock_comp_gen =
7421                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7422            let stock_snap = stock_comp_gen.generate(
7423                company_code,
7424                &employee_ids,
7425                start_date,
7426                &period_label,
7427                reporting_date,
7428                currency,
7429            );
7430            snapshot.stock_grant_count = stock_snap.grants.len();
7431            snapshot.stock_grants = stock_snap.grants;
7432            snapshot.stock_comp_expenses = stock_snap.expenses;
7433            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7434        }
7435
7436        stats.payroll_run_count = snapshot.payroll_run_count;
7437        stats.time_entry_count = snapshot.time_entry_count;
7438        stats.expense_report_count = snapshot.expense_report_count;
7439        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7440        stats.pension_plan_count = snapshot.pension_plan_count;
7441        stats.stock_grant_count = snapshot.stock_grant_count;
7442
7443        info!(
7444            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7445            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7446            snapshot.time_entry_count, snapshot.expense_report_count,
7447            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7448            snapshot.stock_grant_count
7449        );
7450        self.check_resources_with_log("post-hr")?;
7451
7452        Ok(snapshot)
7453    }
7454
7455    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
7456    fn phase_accounting_standards(
7457        &mut self,
7458        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7459        journal_entries: &[JournalEntry],
7460        stats: &mut EnhancedGenerationStatistics,
7461    ) -> SynthResult<AccountingStandardsSnapshot> {
7462        if !self.phase_config.generate_accounting_standards {
7463            debug!("Phase 17: Skipped (accounting standards generation disabled)");
7464            return Ok(AccountingStandardsSnapshot::default());
7465        }
7466        info!("Phase 17: Generating Accounting Standards Data");
7467
7468        let seed = self.seed;
7469        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7470            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7471        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7472        let company_code = self
7473            .config
7474            .companies
7475            .first()
7476            .map(|c| c.code.as_str())
7477            .unwrap_or("1000");
7478        let currency = self
7479            .config
7480            .companies
7481            .first()
7482            .map(|c| c.currency.as_str())
7483            .unwrap_or("USD");
7484
7485        // Convert config framework to standards framework.
7486        // If the user explicitly set a framework in the YAML config, use that.
7487        // Otherwise, fall back to the country pack's accounting.framework field,
7488        // and if that is also absent or unrecognised, default to US GAAP.
7489        let framework = match self.config.accounting_standards.framework {
7490            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7491                datasynth_standards::framework::AccountingFramework::UsGaap
7492            }
7493            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7494                datasynth_standards::framework::AccountingFramework::Ifrs
7495            }
7496            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7497                datasynth_standards::framework::AccountingFramework::DualReporting
7498            }
7499            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7500                datasynth_standards::framework::AccountingFramework::FrenchGaap
7501            }
7502            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7503                datasynth_standards::framework::AccountingFramework::GermanGaap
7504            }
7505            None => {
7506                // Derive framework from the primary company's country pack
7507                let pack = self.primary_pack();
7508                let pack_fw = pack.accounting.framework.as_str();
7509                match pack_fw {
7510                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7511                    "dual_reporting" => {
7512                        datasynth_standards::framework::AccountingFramework::DualReporting
7513                    }
7514                    "french_gaap" => {
7515                        datasynth_standards::framework::AccountingFramework::FrenchGaap
7516                    }
7517                    "german_gaap" | "hgb" => {
7518                        datasynth_standards::framework::AccountingFramework::GermanGaap
7519                    }
7520                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
7521                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7522                }
7523            }
7524        };
7525
7526        let mut snapshot = AccountingStandardsSnapshot::default();
7527
7528        // Revenue recognition
7529        if self.config.accounting_standards.revenue_recognition.enabled {
7530            let customer_ids: Vec<String> = self
7531                .master_data
7532                .customers
7533                .iter()
7534                .map(|c| c.customer_id.clone())
7535                .collect();
7536
7537            if !customer_ids.is_empty() {
7538                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7539                let contracts = rev_gen.generate(
7540                    company_code,
7541                    &customer_ids,
7542                    start_date,
7543                    end_date,
7544                    currency,
7545                    &self.config.accounting_standards.revenue_recognition,
7546                    framework,
7547                );
7548                snapshot.revenue_contract_count = contracts.len();
7549                snapshot.contracts = contracts;
7550            }
7551        }
7552
7553        // Impairment testing
7554        if self.config.accounting_standards.impairment.enabled {
7555            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7556                .master_data
7557                .assets
7558                .iter()
7559                .map(|a| {
7560                    (
7561                        a.asset_id.clone(),
7562                        a.description.clone(),
7563                        a.acquisition_cost,
7564                    )
7565                })
7566                .collect();
7567
7568            if !asset_data.is_empty() {
7569                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7570                let tests = imp_gen.generate(
7571                    company_code,
7572                    &asset_data,
7573                    end_date,
7574                    &self.config.accounting_standards.impairment,
7575                    framework,
7576                );
7577                snapshot.impairment_test_count = tests.len();
7578                snapshot.impairment_tests = tests;
7579            }
7580        }
7581
7582        // Business combinations (IFRS 3 / ASC 805)
7583        if self
7584            .config
7585            .accounting_standards
7586            .business_combinations
7587            .enabled
7588        {
7589            let bc_config = &self.config.accounting_standards.business_combinations;
7590            let framework_str = match framework {
7591                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7592                _ => "US_GAAP",
7593            };
7594            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7595            let bc_snap = bc_gen.generate(
7596                company_code,
7597                currency,
7598                start_date,
7599                end_date,
7600                bc_config.acquisition_count,
7601                framework_str,
7602            );
7603            snapshot.business_combination_count = bc_snap.combinations.len();
7604            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7605            snapshot.business_combinations = bc_snap.combinations;
7606        }
7607
7608        // Expected Credit Loss (IFRS 9 / ASC 326)
7609        if self
7610            .config
7611            .accounting_standards
7612            .expected_credit_loss
7613            .enabled
7614        {
7615            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7616            let framework_str = match framework {
7617                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7618                _ => "ASC_326",
7619            };
7620
7621            // Use AR aging data from the subledger snapshot if available;
7622            // otherwise generate synthetic bucket exposures.
7623            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7624
7625            let mut ecl_gen = EclGenerator::new(seed + 43);
7626
7627            // Collect combined bucket totals across all company AR aging reports.
7628            let bucket_exposures: Vec<(
7629                datasynth_core::models::subledger::ar::AgingBucket,
7630                rust_decimal::Decimal,
7631            )> = if ar_aging_reports.is_empty() {
7632                // No AR aging data — synthesise plausible bucket exposures.
7633                use datasynth_core::models::subledger::ar::AgingBucket;
7634                vec![
7635                    (
7636                        AgingBucket::Current,
7637                        rust_decimal::Decimal::from(500_000_u32),
7638                    ),
7639                    (
7640                        AgingBucket::Days1To30,
7641                        rust_decimal::Decimal::from(120_000_u32),
7642                    ),
7643                    (
7644                        AgingBucket::Days31To60,
7645                        rust_decimal::Decimal::from(45_000_u32),
7646                    ),
7647                    (
7648                        AgingBucket::Days61To90,
7649                        rust_decimal::Decimal::from(15_000_u32),
7650                    ),
7651                    (
7652                        AgingBucket::Over90Days,
7653                        rust_decimal::Decimal::from(8_000_u32),
7654                    ),
7655                ]
7656            } else {
7657                use datasynth_core::models::subledger::ar::AgingBucket;
7658                // Sum bucket totals from all reports.
7659                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7660                    std::collections::HashMap::new();
7661                for report in ar_aging_reports {
7662                    for (bucket, amount) in &report.bucket_totals {
7663                        *totals.entry(*bucket).or_default() += amount;
7664                    }
7665                }
7666                AgingBucket::all()
7667                    .into_iter()
7668                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7669                    .collect()
7670            };
7671
7672            let ecl_snap = ecl_gen.generate(
7673                company_code,
7674                end_date,
7675                &bucket_exposures,
7676                ecl_config,
7677                &period_label,
7678                framework_str,
7679            );
7680
7681            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7682            snapshot.ecl_models = ecl_snap.ecl_models;
7683            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7684            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7685        }
7686
7687        // Provisions and contingencies (IAS 37 / ASC 450)
7688        {
7689            let framework_str = match framework {
7690                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7691                _ => "US_GAAP",
7692            };
7693
7694            // Compute actual revenue from the journal entries generated so far.
7695            // The `journal_entries` slice passed to this phase contains all GL entries
7696            // up to and including Period Close. Fall back to a minimum of 100_000 to
7697            // avoid degenerate zero-based provision amounts on first-period datasets.
7698            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7699                .max(rust_decimal::Decimal::from(100_000_u32));
7700
7701            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7702
7703            let mut prov_gen = ProvisionGenerator::new(seed + 44);
7704            let prov_snap = prov_gen.generate(
7705                company_code,
7706                currency,
7707                revenue_proxy,
7708                end_date,
7709                &period_label,
7710                framework_str,
7711                None, // prior_opening: no carry-forward data in single-period runs
7712            );
7713
7714            snapshot.provision_count = prov_snap.provisions.len();
7715            snapshot.provisions = prov_snap.provisions;
7716            snapshot.provision_movements = prov_snap.movements;
7717            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7718            snapshot.provision_journal_entries = prov_snap.journal_entries;
7719        }
7720
7721        // IAS 21 Functional Currency Translation
7722        // For each company whose functional currency differs from the presentation
7723        // currency, generate a CurrencyTranslationResult with CTA (OCI).
7724        {
7725            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7726
7727            let presentation_currency = self
7728                .config
7729                .global
7730                .presentation_currency
7731                .clone()
7732                .unwrap_or_else(|| self.config.global.group_currency.clone());
7733
7734            // Build a minimal rate table populated with approximate rates from
7735            // the FX model base rates (USD-based) so we can do the translation.
7736            let mut rate_table = FxRateTable::new(&presentation_currency);
7737
7738            // Populate with base rates against USD; if presentation_currency is
7739            // not USD we do a best-effort two-step conversion using the table's
7740            // triangulation support.
7741            let base_rates = base_rates_usd();
7742            for (ccy, rate) in &base_rates {
7743                rate_table.add_rate(FxRate::new(
7744                    ccy,
7745                    "USD",
7746                    RateType::Closing,
7747                    end_date,
7748                    *rate,
7749                    "SYNTHETIC",
7750                ));
7751                // Average rate = 98% of closing (approximation).
7752                // 0.98 = 98/100 = Decimal::new(98, 2)
7753                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7754                rate_table.add_rate(FxRate::new(
7755                    ccy,
7756                    "USD",
7757                    RateType::Average,
7758                    end_date,
7759                    avg,
7760                    "SYNTHETIC",
7761                ));
7762            }
7763
7764            let mut translation_results = Vec::new();
7765            for company in &self.config.companies {
7766                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
7767                // to ensure the translation produces non-trivial CTA amounts.
7768                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7769                    .max(rust_decimal::Decimal::from(100_000_u32));
7770
7771                let func_ccy = company
7772                    .functional_currency
7773                    .clone()
7774                    .unwrap_or_else(|| company.currency.clone());
7775
7776                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7777                    &company.code,
7778                    &func_ccy,
7779                    &presentation_currency,
7780                    &ias21_period_label,
7781                    end_date,
7782                    company_revenue,
7783                    &rate_table,
7784                );
7785                translation_results.push(result);
7786            }
7787
7788            snapshot.currency_translation_count = translation_results.len();
7789            snapshot.currency_translation_results = translation_results;
7790        }
7791
7792        stats.revenue_contract_count = snapshot.revenue_contract_count;
7793        stats.impairment_test_count = snapshot.impairment_test_count;
7794        stats.business_combination_count = snapshot.business_combination_count;
7795        stats.ecl_model_count = snapshot.ecl_model_count;
7796        stats.provision_count = snapshot.provision_count;
7797
7798        // ------------------------------------------------------------
7799        // v3.3.1: Lease accounting (IFRS 16 / ASC 842)
7800        // ------------------------------------------------------------
7801        if self.config.accounting_standards.leases.enabled {
7802            use datasynth_generators::standards::LeaseGenerator;
7803            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7804                .unwrap_or_else(|_| {
7805                    NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
7806                });
7807            let framework =
7808                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7809            let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
7810            for company in &self.config.companies {
7811                let leases = lease_gen.generate(
7812                    &company.code,
7813                    start_date,
7814                    &self.config.accounting_standards.leases,
7815                    framework,
7816                );
7817                snapshot.lease_count += leases.len();
7818                snapshot.leases.extend(leases);
7819            }
7820            info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
7821        }
7822
7823        // ------------------------------------------------------------
7824        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
7825        // ------------------------------------------------------------
7826        if self.config.accounting_standards.fair_value.enabled {
7827            use datasynth_generators::standards::FairValueGenerator;
7828            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7829                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7830                + chrono::Months::new(self.config.global.period_months);
7831            let framework =
7832                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7833            let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
7834            for company in &self.config.companies {
7835                let measurements = fv_gen.generate(
7836                    &company.code,
7837                    end_date,
7838                    &company.currency,
7839                    &self.config.accounting_standards.fair_value,
7840                    framework,
7841                );
7842                snapshot.fair_value_measurement_count += measurements.len();
7843                snapshot.fair_value_measurements.extend(measurements);
7844            }
7845            info!(
7846                "v3.3.1 fair value measurements: {}",
7847                snapshot.fair_value_measurement_count
7848            );
7849        }
7850
7851        // ------------------------------------------------------------
7852        // v3.3.1: Framework reconciliation (dual reporting only)
7853        // ------------------------------------------------------------
7854        if self.config.accounting_standards.generate_differences
7855            && matches!(
7856                self.config.accounting_standards.framework,
7857                Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
7858            )
7859        {
7860            use datasynth_generators::standards::FrameworkReconciliationGenerator;
7861            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7862                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7863                + chrono::Months::new(self.config.global.period_months);
7864            let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
7865            for company in &self.config.companies {
7866                let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
7867                snapshot.framework_difference_count += records.len();
7868                snapshot.framework_differences.extend(records);
7869                snapshot.framework_reconciliations.push(reconciliation);
7870            }
7871            info!(
7872                "v3.3.1 framework reconciliation: {} differences across {} entities",
7873                snapshot.framework_difference_count,
7874                snapshot.framework_reconciliations.len()
7875            );
7876        }
7877
7878        info!(
7879            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
7880            snapshot.revenue_contract_count,
7881            snapshot.impairment_test_count,
7882            snapshot.business_combination_count,
7883            snapshot.ecl_model_count,
7884            snapshot.provision_count,
7885            snapshot.currency_translation_count,
7886            snapshot.lease_count,
7887            snapshot.fair_value_measurement_count,
7888            snapshot.framework_difference_count,
7889        );
7890        self.check_resources_with_log("post-accounting-standards")?;
7891
7892        Ok(snapshot)
7893    }
7894
7895    /// v3.3.1: helper to resolve the accounting-standards framework enum
7896    /// from config into the `datasynth_standards::framework::AccountingFramework`
7897    /// type expected by standards generators. Falls back to US GAAP.
7898    fn resolve_accounting_framework(
7899        cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
7900    ) -> datasynth_standards::framework::AccountingFramework {
7901        use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
7902        use datasynth_standards::framework::AccountingFramework as Fw;
7903        match cfg {
7904            Some(Cfg::Ifrs) => Fw::Ifrs,
7905            Some(Cfg::DualReporting) => Fw::DualReporting,
7906            Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
7907            Some(Cfg::GermanGaap) => Fw::GermanGaap,
7908            _ => Fw::UsGaap,
7909        }
7910    }
7911
7912    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
7913    fn phase_manufacturing(
7914        &mut self,
7915        stats: &mut EnhancedGenerationStatistics,
7916    ) -> SynthResult<ManufacturingSnapshot> {
7917        if !self.phase_config.generate_manufacturing {
7918            debug!("Phase 18: Skipped (manufacturing generation disabled)");
7919            return Ok(ManufacturingSnapshot::default());
7920        }
7921        info!("Phase 18: Generating Manufacturing Data");
7922
7923        let seed = self.seed;
7924        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7925            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7926        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7927        let company_code = self
7928            .config
7929            .companies
7930            .first()
7931            .map(|c| c.code.as_str())
7932            .unwrap_or("1000");
7933
7934        let material_data: Vec<(String, String)> = self
7935            .master_data
7936            .materials
7937            .iter()
7938            .map(|m| (m.material_id.clone(), m.description.clone()))
7939            .collect();
7940
7941        if material_data.is_empty() {
7942            debug!("Phase 18: Skipped (no materials available)");
7943            return Ok(ManufacturingSnapshot::default());
7944        }
7945
7946        let mut snapshot = ManufacturingSnapshot::default();
7947
7948        // Generate production orders
7949        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
7950        // v3.4.3: snap planned / actual / operation dates to business days.
7951        if let Some(ctx) = &self.temporal_context {
7952            prod_gen.set_temporal_context(Arc::clone(ctx));
7953        }
7954        let production_orders = prod_gen.generate(
7955            company_code,
7956            &material_data,
7957            start_date,
7958            end_date,
7959            &self.config.manufacturing.production_orders,
7960            &self.config.manufacturing.costing,
7961            &self.config.manufacturing.routing,
7962        );
7963        snapshot.production_order_count = production_orders.len();
7964
7965        // Generate quality inspections from production orders
7966        let inspection_data: Vec<(String, String, String)> = production_orders
7967            .iter()
7968            .map(|po| {
7969                (
7970                    po.order_id.clone(),
7971                    po.material_id.clone(),
7972                    po.material_description.clone(),
7973                )
7974            })
7975            .collect();
7976
7977        snapshot.production_orders = production_orders;
7978
7979        if !inspection_data.is_empty() {
7980            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
7981            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
7982            snapshot.quality_inspection_count = inspections.len();
7983            snapshot.quality_inspections = inspections;
7984        }
7985
7986        // Generate cycle counts (one per month)
7987        let storage_locations: Vec<(String, String)> = material_data
7988            .iter()
7989            .enumerate()
7990            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
7991            .collect();
7992
7993        let employee_ids: Vec<String> = self
7994            .master_data
7995            .employees
7996            .iter()
7997            .map(|e| e.employee_id.clone())
7998            .collect();
7999        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8000            .with_employee_pool(employee_ids);
8001        let mut cycle_count_total = 0usize;
8002        for month in 0..self.config.global.period_months {
8003            let count_date = start_date + chrono::Months::new(month);
8004            let items_per_count = storage_locations.len().clamp(10, 50);
8005            let cc = cc_gen.generate(
8006                company_code,
8007                &storage_locations,
8008                count_date,
8009                items_per_count,
8010            );
8011            snapshot.cycle_counts.push(cc);
8012            cycle_count_total += 1;
8013        }
8014        snapshot.cycle_count_count = cycle_count_total;
8015
8016        // Generate BOM components
8017        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8018        let bom_components = bom_gen.generate(company_code, &material_data);
8019        snapshot.bom_component_count = bom_components.len();
8020        snapshot.bom_components = bom_components;
8021
8022        // Generate inventory movements — link GoodsIssue movements to real production order IDs
8023        let currency = self
8024            .config
8025            .companies
8026            .first()
8027            .map(|c| c.currency.as_str())
8028            .unwrap_or("USD");
8029        let production_order_ids: Vec<String> = snapshot
8030            .production_orders
8031            .iter()
8032            .map(|po| po.order_id.clone())
8033            .collect();
8034        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8035        let inventory_movements = inv_mov_gen.generate_with_production_orders(
8036            company_code,
8037            &material_data,
8038            start_date,
8039            end_date,
8040            2,
8041            currency,
8042            &production_order_ids,
8043        );
8044        snapshot.inventory_movement_count = inventory_movements.len();
8045        snapshot.inventory_movements = inventory_movements;
8046
8047        stats.production_order_count = snapshot.production_order_count;
8048        stats.quality_inspection_count = snapshot.quality_inspection_count;
8049        stats.cycle_count_count = snapshot.cycle_count_count;
8050        stats.bom_component_count = snapshot.bom_component_count;
8051        stats.inventory_movement_count = snapshot.inventory_movement_count;
8052
8053        info!(
8054            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8055            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8056            snapshot.bom_component_count, snapshot.inventory_movement_count
8057        );
8058        self.check_resources_with_log("post-manufacturing")?;
8059
8060        Ok(snapshot)
8061    }
8062
8063    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
8064    fn phase_sales_kpi_budgets(
8065        &mut self,
8066        coa: &Arc<ChartOfAccounts>,
8067        financial_reporting: &FinancialReportingSnapshot,
8068        stats: &mut EnhancedGenerationStatistics,
8069    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8070        if !self.phase_config.generate_sales_kpi_budgets {
8071            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8072            return Ok(SalesKpiBudgetsSnapshot::default());
8073        }
8074        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8075
8076        let seed = self.seed;
8077        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8078            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8079        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8080        let company_code = self
8081            .config
8082            .companies
8083            .first()
8084            .map(|c| c.code.as_str())
8085            .unwrap_or("1000");
8086
8087        let mut snapshot = SalesKpiBudgetsSnapshot::default();
8088
8089        // Sales Quotes
8090        if self.config.sales_quotes.enabled {
8091            let customer_data: Vec<(String, String)> = self
8092                .master_data
8093                .customers
8094                .iter()
8095                .map(|c| (c.customer_id.clone(), c.name.clone()))
8096                .collect();
8097            let material_data: Vec<(String, String)> = self
8098                .master_data
8099                .materials
8100                .iter()
8101                .map(|m| (m.material_id.clone(), m.description.clone()))
8102                .collect();
8103
8104            if !customer_data.is_empty() && !material_data.is_empty() {
8105                let employee_ids: Vec<String> = self
8106                    .master_data
8107                    .employees
8108                    .iter()
8109                    .map(|e| e.employee_id.clone())
8110                    .collect();
8111                let customer_ids: Vec<String> = self
8112                    .master_data
8113                    .customers
8114                    .iter()
8115                    .map(|c| c.customer_id.clone())
8116                    .collect();
8117                let company_currency = self
8118                    .config
8119                    .companies
8120                    .first()
8121                    .map(|c| c.currency.as_str())
8122                    .unwrap_or("USD");
8123
8124                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8125                    .with_pools(employee_ids, customer_ids);
8126                let quotes = quote_gen.generate_with_currency(
8127                    company_code,
8128                    &customer_data,
8129                    &material_data,
8130                    start_date,
8131                    end_date,
8132                    &self.config.sales_quotes,
8133                    company_currency,
8134                );
8135                snapshot.sales_quote_count = quotes.len();
8136                snapshot.sales_quotes = quotes;
8137            }
8138        }
8139
8140        // Management KPIs
8141        if self.config.financial_reporting.management_kpis.enabled {
8142            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8143            let mut kpis = kpi_gen.generate(
8144                company_code,
8145                start_date,
8146                end_date,
8147                &self.config.financial_reporting.management_kpis,
8148            );
8149
8150            // Override financial KPIs with actual data from financial statements
8151            {
8152                use rust_decimal::Decimal;
8153
8154                if let Some(income_stmt) =
8155                    financial_reporting.financial_statements.iter().find(|fs| {
8156                        fs.statement_type == StatementType::IncomeStatement
8157                            && fs.company_code == company_code
8158                    })
8159                {
8160                    // Extract revenue and COGS from income statement line items
8161                    let total_revenue: Decimal = income_stmt
8162                        .line_items
8163                        .iter()
8164                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
8165                        .map(|li| li.amount)
8166                        .sum();
8167                    let total_cogs: Decimal = income_stmt
8168                        .line_items
8169                        .iter()
8170                        .filter(|li| {
8171                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8172                                && !li.is_total
8173                        })
8174                        .map(|li| li.amount.abs())
8175                        .sum();
8176                    let total_opex: Decimal = income_stmt
8177                        .line_items
8178                        .iter()
8179                        .filter(|li| {
8180                            li.section.contains("Expense")
8181                                && !li.is_total
8182                                && !li.section.contains("Cost")
8183                        })
8184                        .map(|li| li.amount.abs())
8185                        .sum();
8186
8187                    if total_revenue > Decimal::ZERO {
8188                        let hundred = Decimal::from(100);
8189                        let gross_margin_pct =
8190                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8191                        let operating_income = total_revenue - total_cogs - total_opex;
8192                        let op_margin_pct =
8193                            (operating_income * hundred / total_revenue).round_dp(2);
8194
8195                        // Override gross margin and operating margin KPIs
8196                        for kpi in &mut kpis {
8197                            if kpi.name == "Gross Margin" {
8198                                kpi.value = gross_margin_pct;
8199                            } else if kpi.name == "Operating Margin" {
8200                                kpi.value = op_margin_pct;
8201                            }
8202                        }
8203                    }
8204                }
8205
8206                // Override Current Ratio from balance sheet
8207                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8208                    fs.statement_type == StatementType::BalanceSheet
8209                        && fs.company_code == company_code
8210                }) {
8211                    let current_assets: Decimal = bs
8212                        .line_items
8213                        .iter()
8214                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8215                        .map(|li| li.amount)
8216                        .sum();
8217                    let current_liabilities: Decimal = bs
8218                        .line_items
8219                        .iter()
8220                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8221                        .map(|li| li.amount.abs())
8222                        .sum();
8223
8224                    if current_liabilities > Decimal::ZERO {
8225                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
8226                        for kpi in &mut kpis {
8227                            if kpi.name == "Current Ratio" {
8228                                kpi.value = current_ratio;
8229                            }
8230                        }
8231                    }
8232                }
8233            }
8234
8235            snapshot.kpi_count = kpis.len();
8236            snapshot.kpis = kpis;
8237        }
8238
8239        // Budgets
8240        if self.config.financial_reporting.budgets.enabled {
8241            let account_data: Vec<(String, String)> = coa
8242                .accounts
8243                .iter()
8244                .map(|a| (a.account_number.clone(), a.short_description.clone()))
8245                .collect();
8246
8247            if !account_data.is_empty() {
8248                let fiscal_year = start_date.year() as u32;
8249                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8250                let budget = budget_gen.generate(
8251                    company_code,
8252                    fiscal_year,
8253                    &account_data,
8254                    &self.config.financial_reporting.budgets,
8255                );
8256                snapshot.budget_line_count = budget.line_items.len();
8257                snapshot.budgets.push(budget);
8258            }
8259        }
8260
8261        stats.sales_quote_count = snapshot.sales_quote_count;
8262        stats.kpi_count = snapshot.kpi_count;
8263        stats.budget_line_count = snapshot.budget_line_count;
8264
8265        info!(
8266            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8267            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8268        );
8269        self.check_resources_with_log("post-sales-kpi-budgets")?;
8270
8271        Ok(snapshot)
8272    }
8273
8274    /// Compute pre-tax income for a single company from actual journal entries.
8275    ///
8276    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
8277    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
8278    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
8279    /// and the period-close engine so that all three use a consistent definition.
8280    fn compute_pre_tax_income(
8281        company_code: &str,
8282        journal_entries: &[JournalEntry],
8283    ) -> rust_decimal::Decimal {
8284        use datasynth_core::accounts::AccountCategory;
8285        use rust_decimal::Decimal;
8286
8287        let mut total_revenue = Decimal::ZERO;
8288        let mut total_expenses = Decimal::ZERO;
8289
8290        for je in journal_entries {
8291            if je.header.company_code != company_code {
8292                continue;
8293            }
8294            for line in &je.lines {
8295                let cat = AccountCategory::from_account(&line.gl_account);
8296                match cat {
8297                    AccountCategory::Revenue => {
8298                        total_revenue += line.credit_amount - line.debit_amount;
8299                    }
8300                    AccountCategory::Cogs
8301                    | AccountCategory::OperatingExpense
8302                    | AccountCategory::OtherIncomeExpense => {
8303                        total_expenses += line.debit_amount - line.credit_amount;
8304                    }
8305                    _ => {}
8306                }
8307            }
8308        }
8309
8310        let pti = (total_revenue - total_expenses).round_dp(2);
8311        if pti == rust_decimal::Decimal::ZERO {
8312            // No income statement activity yet — fall back to a synthetic value so the
8313            // tax provision generator can still produce meaningful output.
8314            rust_decimal::Decimal::from(1_000_000u32)
8315        } else {
8316            pti
8317        }
8318    }
8319
8320    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
8321    fn phase_tax_generation(
8322        &mut self,
8323        document_flows: &DocumentFlowSnapshot,
8324        journal_entries: &[JournalEntry],
8325        stats: &mut EnhancedGenerationStatistics,
8326    ) -> SynthResult<TaxSnapshot> {
8327        if !self.phase_config.generate_tax {
8328            debug!("Phase 20: Skipped (tax generation disabled)");
8329            return Ok(TaxSnapshot::default());
8330        }
8331        info!("Phase 20: Generating Tax Data");
8332
8333        let seed = self.seed;
8334        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8335            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8336        let fiscal_year = start_date.year();
8337        let company_code = self
8338            .config
8339            .companies
8340            .first()
8341            .map(|c| c.code.as_str())
8342            .unwrap_or("1000");
8343
8344        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8345            seed + 370,
8346            self.config.tax.clone(),
8347        );
8348
8349        let pack = self.primary_pack().clone();
8350        let (jurisdictions, codes) =
8351            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8352
8353        // Generate tax provisions for each company
8354        let mut provisions = Vec::new();
8355        if self.config.tax.provisions.enabled {
8356            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8357            for company in &self.config.companies {
8358                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8359                let statutory_rate = rust_decimal::Decimal::new(
8360                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8361                    2,
8362                );
8363                let provision = provision_gen.generate(
8364                    &company.code,
8365                    start_date,
8366                    pre_tax_income,
8367                    statutory_rate,
8368                );
8369                provisions.push(provision);
8370            }
8371        }
8372
8373        // Generate tax lines from document invoices
8374        let mut tax_lines = Vec::new();
8375        if !codes.is_empty() {
8376            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8377                datasynth_generators::TaxLineGeneratorConfig::default(),
8378                codes.clone(),
8379                seed + 372,
8380            );
8381
8382            // Tax lines from vendor invoices (input tax)
8383            // Use the first company's country as buyer country
8384            let buyer_country = self
8385                .config
8386                .companies
8387                .first()
8388                .map(|c| c.country.as_str())
8389                .unwrap_or("US");
8390            for vi in &document_flows.vendor_invoices {
8391                let lines = tax_line_gen.generate_for_document(
8392                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
8393                    &vi.header.document_id,
8394                    buyer_country, // seller approx same country
8395                    buyer_country,
8396                    vi.payable_amount,
8397                    vi.header.document_date,
8398                    None,
8399                );
8400                tax_lines.extend(lines);
8401            }
8402
8403            // Tax lines from customer invoices (output tax)
8404            for ci in &document_flows.customer_invoices {
8405                let lines = tax_line_gen.generate_for_document(
8406                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8407                    &ci.header.document_id,
8408                    buyer_country, // seller is the company
8409                    buyer_country,
8410                    ci.total_gross_amount,
8411                    ci.header.document_date,
8412                    None,
8413                );
8414                tax_lines.extend(lines);
8415            }
8416        }
8417
8418        // Generate deferred tax data (IAS 12 / ASC 740) for each company
8419        let deferred_tax = {
8420            let companies: Vec<(&str, &str)> = self
8421                .config
8422                .companies
8423                .iter()
8424                .map(|c| (c.code.as_str(), c.country.as_str()))
8425                .collect();
8426            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8427            deferred_gen.generate(&companies, start_date, journal_entries)
8428        };
8429
8430        // Build a document_id → posting_date map so each tax JE uses its
8431        // source document's date rather than a blanket period-end date.
8432        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8433            std::collections::HashMap::new();
8434        for vi in &document_flows.vendor_invoices {
8435            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8436        }
8437        for ci in &document_flows.customer_invoices {
8438            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8439        }
8440
8441        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
8442        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8443        let tax_posting_journal_entries = if !tax_lines.is_empty() {
8444            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8445                &tax_lines,
8446                company_code,
8447                &doc_dates,
8448                end_date,
8449            );
8450            debug!("Generated {} tax posting JEs", jes.len());
8451            jes
8452        } else {
8453            Vec::new()
8454        };
8455
8456        let snapshot = TaxSnapshot {
8457            jurisdiction_count: jurisdictions.len(),
8458            code_count: codes.len(),
8459            jurisdictions,
8460            codes,
8461            tax_provisions: provisions,
8462            tax_lines,
8463            tax_returns: Vec::new(),
8464            withholding_records: Vec::new(),
8465            tax_anomaly_labels: Vec::new(),
8466            deferred_tax,
8467            tax_posting_journal_entries,
8468        };
8469
8470        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8471        stats.tax_code_count = snapshot.code_count;
8472        stats.tax_provision_count = snapshot.tax_provisions.len();
8473        stats.tax_line_count = snapshot.tax_lines.len();
8474
8475        info!(
8476            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8477            snapshot.jurisdiction_count,
8478            snapshot.code_count,
8479            snapshot.tax_provisions.len(),
8480            snapshot.deferred_tax.temporary_differences.len(),
8481            snapshot.deferred_tax.journal_entries.len(),
8482            snapshot.tax_posting_journal_entries.len(),
8483        );
8484        self.check_resources_with_log("post-tax")?;
8485
8486        Ok(snapshot)
8487    }
8488
8489    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
8490    fn phase_esg_generation(
8491        &mut self,
8492        document_flows: &DocumentFlowSnapshot,
8493        manufacturing: &ManufacturingSnapshot,
8494        stats: &mut EnhancedGenerationStatistics,
8495    ) -> SynthResult<EsgSnapshot> {
8496        if !self.phase_config.generate_esg {
8497            debug!("Phase 21: Skipped (ESG generation disabled)");
8498            return Ok(EsgSnapshot::default());
8499        }
8500        let degradation = self.check_resources()?;
8501        if degradation >= DegradationLevel::Reduced {
8502            debug!(
8503                "Phase skipped due to resource pressure (degradation: {:?})",
8504                degradation
8505            );
8506            return Ok(EsgSnapshot::default());
8507        }
8508        info!("Phase 21: Generating ESG Data");
8509
8510        let seed = self.seed;
8511        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8512            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8513        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8514        let entity_id = self
8515            .config
8516            .companies
8517            .first()
8518            .map(|c| c.code.as_str())
8519            .unwrap_or("1000");
8520
8521        let esg_cfg = &self.config.esg;
8522        let mut snapshot = EsgSnapshot::default();
8523
8524        // Energy consumption (feeds into scope 1 & 2 emissions)
8525        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8526            esg_cfg.environmental.energy.clone(),
8527            seed + 80,
8528        );
8529        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8530
8531        // Water usage
8532        let facility_count = esg_cfg.environmental.energy.facility_count;
8533        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8534        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8535
8536        // Waste
8537        let mut waste_gen = datasynth_generators::WasteGenerator::new(
8538            seed + 82,
8539            esg_cfg.environmental.waste.diversion_target,
8540            facility_count,
8541        );
8542        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8543
8544        // Emissions (scope 1, 2, 3)
8545        let mut emission_gen =
8546            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8547
8548        // Build EnergyInput from energy_records
8549        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8550            .iter()
8551            .map(|e| datasynth_generators::EnergyInput {
8552                facility_id: e.facility_id.clone(),
8553                energy_type: match e.energy_source {
8554                    EnergySourceType::NaturalGas => {
8555                        datasynth_generators::EnergyInputType::NaturalGas
8556                    }
8557                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8558                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8559                    _ => datasynth_generators::EnergyInputType::Electricity,
8560                },
8561                consumption_kwh: e.consumption_kwh,
8562                period: e.period,
8563            })
8564            .collect();
8565
8566        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
8567        if !manufacturing.production_orders.is_empty() {
8568            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8569                &manufacturing.production_orders,
8570                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
8571                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
8572            );
8573            if !mfg_energy.is_empty() {
8574                info!(
8575                    "ESG: {} energy inputs derived from {} production orders",
8576                    mfg_energy.len(),
8577                    manufacturing.production_orders.len(),
8578                );
8579                energy_inputs.extend(mfg_energy);
8580            }
8581        }
8582
8583        let mut emissions = Vec::new();
8584        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8585        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8586
8587        // Scope 3: use vendor spend data from actual payments
8588        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8589            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8590            for payment in &document_flows.payments {
8591                if payment.is_vendor {
8592                    *totals
8593                        .entry(payment.business_partner_id.clone())
8594                        .or_default() += payment.amount;
8595                }
8596            }
8597            totals
8598        };
8599        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8600            .master_data
8601            .vendors
8602            .iter()
8603            .map(|v| {
8604                let spend = vendor_payment_totals
8605                    .get(&v.vendor_id)
8606                    .copied()
8607                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8608                datasynth_generators::VendorSpendInput {
8609                    vendor_id: v.vendor_id.clone(),
8610                    category: format!("{:?}", v.vendor_type).to_lowercase(),
8611                    spend,
8612                    country: v.country.clone(),
8613                }
8614            })
8615            .collect();
8616        if !vendor_spend.is_empty() {
8617            emissions.extend(emission_gen.generate_scope3_purchased_goods(
8618                entity_id,
8619                &vendor_spend,
8620                start_date,
8621                end_date,
8622            ));
8623        }
8624
8625        // Business travel & commuting (scope 3)
8626        let headcount = self.master_data.employees.len() as u32;
8627        if headcount > 0 {
8628            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8629            emissions.extend(emission_gen.generate_scope3_business_travel(
8630                entity_id,
8631                travel_spend,
8632                start_date,
8633            ));
8634            emissions
8635                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8636        }
8637
8638        snapshot.emission_count = emissions.len();
8639        snapshot.emissions = emissions;
8640        snapshot.energy = energy_records;
8641
8642        // Social: Workforce diversity, pay equity, safety
8643        let mut workforce_gen =
8644            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8645        let total_headcount = headcount.max(100);
8646        snapshot.diversity =
8647            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8648        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8649
8650        // v2.4: Derive additional workforce diversity metrics from actual employee data
8651        if !self.master_data.employees.is_empty() {
8652            let hr_diversity = workforce_gen.generate_diversity_from_employees(
8653                entity_id,
8654                &self.master_data.employees,
8655                end_date,
8656            );
8657            if !hr_diversity.is_empty() {
8658                info!(
8659                    "ESG: {} diversity metrics derived from {} actual employees",
8660                    hr_diversity.len(),
8661                    self.master_data.employees.len(),
8662                );
8663                snapshot.diversity.extend(hr_diversity);
8664            }
8665        }
8666
8667        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8668            entity_id,
8669            facility_count,
8670            start_date,
8671            end_date,
8672        );
8673
8674        // Compute safety metrics
8675        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
8676        let safety_metric = workforce_gen.compute_safety_metrics(
8677            entity_id,
8678            &snapshot.safety_incidents,
8679            total_hours,
8680            start_date,
8681        );
8682        snapshot.safety_metrics = vec![safety_metric];
8683
8684        // Governance
8685        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
8686            seed + 85,
8687            esg_cfg.governance.board_size,
8688            esg_cfg.governance.independence_target,
8689        );
8690        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
8691
8692        // Supplier ESG assessments
8693        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
8694            esg_cfg.supply_chain_esg.clone(),
8695            seed + 86,
8696        );
8697        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
8698            .master_data
8699            .vendors
8700            .iter()
8701            .map(|v| datasynth_generators::VendorInput {
8702                vendor_id: v.vendor_id.clone(),
8703                country: v.country.clone(),
8704                industry: format!("{:?}", v.vendor_type).to_lowercase(),
8705                quality_score: None,
8706            })
8707            .collect();
8708        snapshot.supplier_assessments =
8709            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
8710
8711        // Disclosures
8712        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
8713            seed + 87,
8714            esg_cfg.reporting.clone(),
8715            esg_cfg.climate_scenarios.clone(),
8716        );
8717        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
8718        snapshot.disclosures = disclosure_gen.generate_disclosures(
8719            entity_id,
8720            &snapshot.materiality,
8721            start_date,
8722            end_date,
8723        );
8724        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
8725        snapshot.disclosure_count = snapshot.disclosures.len();
8726
8727        // Anomaly injection
8728        if esg_cfg.anomaly_rate > 0.0 {
8729            let mut anomaly_injector =
8730                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
8731            let mut labels = Vec::new();
8732            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
8733            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
8734            labels.extend(
8735                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
8736            );
8737            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
8738            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
8739            snapshot.anomaly_labels = labels;
8740        }
8741
8742        stats.esg_emission_count = snapshot.emission_count;
8743        stats.esg_disclosure_count = snapshot.disclosure_count;
8744
8745        info!(
8746            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8747            snapshot.emission_count,
8748            snapshot.disclosure_count,
8749            snapshot.supplier_assessments.len()
8750        );
8751        self.check_resources_with_log("post-esg")?;
8752
8753        Ok(snapshot)
8754    }
8755
8756    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
8757    fn phase_treasury_data(
8758        &mut self,
8759        document_flows: &DocumentFlowSnapshot,
8760        subledger: &SubledgerSnapshot,
8761        intercompany: &IntercompanySnapshot,
8762        stats: &mut EnhancedGenerationStatistics,
8763    ) -> SynthResult<TreasurySnapshot> {
8764        if !self.phase_config.generate_treasury {
8765            debug!("Phase 22: Skipped (treasury generation disabled)");
8766            return Ok(TreasurySnapshot::default());
8767        }
8768        let degradation = self.check_resources()?;
8769        if degradation >= DegradationLevel::Reduced {
8770            debug!(
8771                "Phase skipped due to resource pressure (degradation: {:?})",
8772                degradation
8773            );
8774            return Ok(TreasurySnapshot::default());
8775        }
8776        info!("Phase 22: Generating Treasury Data");
8777
8778        let seed = self.seed;
8779        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8780            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8781        let currency = self
8782            .config
8783            .companies
8784            .first()
8785            .map(|c| c.currency.as_str())
8786            .unwrap_or("USD");
8787        let entity_id = self
8788            .config
8789            .companies
8790            .first()
8791            .map(|c| c.code.as_str())
8792            .unwrap_or("1000");
8793
8794        let mut snapshot = TreasurySnapshot::default();
8795
8796        // Generate debt instruments
8797        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8798            self.config.treasury.debt.clone(),
8799            seed + 90,
8800        );
8801        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8802
8803        // Generate hedging instruments (IR swaps for floating-rate debt)
8804        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8805            self.config.treasury.hedging.clone(),
8806            seed + 91,
8807        );
8808        for debt in &snapshot.debt_instruments {
8809            if debt.rate_type == InterestRateType::Variable {
8810                let swap = hedge_gen.generate_ir_swap(
8811                    currency,
8812                    debt.principal,
8813                    debt.origination_date,
8814                    debt.maturity_date,
8815                );
8816                snapshot.hedging_instruments.push(swap);
8817            }
8818        }
8819
8820        // Build FX exposures from foreign-currency payments and generate
8821        // FX forwards + hedge relationship designations via generate() API.
8822        {
8823            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8824            for payment in &document_flows.payments {
8825                if payment.currency != currency {
8826                    let entry = fx_map
8827                        .entry(payment.currency.clone())
8828                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8829                    entry.0 += payment.amount;
8830                    // Use the latest settlement date among grouped payments
8831                    if payment.header.document_date > entry.1 {
8832                        entry.1 = payment.header.document_date;
8833                    }
8834                }
8835            }
8836            if !fx_map.is_empty() {
8837                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
8838                    .into_iter()
8839                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
8840                        datasynth_generators::treasury::FxExposure {
8841                            currency_pair: format!("{foreign_ccy}/{currency}"),
8842                            foreign_currency: foreign_ccy,
8843                            net_amount,
8844                            settlement_date,
8845                            description: "AP payment FX exposure".to_string(),
8846                        }
8847                    })
8848                    .collect();
8849                let (fx_instruments, fx_relationships) =
8850                    hedge_gen.generate(start_date, &fx_exposures);
8851                snapshot.hedging_instruments.extend(fx_instruments);
8852                snapshot.hedge_relationships.extend(fx_relationships);
8853            }
8854        }
8855
8856        // Inject anomalies if configured
8857        if self.config.treasury.anomaly_rate > 0.0 {
8858            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
8859                seed + 92,
8860                self.config.treasury.anomaly_rate,
8861            );
8862            let mut labels = Vec::new();
8863            labels.extend(
8864                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
8865            );
8866            snapshot.treasury_anomaly_labels = labels;
8867        }
8868
8869        // Generate cash positions from payment flows
8870        if self.config.treasury.cash_positioning.enabled {
8871            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
8872
8873            // AP payments as outflows
8874            for payment in &document_flows.payments {
8875                cash_flows.push(datasynth_generators::treasury::CashFlow {
8876                    date: payment.header.document_date,
8877                    account_id: format!("{entity_id}-MAIN"),
8878                    amount: payment.amount,
8879                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
8880                });
8881            }
8882
8883            // Customer receipts (from O2C chains) as inflows
8884            for chain in &document_flows.o2c_chains {
8885                if let Some(ref receipt) = chain.customer_receipt {
8886                    cash_flows.push(datasynth_generators::treasury::CashFlow {
8887                        date: receipt.header.document_date,
8888                        account_id: format!("{entity_id}-MAIN"),
8889                        amount: receipt.amount,
8890                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8891                    });
8892                }
8893                // Remainder receipts (follow-up to partial payments)
8894                for receipt in &chain.remainder_receipts {
8895                    cash_flows.push(datasynth_generators::treasury::CashFlow {
8896                        date: receipt.header.document_date,
8897                        account_id: format!("{entity_id}-MAIN"),
8898                        amount: receipt.amount,
8899                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8900                    });
8901                }
8902            }
8903
8904            if !cash_flows.is_empty() {
8905                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
8906                    self.config.treasury.cash_positioning.clone(),
8907                    seed + 93,
8908                );
8909                let account_id = format!("{entity_id}-MAIN");
8910                snapshot.cash_positions = cash_gen.generate(
8911                    entity_id,
8912                    &account_id,
8913                    currency,
8914                    &cash_flows,
8915                    start_date,
8916                    start_date + chrono::Months::new(self.config.global.period_months),
8917                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
8918                );
8919            }
8920        }
8921
8922        // Generate cash forecasts from AR/AP aging
8923        if self.config.treasury.cash_forecasting.enabled {
8924            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8925
8926            // Build AR aging items from subledger AR invoices
8927            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
8928                .ar_invoices
8929                .iter()
8930                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8931                .map(|inv| {
8932                    let days_past_due = if inv.due_date < end_date {
8933                        (end_date - inv.due_date).num_days().max(0) as u32
8934                    } else {
8935                        0
8936                    };
8937                    datasynth_generators::treasury::ArAgingItem {
8938                        expected_date: inv.due_date,
8939                        amount: inv.amount_remaining,
8940                        days_past_due,
8941                        document_id: inv.invoice_number.clone(),
8942                    }
8943                })
8944                .collect();
8945
8946            // Build AP aging items from subledger AP invoices
8947            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
8948                .ap_invoices
8949                .iter()
8950                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8951                .map(|inv| datasynth_generators::treasury::ApAgingItem {
8952                    payment_date: inv.due_date,
8953                    amount: inv.amount_remaining,
8954                    document_id: inv.invoice_number.clone(),
8955                })
8956                .collect();
8957
8958            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
8959                self.config.treasury.cash_forecasting.clone(),
8960                seed + 94,
8961            );
8962            let forecast = forecast_gen.generate(
8963                entity_id,
8964                currency,
8965                end_date,
8966                &ar_items,
8967                &ap_items,
8968                &[], // scheduled disbursements - empty for now
8969            );
8970            snapshot.cash_forecasts.push(forecast);
8971        }
8972
8973        // Generate cash pools and sweeps
8974        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
8975            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8976            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
8977                self.config.treasury.cash_pooling.clone(),
8978                seed + 95,
8979            );
8980
8981            // Create a pool from available accounts
8982            let account_ids: Vec<String> = snapshot
8983                .cash_positions
8984                .iter()
8985                .map(|cp| cp.bank_account_id.clone())
8986                .collect::<std::collections::HashSet<_>>()
8987                .into_iter()
8988                .collect();
8989
8990            if let Some(pool) =
8991                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
8992            {
8993                // Generate sweeps - build participant balances from last cash position per account
8994                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8995                for cp in &snapshot.cash_positions {
8996                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
8997                }
8998
8999                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9000                    latest_balances
9001                        .into_iter()
9002                        .filter(|(id, _)| pool.participant_accounts.contains(id))
9003                        .map(
9004                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
9005                                account_id: id,
9006                                balance,
9007                            },
9008                        )
9009                        .collect();
9010
9011                let sweeps =
9012                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9013                snapshot.cash_pool_sweeps = sweeps;
9014                snapshot.cash_pools.push(pool);
9015            }
9016        }
9017
9018        // Generate bank guarantees
9019        if self.config.treasury.bank_guarantees.enabled {
9020            let vendor_names: Vec<String> = self
9021                .master_data
9022                .vendors
9023                .iter()
9024                .map(|v| v.name.clone())
9025                .collect();
9026            if !vendor_names.is_empty() {
9027                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9028                    self.config.treasury.bank_guarantees.clone(),
9029                    seed + 96,
9030                );
9031                snapshot.bank_guarantees =
9032                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9033            }
9034        }
9035
9036        // Generate netting runs from intercompany matched pairs
9037        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9038            let entity_ids: Vec<String> = self
9039                .config
9040                .companies
9041                .iter()
9042                .map(|c| c.code.clone())
9043                .collect();
9044            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9045                .matched_pairs
9046                .iter()
9047                .map(|mp| {
9048                    (
9049                        mp.seller_company.clone(),
9050                        mp.buyer_company.clone(),
9051                        mp.amount,
9052                    )
9053                })
9054                .collect();
9055            if entity_ids.len() >= 2 {
9056                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9057                    self.config.treasury.netting.clone(),
9058                    seed + 97,
9059                );
9060                snapshot.netting_runs = netting_gen.generate(
9061                    &entity_ids,
9062                    currency,
9063                    start_date,
9064                    self.config.global.period_months,
9065                    &ic_amounts,
9066                );
9067            }
9068        }
9069
9070        // Generate treasury journal entries from the instruments we just created.
9071        {
9072            use datasynth_generators::treasury::TreasuryAccounting;
9073
9074            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9075            let mut treasury_jes = Vec::new();
9076
9077            // Debt interest accrual JEs
9078            if !snapshot.debt_instruments.is_empty() {
9079                let debt_jes =
9080                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9081                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9082                treasury_jes.extend(debt_jes);
9083            }
9084
9085            // Hedge mark-to-market JEs
9086            if !snapshot.hedging_instruments.is_empty() {
9087                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9088                    &snapshot.hedging_instruments,
9089                    &snapshot.hedge_relationships,
9090                    end_date,
9091                    entity_id,
9092                );
9093                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9094                treasury_jes.extend(hedge_jes);
9095            }
9096
9097            // Cash pool sweep JEs
9098            if !snapshot.cash_pool_sweeps.is_empty() {
9099                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9100                    &snapshot.cash_pool_sweeps,
9101                    entity_id,
9102                );
9103                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9104                treasury_jes.extend(sweep_jes);
9105            }
9106
9107            if !treasury_jes.is_empty() {
9108                debug!("Total treasury journal entries: {}", treasury_jes.len());
9109            }
9110            snapshot.journal_entries = treasury_jes;
9111        }
9112
9113        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9114        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9115        stats.cash_position_count = snapshot.cash_positions.len();
9116        stats.cash_forecast_count = snapshot.cash_forecasts.len();
9117        stats.cash_pool_count = snapshot.cash_pools.len();
9118
9119        info!(
9120            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9121            snapshot.debt_instruments.len(),
9122            snapshot.hedging_instruments.len(),
9123            snapshot.cash_positions.len(),
9124            snapshot.cash_forecasts.len(),
9125            snapshot.cash_pools.len(),
9126            snapshot.bank_guarantees.len(),
9127            snapshot.netting_runs.len(),
9128            snapshot.journal_entries.len(),
9129        );
9130        self.check_resources_with_log("post-treasury")?;
9131
9132        Ok(snapshot)
9133    }
9134
9135    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
9136    fn phase_project_accounting(
9137        &mut self,
9138        document_flows: &DocumentFlowSnapshot,
9139        hr: &HrSnapshot,
9140        stats: &mut EnhancedGenerationStatistics,
9141    ) -> SynthResult<ProjectAccountingSnapshot> {
9142        if !self.phase_config.generate_project_accounting {
9143            debug!("Phase 23: Skipped (project accounting disabled)");
9144            return Ok(ProjectAccountingSnapshot::default());
9145        }
9146        let degradation = self.check_resources()?;
9147        if degradation >= DegradationLevel::Reduced {
9148            debug!(
9149                "Phase skipped due to resource pressure (degradation: {:?})",
9150                degradation
9151            );
9152            return Ok(ProjectAccountingSnapshot::default());
9153        }
9154        info!("Phase 23: Generating Project Accounting Data");
9155
9156        let seed = self.seed;
9157        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9158            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9159        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9160        let company_code = self
9161            .config
9162            .companies
9163            .first()
9164            .map(|c| c.code.as_str())
9165            .unwrap_or("1000");
9166
9167        let mut snapshot = ProjectAccountingSnapshot::default();
9168
9169        // Generate projects with WBS hierarchies
9170        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9171            self.config.project_accounting.clone(),
9172            seed + 95,
9173        );
9174        let pool = project_gen.generate(company_code, start_date, end_date);
9175        snapshot.projects = pool.projects.clone();
9176
9177        // Link source documents to projects for cost allocation
9178        {
9179            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9180                Vec::new();
9181
9182            // Time entries
9183            for te in &hr.time_entries {
9184                let total_hours = te.hours_regular + te.hours_overtime;
9185                if total_hours > 0.0 {
9186                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9187                        id: te.entry_id.clone(),
9188                        entity_id: company_code.to_string(),
9189                        date: te.date,
9190                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9191                            .unwrap_or(rust_decimal::Decimal::ZERO),
9192                        source_type: CostSourceType::TimeEntry,
9193                        hours: Some(
9194                            rust_decimal::Decimal::from_f64_retain(total_hours)
9195                                .unwrap_or(rust_decimal::Decimal::ZERO),
9196                        ),
9197                    });
9198                }
9199            }
9200
9201            // Expense reports
9202            for er in &hr.expense_reports {
9203                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9204                    id: er.report_id.clone(),
9205                    entity_id: company_code.to_string(),
9206                    date: er.submission_date,
9207                    amount: er.total_amount,
9208                    source_type: CostSourceType::ExpenseReport,
9209                    hours: None,
9210                });
9211            }
9212
9213            // Purchase orders
9214            for po in &document_flows.purchase_orders {
9215                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9216                    id: po.header.document_id.clone(),
9217                    entity_id: company_code.to_string(),
9218                    date: po.header.document_date,
9219                    amount: po.total_net_amount,
9220                    source_type: CostSourceType::PurchaseOrder,
9221                    hours: None,
9222                });
9223            }
9224
9225            // Vendor invoices
9226            for vi in &document_flows.vendor_invoices {
9227                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9228                    id: vi.header.document_id.clone(),
9229                    entity_id: company_code.to_string(),
9230                    date: vi.header.document_date,
9231                    amount: vi.payable_amount,
9232                    source_type: CostSourceType::VendorInvoice,
9233                    hours: None,
9234                });
9235            }
9236
9237            if !source_docs.is_empty() && !pool.projects.is_empty() {
9238                let mut cost_gen =
9239                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
9240                        self.config.project_accounting.cost_allocation.clone(),
9241                        seed + 99,
9242                    );
9243                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9244            }
9245        }
9246
9247        // Generate change orders
9248        if self.config.project_accounting.change_orders.enabled {
9249            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9250                self.config.project_accounting.change_orders.clone(),
9251                seed + 96,
9252            );
9253            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9254        }
9255
9256        // Generate milestones
9257        if self.config.project_accounting.milestones.enabled {
9258            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9259                self.config.project_accounting.milestones.clone(),
9260                seed + 97,
9261            );
9262            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9263        }
9264
9265        // Generate earned value metrics (needs cost lines, so only if we have projects)
9266        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9267            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9268                self.config.project_accounting.earned_value.clone(),
9269                seed + 98,
9270            );
9271            snapshot.earned_value_metrics =
9272                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9273        }
9274
9275        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
9276        if self.config.project_accounting.revenue_recognition.enabled
9277            && !snapshot.projects.is_empty()
9278            && !snapshot.cost_lines.is_empty()
9279        {
9280            use datasynth_generators::project_accounting::RevenueGenerator;
9281            let rev_config = self.config.project_accounting.revenue_recognition.clone();
9282            let avg_contract_value =
9283                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9284                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9285
9286            // Build contract value tuples: only customer-type projects get revenue recognition.
9287            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
9288            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9289                snapshot
9290                    .projects
9291                    .iter()
9292                    .filter(|p| {
9293                        matches!(
9294                            p.project_type,
9295                            datasynth_core::models::ProjectType::Customer
9296                        )
9297                    })
9298                    .map(|p| {
9299                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
9300                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9301                        // budget × 1.25 → contract value
9302                        } else {
9303                            avg_contract_value
9304                        };
9305                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
9306                        (p.project_id.clone(), cv, etc)
9307                    })
9308                    .collect();
9309
9310            if !contract_values.is_empty() {
9311                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9312                snapshot.revenue_records = rev_gen.generate(
9313                    &snapshot.projects,
9314                    &snapshot.cost_lines,
9315                    &contract_values,
9316                    start_date,
9317                    end_date,
9318                );
9319                debug!(
9320                    "Generated {} revenue recognition records for {} customer projects",
9321                    snapshot.revenue_records.len(),
9322                    contract_values.len()
9323                );
9324            }
9325        }
9326
9327        stats.project_count = snapshot.projects.len();
9328        stats.project_change_order_count = snapshot.change_orders.len();
9329        stats.project_cost_line_count = snapshot.cost_lines.len();
9330
9331        info!(
9332            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9333            snapshot.projects.len(),
9334            snapshot.change_orders.len(),
9335            snapshot.milestones.len(),
9336            snapshot.earned_value_metrics.len()
9337        );
9338        self.check_resources_with_log("post-project-accounting")?;
9339
9340        Ok(snapshot)
9341    }
9342
9343    /// Phase 24: Generate process evolution and organizational events.
9344    fn phase_evolution_events(
9345        &mut self,
9346        stats: &mut EnhancedGenerationStatistics,
9347    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9348        if !self.phase_config.generate_evolution_events {
9349            debug!("Phase 24: Skipped (evolution events disabled)");
9350            return Ok((Vec::new(), Vec::new()));
9351        }
9352        info!("Phase 24: Generating Process Evolution + Organizational Events");
9353
9354        let seed = self.seed;
9355        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9356            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9357        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9358
9359        // Process evolution events
9360        let mut proc_gen =
9361            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9362                seed + 100,
9363            );
9364        let process_events = proc_gen.generate_events(start_date, end_date);
9365
9366        // Organizational events
9367        let company_codes: Vec<String> = self
9368            .config
9369            .companies
9370            .iter()
9371            .map(|c| c.code.clone())
9372            .collect();
9373        let mut org_gen =
9374            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9375                seed + 101,
9376            );
9377        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9378
9379        stats.process_evolution_event_count = process_events.len();
9380        stats.organizational_event_count = org_events.len();
9381
9382        info!(
9383            "Evolution events generated: {} process evolution, {} organizational",
9384            process_events.len(),
9385            org_events.len()
9386        );
9387        self.check_resources_with_log("post-evolution-events")?;
9388
9389        Ok((process_events, org_events))
9390    }
9391
9392    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
9393    /// data recovery, and regulatory changes).
9394    fn phase_disruption_events(
9395        &self,
9396        stats: &mut EnhancedGenerationStatistics,
9397    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9398        if !self.config.organizational_events.enabled {
9399            debug!("Phase 24b: Skipped (organizational events disabled)");
9400            return Ok(Vec::new());
9401        }
9402        info!("Phase 24b: Generating Disruption Events");
9403
9404        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9405            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9406        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9407
9408        let company_codes: Vec<String> = self
9409            .config
9410            .companies
9411            .iter()
9412            .map(|c| c.code.clone())
9413            .collect();
9414
9415        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9416        let events = gen.generate(start_date, end_date, &company_codes);
9417
9418        stats.disruption_event_count = events.len();
9419        info!("Disruption events generated: {} events", events.len());
9420        self.check_resources_with_log("post-disruption-events")?;
9421
9422        Ok(events)
9423    }
9424
9425    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
9426    ///
9427    /// Produces paired examples where each pair contains the original clean JE
9428    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
9429    /// split transaction). Useful for training anomaly detection models with
9430    /// known ground truth.
9431    fn phase_counterfactuals(
9432        &self,
9433        journal_entries: &[JournalEntry],
9434        stats: &mut EnhancedGenerationStatistics,
9435    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9436        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9437            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9438            return Ok(Vec::new());
9439        }
9440        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9441
9442        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9443
9444        let mut gen = CounterfactualGenerator::new(self.seed + 110);
9445
9446        // Rotating set of specs to produce diverse mutation types
9447        let specs = [
9448            CounterfactualSpec::ScaleAmount { factor: 2.5 },
9449            CounterfactualSpec::ShiftDate { days: -14 },
9450            CounterfactualSpec::SelfApprove,
9451            CounterfactualSpec::SplitTransaction { split_count: 3 },
9452        ];
9453
9454        let pairs: Vec<_> = journal_entries
9455            .iter()
9456            .enumerate()
9457            .map(|(i, je)| {
9458                let spec = &specs[i % specs.len()];
9459                gen.generate(je, spec)
9460            })
9461            .collect();
9462
9463        stats.counterfactual_pair_count = pairs.len();
9464        info!(
9465            "Counterfactual pairs generated: {} pairs from {} journal entries",
9466            pairs.len(),
9467            journal_entries.len()
9468        );
9469        self.check_resources_with_log("post-counterfactuals")?;
9470
9471        Ok(pairs)
9472    }
9473
9474    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
9475    ///
9476    /// Uses the anomaly labels (from Phase 8) to determine which documents are
9477    /// fraudulent, then generates probabilistic red flags on all chain documents.
9478    /// Non-fraud documents also receive red flags at a lower rate (false positives)
9479    /// to produce realistic ML training data.
9480    fn phase_red_flags(
9481        &self,
9482        anomaly_labels: &AnomalyLabels,
9483        document_flows: &DocumentFlowSnapshot,
9484        stats: &mut EnhancedGenerationStatistics,
9485    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9486        if !self.config.fraud.enabled {
9487            debug!("Phase 26: Skipped (fraud generation disabled)");
9488            return Ok(Vec::new());
9489        }
9490        info!("Phase 26: Generating Fraud Red-Flag Indicators");
9491
9492        use datasynth_generators::fraud::RedFlagGenerator;
9493
9494        let generator = RedFlagGenerator::new();
9495        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9496
9497        // Build a set of document IDs that are known-fraudulent from anomaly labels.
9498        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9499            .labels
9500            .iter()
9501            .filter(|label| label.anomaly_type.is_intentional())
9502            .map(|label| label.document_id.as_str())
9503            .collect();
9504
9505        let mut flags = Vec::new();
9506
9507        // Iterate P2P chains: use the purchase order document ID as the chain key.
9508        for chain in &document_flows.p2p_chains {
9509            let doc_id = &chain.purchase_order.header.document_id;
9510            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9511            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9512        }
9513
9514        // Iterate O2C chains: use the sales order document ID as the chain key.
9515        for chain in &document_flows.o2c_chains {
9516            let doc_id = &chain.sales_order.header.document_id;
9517            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9518            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9519        }
9520
9521        stats.red_flag_count = flags.len();
9522        info!(
9523            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9524            flags.len(),
9525            document_flows.p2p_chains.len(),
9526            document_flows.o2c_chains.len(),
9527            fraud_doc_ids.len()
9528        );
9529        self.check_resources_with_log("post-red-flags")?;
9530
9531        Ok(flags)
9532    }
9533
9534    /// Phase 26b: Generate collusion rings from employee/vendor pools.
9535    ///
9536    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
9537    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
9538    /// advance them over the simulation period.
9539    fn phase_collusion_rings(
9540        &mut self,
9541        stats: &mut EnhancedGenerationStatistics,
9542    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9543        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9544            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9545            return Ok(Vec::new());
9546        }
9547        info!("Phase 26b: Generating Collusion Rings");
9548
9549        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9550            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9551        let months = self.config.global.period_months;
9552
9553        let employee_ids: Vec<String> = self
9554            .master_data
9555            .employees
9556            .iter()
9557            .map(|e| e.employee_id.clone())
9558            .collect();
9559        let vendor_ids: Vec<String> = self
9560            .master_data
9561            .vendors
9562            .iter()
9563            .map(|v| v.vendor_id.clone())
9564            .collect();
9565
9566        let mut generator =
9567            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9568        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9569
9570        stats.collusion_ring_count = rings.len();
9571        info!(
9572            "Collusion rings generated: {} rings, total members: {}",
9573            rings.len(),
9574            rings
9575                .iter()
9576                .map(datasynth_generators::fraud::CollusionRing::size)
9577                .sum::<usize>()
9578        );
9579        self.check_resources_with_log("post-collusion-rings")?;
9580
9581        Ok(rings)
9582    }
9583
9584    /// Phase 27: Generate bi-temporal version chains for vendor entities.
9585    ///
9586    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
9587    /// master data changes over time, supporting bi-temporal audit queries.
9588    fn phase_temporal_attributes(
9589        &mut self,
9590        stats: &mut EnhancedGenerationStatistics,
9591    ) -> SynthResult<
9592        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9593    > {
9594        if !self.config.temporal_attributes.enabled {
9595            debug!("Phase 27: Skipped (temporal attributes disabled)");
9596            return Ok(Vec::new());
9597        }
9598        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9599
9600        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9601            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9602
9603        // Build a TemporalAttributeConfig from the user's config.
9604        // Since Phase 27 is already gated on temporal_attributes.enabled,
9605        // default to enabling version chains so users get actual mutations.
9606        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9607            || self.config.temporal_attributes.enabled;
9608        let temporal_config = {
9609            let ta = &self.config.temporal_attributes;
9610            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9611                .enabled(ta.enabled)
9612                .closed_probability(ta.valid_time.closed_probability)
9613                .avg_validity_days(ta.valid_time.avg_validity_days)
9614                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9615                .with_version_chains(if generate_version_chains {
9616                    ta.avg_versions_per_entity
9617                } else {
9618                    1.0
9619                })
9620                .build()
9621        };
9622        // Apply backdating settings if configured
9623        let temporal_config = if self
9624            .config
9625            .temporal_attributes
9626            .transaction_time
9627            .allow_backdating
9628        {
9629            let mut c = temporal_config;
9630            c.transaction_time.allow_backdating = true;
9631            c.transaction_time.backdating_probability = self
9632                .config
9633                .temporal_attributes
9634                .transaction_time
9635                .backdating_probability;
9636            c.transaction_time.max_backdate_days = self
9637                .config
9638                .temporal_attributes
9639                .transaction_time
9640                .max_backdate_days;
9641            c
9642        } else {
9643            temporal_config
9644        };
9645        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9646            temporal_config,
9647            self.seed + 130,
9648            start_date,
9649        );
9650
9651        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9652            self.seed + 130,
9653            datasynth_core::GeneratorType::Vendor,
9654        );
9655
9656        let chains: Vec<_> = self
9657            .master_data
9658            .vendors
9659            .iter()
9660            .map(|vendor| {
9661                let id = uuid_factory.next();
9662                gen.generate_version_chain(vendor.clone(), id)
9663            })
9664            .collect();
9665
9666        stats.temporal_version_chain_count = chains.len();
9667        info!("Temporal version chains generated: {} chains", chains.len());
9668        self.check_resources_with_log("post-temporal-attributes")?;
9669
9670        Ok(chains)
9671    }
9672
9673    /// Phase 28: Build entity relationship graph and cross-process links.
9674    ///
9675    /// Part 1 (gated on `relationship_strength.enabled`): builds an
9676    /// `EntityGraph` from master-data vendor/customer entities and
9677    /// journal-entry-derived transaction summaries.
9678    ///
9679    /// Part 2 (gated on `cross_process_links.enabled`): extracts
9680    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
9681    /// generates inventory-movement cross-process links.
9682    fn phase_entity_relationships(
9683        &self,
9684        journal_entries: &[JournalEntry],
9685        document_flows: &DocumentFlowSnapshot,
9686        stats: &mut EnhancedGenerationStatistics,
9687    ) -> SynthResult<(
9688        Option<datasynth_core::models::EntityGraph>,
9689        Vec<datasynth_core::models::CrossProcessLink>,
9690    )> {
9691        use datasynth_generators::relationships::{
9692            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
9693            TransactionSummary,
9694        };
9695
9696        let rs_enabled = self.config.relationship_strength.enabled;
9697        let cpl_enabled = self.config.cross_process_links.enabled
9698            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
9699
9700        if !rs_enabled && !cpl_enabled {
9701            debug!(
9702                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
9703            );
9704            return Ok((None, Vec::new()));
9705        }
9706
9707        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
9708
9709        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9710            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9711
9712        let company_code = self
9713            .config
9714            .companies
9715            .first()
9716            .map(|c| c.code.as_str())
9717            .unwrap_or("1000");
9718
9719        // Build the generator with matching config flags
9720        let gen_config = EntityGraphConfig {
9721            enabled: rs_enabled,
9722            cross_process: datasynth_generators::relationships::CrossProcessConfig {
9723                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
9724                enable_return_flows: false,
9725                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
9726                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
9727                // Use higher link rate for small datasets to avoid probabilistic empty results
9728                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
9729                    1.0
9730                } else {
9731                    0.30
9732                },
9733                ..Default::default()
9734            },
9735            strength_config: datasynth_generators::relationships::StrengthConfig {
9736                transaction_volume_weight: self
9737                    .config
9738                    .relationship_strength
9739                    .calculation
9740                    .transaction_volume_weight,
9741                transaction_count_weight: self
9742                    .config
9743                    .relationship_strength
9744                    .calculation
9745                    .transaction_count_weight,
9746                duration_weight: self
9747                    .config
9748                    .relationship_strength
9749                    .calculation
9750                    .relationship_duration_weight,
9751                recency_weight: self.config.relationship_strength.calculation.recency_weight,
9752                mutual_connections_weight: self
9753                    .config
9754                    .relationship_strength
9755                    .calculation
9756                    .mutual_connections_weight,
9757                recency_half_life_days: self
9758                    .config
9759                    .relationship_strength
9760                    .calculation
9761                    .recency_half_life_days,
9762            },
9763            ..Default::default()
9764        };
9765
9766        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9767
9768        // --- Part 1: Entity Relationship Graph ---
9769        let entity_graph = if rs_enabled {
9770            // Build EntitySummary lists from master data
9771            let vendor_summaries: Vec<EntitySummary> = self
9772                .master_data
9773                .vendors
9774                .iter()
9775                .map(|v| {
9776                    EntitySummary::new(
9777                        &v.vendor_id,
9778                        &v.name,
9779                        datasynth_core::models::GraphEntityType::Vendor,
9780                        start_date,
9781                    )
9782                })
9783                .collect();
9784
9785            let customer_summaries: Vec<EntitySummary> = self
9786                .master_data
9787                .customers
9788                .iter()
9789                .map(|c| {
9790                    EntitySummary::new(
9791                        &c.customer_id,
9792                        &c.name,
9793                        datasynth_core::models::GraphEntityType::Customer,
9794                        start_date,
9795                    )
9796                })
9797                .collect();
9798
9799            // Build transaction summaries from journal entries.
9800            // Key = (company_code, trading_partner) for entries that have a
9801            // trading partner.  This captures intercompany flows and any JE
9802            // whose line items carry a trading_partner reference.
9803            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9804                std::collections::HashMap::new();
9805
9806            for je in journal_entries {
9807                let cc = je.header.company_code.clone();
9808                let posting_date = je.header.posting_date;
9809                for line in &je.lines {
9810                    if let Some(ref tp) = line.trading_partner {
9811                        let amount = if line.debit_amount > line.credit_amount {
9812                            line.debit_amount
9813                        } else {
9814                            line.credit_amount
9815                        };
9816                        let entry = txn_summaries
9817                            .entry((cc.clone(), tp.clone()))
9818                            .or_insert_with(|| TransactionSummary {
9819                                total_volume: rust_decimal::Decimal::ZERO,
9820                                transaction_count: 0,
9821                                first_transaction_date: posting_date,
9822                                last_transaction_date: posting_date,
9823                                related_entities: std::collections::HashSet::new(),
9824                            });
9825                        entry.total_volume += amount;
9826                        entry.transaction_count += 1;
9827                        if posting_date < entry.first_transaction_date {
9828                            entry.first_transaction_date = posting_date;
9829                        }
9830                        if posting_date > entry.last_transaction_date {
9831                            entry.last_transaction_date = posting_date;
9832                        }
9833                        entry.related_entities.insert(cc.clone());
9834                    }
9835                }
9836            }
9837
9838            // Also extract transaction relationships from document flow chains.
9839            // P2P chains: Company → Vendor relationships
9840            for chain in &document_flows.p2p_chains {
9841                let cc = chain.purchase_order.header.company_code.clone();
9842                let vendor_id = chain.purchase_order.vendor_id.clone();
9843                let po_date = chain.purchase_order.header.document_date;
9844                let amount = chain.purchase_order.total_net_amount;
9845
9846                let entry = txn_summaries
9847                    .entry((cc.clone(), vendor_id))
9848                    .or_insert_with(|| TransactionSummary {
9849                        total_volume: rust_decimal::Decimal::ZERO,
9850                        transaction_count: 0,
9851                        first_transaction_date: po_date,
9852                        last_transaction_date: po_date,
9853                        related_entities: std::collections::HashSet::new(),
9854                    });
9855                entry.total_volume += amount;
9856                entry.transaction_count += 1;
9857                if po_date < entry.first_transaction_date {
9858                    entry.first_transaction_date = po_date;
9859                }
9860                if po_date > entry.last_transaction_date {
9861                    entry.last_transaction_date = po_date;
9862                }
9863                entry.related_entities.insert(cc);
9864            }
9865
9866            // O2C chains: Company → Customer relationships
9867            for chain in &document_flows.o2c_chains {
9868                let cc = chain.sales_order.header.company_code.clone();
9869                let customer_id = chain.sales_order.customer_id.clone();
9870                let so_date = chain.sales_order.header.document_date;
9871                let amount = chain.sales_order.total_net_amount;
9872
9873                let entry = txn_summaries
9874                    .entry((cc.clone(), customer_id))
9875                    .or_insert_with(|| TransactionSummary {
9876                        total_volume: rust_decimal::Decimal::ZERO,
9877                        transaction_count: 0,
9878                        first_transaction_date: so_date,
9879                        last_transaction_date: so_date,
9880                        related_entities: std::collections::HashSet::new(),
9881                    });
9882                entry.total_volume += amount;
9883                entry.transaction_count += 1;
9884                if so_date < entry.first_transaction_date {
9885                    entry.first_transaction_date = so_date;
9886                }
9887                if so_date > entry.last_transaction_date {
9888                    entry.last_transaction_date = so_date;
9889                }
9890                entry.related_entities.insert(cc);
9891            }
9892
9893            let as_of_date = journal_entries
9894                .last()
9895                .map(|je| je.header.posting_date)
9896                .unwrap_or(start_date);
9897
9898            let graph = gen.generate_entity_graph(
9899                company_code,
9900                as_of_date,
9901                &vendor_summaries,
9902                &customer_summaries,
9903                &txn_summaries,
9904            );
9905
9906            info!(
9907                "Entity relationship graph: {} nodes, {} edges",
9908                graph.nodes.len(),
9909                graph.edges.len()
9910            );
9911            stats.entity_relationship_node_count = graph.nodes.len();
9912            stats.entity_relationship_edge_count = graph.edges.len();
9913            Some(graph)
9914        } else {
9915            None
9916        };
9917
9918        // --- Part 2: Cross-Process Links ---
9919        let cross_process_links = if cpl_enabled {
9920            // Build GoodsReceiptRef from P2P chains
9921            let gr_refs: Vec<GoodsReceiptRef> = document_flows
9922                .p2p_chains
9923                .iter()
9924                .flat_map(|chain| {
9925                    let vendor_id = chain.purchase_order.vendor_id.clone();
9926                    let cc = chain.purchase_order.header.company_code.clone();
9927                    chain.goods_receipts.iter().flat_map(move |gr| {
9928                        gr.items.iter().filter_map({
9929                            let doc_id = gr.header.document_id.clone();
9930                            let v_id = vendor_id.clone();
9931                            let company = cc.clone();
9932                            let receipt_date = gr.header.document_date;
9933                            move |item| {
9934                                item.base
9935                                    .material_id
9936                                    .as_ref()
9937                                    .map(|mat_id| GoodsReceiptRef {
9938                                        document_id: doc_id.clone(),
9939                                        material_id: mat_id.clone(),
9940                                        quantity: item.base.quantity,
9941                                        receipt_date,
9942                                        vendor_id: v_id.clone(),
9943                                        company_code: company.clone(),
9944                                    })
9945                            }
9946                        })
9947                    })
9948                })
9949                .collect();
9950
9951            // Build DeliveryRef from O2C chains
9952            let del_refs: Vec<DeliveryRef> = document_flows
9953                .o2c_chains
9954                .iter()
9955                .flat_map(|chain| {
9956                    let customer_id = chain.sales_order.customer_id.clone();
9957                    let cc = chain.sales_order.header.company_code.clone();
9958                    chain.deliveries.iter().flat_map(move |del| {
9959                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
9960                        del.items.iter().filter_map({
9961                            let doc_id = del.header.document_id.clone();
9962                            let c_id = customer_id.clone();
9963                            let company = cc.clone();
9964                            move |item| {
9965                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
9966                                    document_id: doc_id.clone(),
9967                                    material_id: mat_id.clone(),
9968                                    quantity: item.base.quantity,
9969                                    delivery_date,
9970                                    customer_id: c_id.clone(),
9971                                    company_code: company.clone(),
9972                                })
9973                            }
9974                        })
9975                    })
9976                })
9977                .collect();
9978
9979            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
9980            info!("Cross-process links generated: {} links", links.len());
9981            stats.cross_process_link_count = links.len();
9982            links
9983        } else {
9984            Vec::new()
9985        };
9986
9987        self.check_resources_with_log("post-entity-relationships")?;
9988        Ok((entity_graph, cross_process_links))
9989    }
9990
9991    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
9992    fn phase_industry_data(
9993        &self,
9994        stats: &mut EnhancedGenerationStatistics,
9995    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
9996        if !self.config.industry_specific.enabled {
9997            return None;
9998        }
9999        info!("Phase 29: Generating industry-specific data");
10000        let output = datasynth_generators::industry::factory::generate_industry_output(
10001            self.config.global.industry,
10002        );
10003        stats.industry_gl_account_count = output.gl_accounts.len();
10004        info!(
10005            "Industry data generated: {} GL accounts for {:?}",
10006            output.gl_accounts.len(),
10007            self.config.global.industry
10008        );
10009        Some(output)
10010    }
10011
10012    /// Phase 3b: Generate opening balances for each company.
10013    fn phase_opening_balances(
10014        &mut self,
10015        coa: &Arc<ChartOfAccounts>,
10016        stats: &mut EnhancedGenerationStatistics,
10017    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10018        if !self.config.balance.generate_opening_balances {
10019            debug!("Phase 3b: Skipped (opening balance generation disabled)");
10020            return Ok(Vec::new());
10021        }
10022        info!("Phase 3b: Generating Opening Balances");
10023
10024        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10025            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10026        let fiscal_year = start_date.year();
10027
10028        let industry = match self.config.global.industry {
10029            IndustrySector::Manufacturing => IndustryType::Manufacturing,
10030            IndustrySector::Retail => IndustryType::Retail,
10031            IndustrySector::FinancialServices => IndustryType::Financial,
10032            IndustrySector::Healthcare => IndustryType::Healthcare,
10033            IndustrySector::Technology => IndustryType::Technology,
10034            _ => IndustryType::Manufacturing,
10035        };
10036
10037        let config = datasynth_generators::OpeningBalanceConfig {
10038            industry,
10039            ..Default::default()
10040        };
10041        let mut gen =
10042            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10043
10044        let mut results = Vec::new();
10045        for company in &self.config.companies {
10046            let spec = OpeningBalanceSpec::new(
10047                company.code.clone(),
10048                start_date,
10049                fiscal_year,
10050                company.currency.clone(),
10051                rust_decimal::Decimal::new(10_000_000, 0),
10052                industry,
10053            );
10054            let ob = gen.generate(&spec, coa, start_date, &company.code);
10055            results.push(ob);
10056        }
10057
10058        stats.opening_balance_count = results.len();
10059        info!("Opening balances generated: {} companies", results.len());
10060        self.check_resources_with_log("post-opening-balances")?;
10061
10062        Ok(results)
10063    }
10064
10065    /// Phase 9b: Reconcile GL control accounts to subledger balances.
10066    fn phase_subledger_reconciliation(
10067        &mut self,
10068        subledger: &SubledgerSnapshot,
10069        entries: &[JournalEntry],
10070        stats: &mut EnhancedGenerationStatistics,
10071    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10072        if !self.config.balance.reconcile_subledgers {
10073            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10074            return Ok(Vec::new());
10075        }
10076        info!("Phase 9b: Reconciling GL to subledger balances");
10077
10078        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10079            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10080            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10081
10082        // Build GL balance map from journal entries using a balance tracker
10083        let tracker_config = BalanceTrackerConfig {
10084            validate_on_each_entry: false,
10085            track_history: false,
10086            fail_on_validation_error: false,
10087            ..Default::default()
10088        };
10089        let recon_currency = self
10090            .config
10091            .companies
10092            .first()
10093            .map(|c| c.currency.clone())
10094            .unwrap_or_else(|| "USD".to_string());
10095        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10096        let validation_errors = tracker.apply_entries(entries);
10097        if !validation_errors.is_empty() {
10098            warn!(
10099                error_count = validation_errors.len(),
10100                "Balance tracker encountered validation errors during subledger reconciliation"
10101            );
10102            for err in &validation_errors {
10103                debug!("Balance validation error: {:?}", err);
10104            }
10105        }
10106
10107        let mut engine = datasynth_generators::ReconciliationEngine::new(
10108            datasynth_generators::ReconciliationConfig::default(),
10109        );
10110
10111        let mut results = Vec::new();
10112        let company_code = self
10113            .config
10114            .companies
10115            .first()
10116            .map(|c| c.code.as_str())
10117            .unwrap_or("1000");
10118
10119        // Reconcile AR
10120        if !subledger.ar_invoices.is_empty() {
10121            let gl_balance = tracker
10122                .get_account_balance(
10123                    company_code,
10124                    datasynth_core::accounts::control_accounts::AR_CONTROL,
10125                )
10126                .map(|b| b.closing_balance)
10127                .unwrap_or_default();
10128            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10129            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10130        }
10131
10132        // Reconcile AP
10133        if !subledger.ap_invoices.is_empty() {
10134            let gl_balance = tracker
10135                .get_account_balance(
10136                    company_code,
10137                    datasynth_core::accounts::control_accounts::AP_CONTROL,
10138                )
10139                .map(|b| b.closing_balance)
10140                .unwrap_or_default();
10141            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10142            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10143        }
10144
10145        // Reconcile FA
10146        if !subledger.fa_records.is_empty() {
10147            let gl_asset_balance = tracker
10148                .get_account_balance(
10149                    company_code,
10150                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10151                )
10152                .map(|b| b.closing_balance)
10153                .unwrap_or_default();
10154            let gl_accum_depr_balance = tracker
10155                .get_account_balance(
10156                    company_code,
10157                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10158                )
10159                .map(|b| b.closing_balance)
10160                .unwrap_or_default();
10161            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10162                subledger.fa_records.iter().collect();
10163            let (asset_recon, depr_recon) = engine.reconcile_fa(
10164                company_code,
10165                end_date,
10166                gl_asset_balance,
10167                gl_accum_depr_balance,
10168                &fa_refs,
10169            );
10170            results.push(asset_recon);
10171            results.push(depr_recon);
10172        }
10173
10174        // Reconcile Inventory
10175        if !subledger.inventory_positions.is_empty() {
10176            let gl_balance = tracker
10177                .get_account_balance(
10178                    company_code,
10179                    datasynth_core::accounts::control_accounts::INVENTORY,
10180                )
10181                .map(|b| b.closing_balance)
10182                .unwrap_or_default();
10183            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10184                subledger.inventory_positions.iter().collect();
10185            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10186        }
10187
10188        stats.subledger_reconciliation_count = results.len();
10189        let passed = results.iter().filter(|r| r.is_balanced()).count();
10190        let failed = results.len() - passed;
10191        info!(
10192            "Subledger reconciliation: {} checks, {} passed, {} failed",
10193            results.len(),
10194            passed,
10195            failed
10196        );
10197        self.check_resources_with_log("post-subledger-reconciliation")?;
10198
10199        Ok(results)
10200    }
10201
10202    /// Generate the chart of accounts.
10203    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10204        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10205
10206        let coa_framework = self.resolve_coa_framework();
10207
10208        let mut gen = ChartOfAccountsGenerator::new(
10209            self.config.chart_of_accounts.complexity,
10210            self.config.global.industry,
10211            self.seed,
10212        )
10213        .with_coa_framework(coa_framework);
10214
10215        let mut built = gen.generate();
10216        // v4.4.1: propagate the accounting framework label from config
10217        // onto the CoA struct so SDK consumers can read it without
10218        // cross-referencing the config (they previously saw null).
10219        if self.config.accounting_standards.enabled {
10220            use datasynth_config::schema::AccountingFrameworkConfig;
10221            built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10222                match f {
10223                    AccountingFrameworkConfig::UsGaap => "us_gaap",
10224                    AccountingFrameworkConfig::Ifrs => "ifrs",
10225                    AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10226                    AccountingFrameworkConfig::GermanGaap => "german_gaap",
10227                    AccountingFrameworkConfig::DualReporting => "dual_reporting",
10228                }
10229                .to_string()
10230            });
10231        }
10232        let coa = Arc::new(built);
10233        self.coa = Some(Arc::clone(&coa));
10234
10235        if let Some(pb) = pb {
10236            pb.finish_with_message("Chart of Accounts complete");
10237        }
10238
10239        Ok(coa)
10240    }
10241
10242    /// Generate master data entities.
10243    fn generate_master_data(&mut self) -> SynthResult<()> {
10244        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10245            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10246        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10247
10248        let total = self.config.companies.len() as u64 * 5; // 5 entity types
10249        let pb = self.create_progress_bar(total, "Generating Master Data");
10250
10251        // Resolve country pack once for all companies (uses primary company's country)
10252        let pack = self.primary_pack().clone();
10253
10254        // Capture config values needed inside the parallel closure
10255        let vendors_per_company = self.phase_config.vendors_per_company;
10256        let customers_per_company = self.phase_config.customers_per_company;
10257        let materials_per_company = self.phase_config.materials_per_company;
10258        let assets_per_company = self.phase_config.assets_per_company;
10259        let coa_framework = self.resolve_coa_framework();
10260
10261        // Generate all master data in parallel across companies.
10262        // Each company's data is independent, making this embarrassingly parallel.
10263        let per_company_results: Vec<_> = self
10264            .config
10265            .companies
10266            .par_iter()
10267            .enumerate()
10268            .map(|(i, company)| {
10269                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10270                let pack = pack.clone();
10271
10272                // Generate vendors (offset counter so IDs are globally unique across companies)
10273                let mut vendor_gen = VendorGenerator::new(company_seed);
10274                vendor_gen.set_country_pack(pack.clone());
10275                vendor_gen.set_coa_framework(coa_framework);
10276                vendor_gen.set_counter_offset(i * vendors_per_company);
10277                // v3.2.0+: user-supplied bank names (and future template
10278                // strings) flow through the shared provider.
10279                vendor_gen.set_template_provider(self.template_provider.clone());
10280                // Wire vendor network config when enabled
10281                if self.config.vendor_network.enabled {
10282                    let vn = &self.config.vendor_network;
10283                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10284                        enabled: true,
10285                        depth: vn.depth,
10286                        tier1_count: datasynth_generators::TierCountConfig::new(
10287                            vn.tier1.min,
10288                            vn.tier1.max,
10289                        ),
10290                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
10291                            vn.tier2_per_parent.min,
10292                            vn.tier2_per_parent.max,
10293                        ),
10294                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
10295                            vn.tier3_per_parent.min,
10296                            vn.tier3_per_parent.max,
10297                        ),
10298                        cluster_distribution: datasynth_generators::ClusterDistribution {
10299                            reliable_strategic: vn.clusters.reliable_strategic,
10300                            standard_operational: vn.clusters.standard_operational,
10301                            transactional: vn.clusters.transactional,
10302                            problematic: vn.clusters.problematic,
10303                        },
10304                        concentration_limits: datasynth_generators::ConcentrationLimits {
10305                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10306                            max_top5: vn.dependencies.top_5_concentration,
10307                        },
10308                        ..datasynth_generators::VendorNetworkConfig::default()
10309                    });
10310                }
10311                let vendor_pool =
10312                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10313
10314                // Generate customers (offset counter so IDs are globally unique across companies)
10315                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10316                customer_gen.set_country_pack(pack.clone());
10317                customer_gen.set_coa_framework(coa_framework);
10318                customer_gen.set_counter_offset(i * customers_per_company);
10319                // v3.2.0+: user-supplied customer names flow through the shared provider.
10320                customer_gen.set_template_provider(self.template_provider.clone());
10321                // Wire customer segmentation config when enabled
10322                if self.config.customer_segmentation.enabled {
10323                    let cs = &self.config.customer_segmentation;
10324                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10325                        enabled: true,
10326                        segment_distribution: datasynth_generators::SegmentDistribution {
10327                            enterprise: cs.value_segments.enterprise.customer_share,
10328                            mid_market: cs.value_segments.mid_market.customer_share,
10329                            smb: cs.value_segments.smb.customer_share,
10330                            consumer: cs.value_segments.consumer.customer_share,
10331                        },
10332                        referral_config: datasynth_generators::ReferralConfig {
10333                            enabled: cs.networks.referrals.enabled,
10334                            referral_rate: cs.networks.referrals.referral_rate,
10335                            ..Default::default()
10336                        },
10337                        hierarchy_config: datasynth_generators::HierarchyConfig {
10338                            enabled: cs.networks.corporate_hierarchies.enabled,
10339                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10340                            ..Default::default()
10341                        },
10342                        ..Default::default()
10343                    };
10344                    customer_gen.set_segmentation_config(seg_cfg);
10345                }
10346                let customer_pool = customer_gen.generate_customer_pool(
10347                    customers_per_company,
10348                    &company.code,
10349                    start_date,
10350                );
10351
10352                // Generate materials (offset counter so IDs are globally unique across companies)
10353                let mut material_gen = MaterialGenerator::new(company_seed + 200);
10354                material_gen.set_country_pack(pack.clone());
10355                material_gen.set_counter_offset(i * materials_per_company);
10356                // v3.2.1+: user-supplied material descriptions flow through shared provider
10357                material_gen.set_template_provider(self.template_provider.clone());
10358                let material_pool = material_gen.generate_material_pool(
10359                    materials_per_company,
10360                    &company.code,
10361                    start_date,
10362                );
10363
10364                // Generate fixed assets
10365                let mut asset_gen = AssetGenerator::new(company_seed + 300);
10366                // v3.2.1+: user-supplied asset descriptions flow through shared provider
10367                asset_gen.set_template_provider(self.template_provider.clone());
10368                let asset_pool = asset_gen.generate_asset_pool(
10369                    assets_per_company,
10370                    &company.code,
10371                    (start_date, end_date),
10372                );
10373
10374                // Generate employees
10375                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
10376                employee_gen.set_country_pack(pack);
10377                // v3.2.1+: user-supplied department names flow through shared provider
10378                employee_gen.set_template_provider(self.template_provider.clone());
10379                let employee_pool =
10380                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
10381
10382                // Generate employee change history (2-5 events per employee)
10383                let employee_change_history =
10384                    employee_gen.generate_all_change_history(&employee_pool, end_date);
10385
10386                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
10387                let employee_ids: Vec<String> = employee_pool
10388                    .employees
10389                    .iter()
10390                    .map(|e| e.employee_id.clone())
10391                    .collect();
10392                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
10393                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
10394
10395                (
10396                    vendor_pool.vendors,
10397                    customer_pool.customers,
10398                    material_pool.materials,
10399                    asset_pool.assets,
10400                    employee_pool.employees,
10401                    employee_change_history,
10402                    cost_centers,
10403                )
10404            })
10405            .collect();
10406
10407        // Aggregate results from all companies
10408        for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
10409            per_company_results
10410        {
10411            self.master_data.vendors.extend(vendors);
10412            self.master_data.customers.extend(customers);
10413            self.master_data.materials.extend(materials);
10414            self.master_data.assets.extend(assets);
10415            self.master_data.employees.extend(employees);
10416            self.master_data.cost_centers.extend(cost_centers);
10417            self.master_data
10418                .employee_change_history
10419                .extend(change_history);
10420        }
10421
10422        // v3.3.0: one OrganizationalProfile per company. Cheap to
10423        // generate (derived from industry + company_code) so we
10424        // always emit when master data runs; no separate config flag.
10425        {
10426            use datasynth_core::models::IndustrySector;
10427            use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10428            let industry = match self.config.global.industry {
10429                IndustrySector::Manufacturing => "manufacturing",
10430                IndustrySector::Retail => "retail",
10431                IndustrySector::FinancialServices => "financial_services",
10432                IndustrySector::Technology => "technology",
10433                IndustrySector::Healthcare => "healthcare",
10434                _ => "other",
10435            };
10436            for (i, company) in self.config.companies.iter().enumerate() {
10437                let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10438                let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10439                let profile = profile_gen.generate(&company.code, industry);
10440                self.master_data.organizational_profiles.push(profile);
10441            }
10442        }
10443
10444        if let Some(pb) = &pb {
10445            pb.inc(total);
10446        }
10447        if let Some(pb) = pb {
10448            pb.finish_with_message("Master data generation complete");
10449        }
10450
10451        Ok(())
10452    }
10453
10454    /// Generate document flows (P2P and O2C).
10455    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10456        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10457            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10458
10459        // Generate P2P chains
10460        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
10461        let months = (self.config.global.period_months as usize).max(1);
10462        let p2p_count = self
10463            .phase_config
10464            .p2p_chains
10465            .min(self.master_data.vendors.len() * 2 * months);
10466        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10467
10468        // Convert P2P config from schema to generator config
10469        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10470        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10471        p2p_gen.set_country_pack(self.primary_pack().clone());
10472        // v3.4.1: wire temporal context so PO/GR/invoice/payment dates snap
10473        // to business days. No-op when `temporal_patterns.business_days.
10474        // enabled = false`.
10475        if let Some(ctx) = &self.temporal_context {
10476            p2p_gen.set_temporal_context(Arc::clone(ctx));
10477        }
10478
10479        for i in 0..p2p_count {
10480            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10481            let materials: Vec<&Material> = self
10482                .master_data
10483                .materials
10484                .iter()
10485                .skip(i % self.master_data.materials.len().max(1))
10486                .take(2.min(self.master_data.materials.len()))
10487                .collect();
10488
10489            if materials.is_empty() {
10490                continue;
10491            }
10492
10493            let company = &self.config.companies[i % self.config.companies.len()];
10494            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10495            let fiscal_period = po_date.month() as u8;
10496            let created_by = if self.master_data.employees.is_empty() {
10497                "SYSTEM"
10498            } else {
10499                self.master_data.employees[i % self.master_data.employees.len()]
10500                    .user_id
10501                    .as_str()
10502            };
10503
10504            let chain = p2p_gen.generate_chain(
10505                &company.code,
10506                vendor,
10507                &materials,
10508                po_date,
10509                start_date.year() as u16,
10510                fiscal_period,
10511                created_by,
10512            );
10513
10514            // Flatten documents
10515            flows.purchase_orders.push(chain.purchase_order.clone());
10516            flows.goods_receipts.extend(chain.goods_receipts.clone());
10517            if let Some(vi) = &chain.vendor_invoice {
10518                flows.vendor_invoices.push(vi.clone());
10519            }
10520            if let Some(payment) = &chain.payment {
10521                flows.payments.push(payment.clone());
10522            }
10523            for remainder in &chain.remainder_payments {
10524                flows.payments.push(remainder.clone());
10525            }
10526            flows.p2p_chains.push(chain);
10527
10528            if let Some(pb) = &pb {
10529                pb.inc(1);
10530            }
10531        }
10532
10533        if let Some(pb) = pb {
10534            pb.finish_with_message("P2P document flows complete");
10535        }
10536
10537        // Generate O2C chains
10538        // Cap at ~2 SOs per customer per month to keep order volume realistic
10539        let o2c_count = self
10540            .phase_config
10541            .o2c_chains
10542            .min(self.master_data.customers.len() * 2 * months);
10543        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
10544
10545        // Convert O2C config from schema to generator config
10546        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
10547        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
10548        o2c_gen.set_country_pack(self.primary_pack().clone());
10549        // v3.4.1: wire temporal context (no-op when business_days disabled).
10550        if let Some(ctx) = &self.temporal_context {
10551            o2c_gen.set_temporal_context(Arc::clone(ctx));
10552        }
10553
10554        for i in 0..o2c_count {
10555            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
10556            let materials: Vec<&Material> = self
10557                .master_data
10558                .materials
10559                .iter()
10560                .skip(i % self.master_data.materials.len().max(1))
10561                .take(2.min(self.master_data.materials.len()))
10562                .collect();
10563
10564            if materials.is_empty() {
10565                continue;
10566            }
10567
10568            let company = &self.config.companies[i % self.config.companies.len()];
10569            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
10570            let fiscal_period = so_date.month() as u8;
10571            let created_by = if self.master_data.employees.is_empty() {
10572                "SYSTEM"
10573            } else {
10574                self.master_data.employees[i % self.master_data.employees.len()]
10575                    .user_id
10576                    .as_str()
10577            };
10578
10579            let chain = o2c_gen.generate_chain(
10580                &company.code,
10581                customer,
10582                &materials,
10583                so_date,
10584                start_date.year() as u16,
10585                fiscal_period,
10586                created_by,
10587            );
10588
10589            // Flatten documents
10590            flows.sales_orders.push(chain.sales_order.clone());
10591            flows.deliveries.extend(chain.deliveries.clone());
10592            if let Some(ci) = &chain.customer_invoice {
10593                flows.customer_invoices.push(ci.clone());
10594            }
10595            if let Some(receipt) = &chain.customer_receipt {
10596                flows.payments.push(receipt.clone());
10597            }
10598            // Extract remainder receipts (follow-up to partial payments)
10599            for receipt in &chain.remainder_receipts {
10600                flows.payments.push(receipt.clone());
10601            }
10602            flows.o2c_chains.push(chain);
10603
10604            if let Some(pb) = &pb {
10605                pb.inc(1);
10606            }
10607        }
10608
10609        if let Some(pb) = pb {
10610            pb.finish_with_message("O2C document flows complete");
10611        }
10612
10613        // Collect all document cross-references from document headers.
10614        // Each document embeds references to its predecessor(s) via add_reference(); here we
10615        // denormalise them into a flat list for the document_references.json output file.
10616        {
10617            let mut refs = Vec::new();
10618            for doc in &flows.purchase_orders {
10619                refs.extend(doc.header.document_references.iter().cloned());
10620            }
10621            for doc in &flows.goods_receipts {
10622                refs.extend(doc.header.document_references.iter().cloned());
10623            }
10624            for doc in &flows.vendor_invoices {
10625                refs.extend(doc.header.document_references.iter().cloned());
10626            }
10627            for doc in &flows.sales_orders {
10628                refs.extend(doc.header.document_references.iter().cloned());
10629            }
10630            for doc in &flows.deliveries {
10631                refs.extend(doc.header.document_references.iter().cloned());
10632            }
10633            for doc in &flows.customer_invoices {
10634                refs.extend(doc.header.document_references.iter().cloned());
10635            }
10636            for doc in &flows.payments {
10637                refs.extend(doc.header.document_references.iter().cloned());
10638            }
10639            debug!(
10640                "Collected {} document cross-references from document headers",
10641                refs.len()
10642            );
10643            flows.document_references = refs;
10644        }
10645
10646        Ok(())
10647    }
10648
10649    /// Generate journal entries using parallel generation across multiple cores.
10650    fn generate_journal_entries(
10651        &mut self,
10652        coa: &Arc<ChartOfAccounts>,
10653    ) -> SynthResult<Vec<JournalEntry>> {
10654        use datasynth_core::traits::ParallelGenerator;
10655
10656        let total = self.calculate_total_transactions();
10657        let pb = self.create_progress_bar(total, "Generating Journal Entries");
10658
10659        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10660            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10661        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10662
10663        let company_codes: Vec<String> = self
10664            .config
10665            .companies
10666            .iter()
10667            .map(|c| c.code.clone())
10668            .collect();
10669
10670        let mut generator = JournalEntryGenerator::new_with_params(
10671            self.config.transactions.clone(),
10672            Arc::clone(coa),
10673            company_codes,
10674            start_date,
10675            end_date,
10676            self.seed,
10677        );
10678        // Wire the `business_processes.*_weight` config through (phantom knob
10679        // until now — the JE generator hard-coded 0.35/0.30/0.20/0.10/0.05).
10680        let bp = &self.config.business_processes;
10681        generator.set_business_process_weights(
10682            bp.o2c_weight,
10683            bp.p2p_weight,
10684            bp.r2r_weight,
10685            bp.h2r_weight,
10686            bp.a2r_weight,
10687        );
10688        // v3.4.0: wire advanced distributions (mixture models + industry
10689        // profiles). No-op when `distributions.enabled = false` or
10690        // `distributions.amounts.enabled = false`, preserving v3.3.2
10691        // byte-identical output on default configs.
10692        generator
10693            .set_advanced_distributions(&self.config.distributions, self.seed + 400)
10694            .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
10695        let generator = generator;
10696
10697        // Connect generated master data to ensure JEs reference real entities
10698        // Enable persona-based error injection for realistic human behavior
10699        // Pass fraud configuration for fraud injection
10700        let je_pack = self.primary_pack();
10701
10702        let mut generator = generator
10703            .with_master_data(
10704                &self.master_data.vendors,
10705                &self.master_data.customers,
10706                &self.master_data.materials,
10707            )
10708            .with_country_pack_names(je_pack)
10709            .with_country_pack_temporal(
10710                self.config.temporal_patterns.clone(),
10711                self.seed + 200,
10712                je_pack,
10713            )
10714            .with_persona_errors(true)
10715            .with_fraud_config(self.config.fraud.clone());
10716
10717        // Apply temporal drift if configured. v3.5.2+: also merge
10718        // `distributions.regime_changes` (regime events, economic
10719        // cycles, parameter drifts) into the same DriftConfig so both
10720        // knobs flow through the shared DriftController.
10721        let temporal_enabled = self.config.temporal.enabled;
10722        let regimes_enabled = self.config.distributions.regime_changes.enabled;
10723        if temporal_enabled || regimes_enabled {
10724            let mut drift_config = if temporal_enabled {
10725                self.config.temporal.to_core_config()
10726            } else {
10727                // regime-changes only: start from default (drift OFF),
10728                // apply_to flips `enabled = true`.
10729                datasynth_core::distributions::DriftConfig::default()
10730            };
10731            if regimes_enabled {
10732                self.config
10733                    .distributions
10734                    .regime_changes
10735                    .apply_to(&mut drift_config, start_date);
10736            }
10737            generator = generator.with_drift_config(drift_config, self.seed + 100);
10738        }
10739
10740        // Check memory limit at start
10741        self.check_memory_limit()?;
10742
10743        // Determine parallelism: use available cores, but cap at total entries
10744        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
10745
10746        // Use parallel generation for datasets with 10K+ entries.
10747        // Below this threshold, the statistical properties of a single-seeded
10748        // generator (e.g. Benford compliance) are better preserved.
10749        let entries = if total >= 10_000 && num_threads > 1 {
10750            // Parallel path: split the generator across cores and generate in parallel.
10751            // Each sub-generator gets a unique seed for deterministic, independent generation.
10752            let sub_generators = generator.split(num_threads);
10753            let entries_per_thread = total as usize / num_threads;
10754            let remainder = total as usize % num_threads;
10755
10756            let batches: Vec<Vec<JournalEntry>> = sub_generators
10757                .into_par_iter()
10758                .enumerate()
10759                .map(|(i, mut gen)| {
10760                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
10761                    gen.generate_batch(count)
10762                })
10763                .collect();
10764
10765            // Merge all batches into a single Vec
10766            let entries = JournalEntryGenerator::merge_results(batches);
10767
10768            if let Some(pb) = &pb {
10769                pb.inc(total);
10770            }
10771            entries
10772        } else {
10773            // Sequential path for small datasets (< 1000 entries)
10774            let mut entries = Vec::with_capacity(total as usize);
10775            for _ in 0..total {
10776                let entry = generator.generate();
10777                entries.push(entry);
10778                if let Some(pb) = &pb {
10779                    pb.inc(1);
10780                }
10781            }
10782            entries
10783        };
10784
10785        if let Some(pb) = pb {
10786            pb.finish_with_message("Journal entries complete");
10787        }
10788
10789        Ok(entries)
10790    }
10791
10792    /// Generate journal entries from document flows.
10793    ///
10794    /// This creates proper GL entries for each document in the P2P and O2C flows,
10795    /// ensuring that document activity is reflected in the general ledger.
10796    fn generate_jes_from_document_flows(
10797        &mut self,
10798        flows: &DocumentFlowSnapshot,
10799    ) -> SynthResult<Vec<JournalEntry>> {
10800        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
10801        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
10802
10803        let je_config = match self.resolve_coa_framework() {
10804            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
10805            CoAFramework::GermanSkr04 => {
10806                let fa = datasynth_core::FrameworkAccounts::german_gaap();
10807                DocumentFlowJeConfig::from(&fa)
10808            }
10809            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
10810        };
10811
10812        let populate_fec = je_config.populate_fec_fields;
10813        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
10814
10815        // Build auxiliary account lookup from vendor/customer master data so that
10816        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
10817        // PCG "4010001") instead of raw partner IDs.
10818        if populate_fec {
10819            let mut aux_lookup = std::collections::HashMap::new();
10820            for vendor in &self.master_data.vendors {
10821                if let Some(ref aux) = vendor.auxiliary_gl_account {
10822                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
10823                }
10824            }
10825            for customer in &self.master_data.customers {
10826                if let Some(ref aux) = customer.auxiliary_gl_account {
10827                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
10828                }
10829            }
10830            if !aux_lookup.is_empty() {
10831                generator.set_auxiliary_account_lookup(aux_lookup);
10832            }
10833        }
10834
10835        let mut entries = Vec::new();
10836
10837        // Generate JEs from P2P chains
10838        for chain in &flows.p2p_chains {
10839            let chain_entries = generator.generate_from_p2p_chain(chain);
10840            entries.extend(chain_entries);
10841            if let Some(pb) = &pb {
10842                pb.inc(1);
10843            }
10844        }
10845
10846        // Generate JEs from O2C chains
10847        for chain in &flows.o2c_chains {
10848            let chain_entries = generator.generate_from_o2c_chain(chain);
10849            entries.extend(chain_entries);
10850            if let Some(pb) = &pb {
10851                pb.inc(1);
10852            }
10853        }
10854
10855        if let Some(pb) = pb {
10856            pb.finish_with_message(format!(
10857                "Generated {} JEs from document flows",
10858                entries.len()
10859            ));
10860        }
10861
10862        Ok(entries)
10863    }
10864
10865    /// Generate journal entries from payroll runs.
10866    ///
10867    /// Creates one JE per payroll run:
10868    /// - DR Salaries & Wages (6100) for gross pay
10869    /// - CR Payroll Clearing (9100) for gross pay
10870    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
10871        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
10872
10873        let mut jes = Vec::with_capacity(payroll_runs.len());
10874
10875        for run in payroll_runs {
10876            let mut je = JournalEntry::new_simple(
10877                format!("JE-PAYROLL-{}", run.payroll_id),
10878                run.company_code.clone(),
10879                run.run_date,
10880                format!("Payroll {}", run.payroll_id),
10881            );
10882
10883            // Debit Salaries & Wages for gross pay
10884            je.add_line(JournalEntryLine {
10885                line_number: 1,
10886                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
10887                debit_amount: run.total_gross,
10888                reference: Some(run.payroll_id.clone()),
10889                text: Some(format!(
10890                    "Payroll {} ({} employees)",
10891                    run.payroll_id, run.employee_count
10892                )),
10893                ..Default::default()
10894            });
10895
10896            // Credit Payroll Clearing for gross pay
10897            je.add_line(JournalEntryLine {
10898                line_number: 2,
10899                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
10900                credit_amount: run.total_gross,
10901                reference: Some(run.payroll_id.clone()),
10902                ..Default::default()
10903            });
10904
10905            jes.push(je);
10906        }
10907
10908        jes
10909    }
10910
10911    /// Link document flows to subledger records.
10912    ///
10913    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
10914    /// ensuring subledger data is coherent with document flow data.
10915    fn link_document_flows_to_subledgers(
10916        &mut self,
10917        flows: &DocumentFlowSnapshot,
10918    ) -> SynthResult<SubledgerSnapshot> {
10919        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
10920        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
10921
10922        // Build vendor/customer name maps from master data for realistic subledger names
10923        let vendor_names: std::collections::HashMap<String, String> = self
10924            .master_data
10925            .vendors
10926            .iter()
10927            .map(|v| (v.vendor_id.clone(), v.name.clone()))
10928            .collect();
10929        let customer_names: std::collections::HashMap<String, String> = self
10930            .master_data
10931            .customers
10932            .iter()
10933            .map(|c| (c.customer_id.clone(), c.name.clone()))
10934            .collect();
10935
10936        let mut linker = DocumentFlowLinker::new()
10937            .with_vendor_names(vendor_names)
10938            .with_customer_names(customer_names);
10939
10940        // Convert vendor invoices to AP invoices
10941        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
10942        if let Some(pb) = &pb {
10943            pb.inc(flows.vendor_invoices.len() as u64);
10944        }
10945
10946        // Convert customer invoices to AR invoices
10947        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
10948        if let Some(pb) = &pb {
10949            pb.inc(flows.customer_invoices.len() as u64);
10950        }
10951
10952        if let Some(pb) = pb {
10953            pb.finish_with_message(format!(
10954                "Linked {} AP and {} AR invoices",
10955                ap_invoices.len(),
10956                ar_invoices.len()
10957            ));
10958        }
10959
10960        Ok(SubledgerSnapshot {
10961            ap_invoices,
10962            ar_invoices,
10963            fa_records: Vec::new(),
10964            inventory_positions: Vec::new(),
10965            inventory_movements: Vec::new(),
10966            // Aging reports are computed after payment settlement in phase_document_flows.
10967            ar_aging_reports: Vec::new(),
10968            ap_aging_reports: Vec::new(),
10969            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
10970            depreciation_runs: Vec::new(),
10971            inventory_valuations: Vec::new(),
10972            // Dunning runs and letters are populated in phase_document_flows after AR aging.
10973            dunning_runs: Vec::new(),
10974            dunning_letters: Vec::new(),
10975        })
10976    }
10977
    /// Generate OCPM events from document flows and the other process snapshots.
    ///
    /// Creates an OCEL 2.0 style event log with one generated case per:
    /// P2P chain, O2C chain, sourcing project (S2C), payroll run (H2R),
    /// production order (MFG), banking customer, audit engagement, and bank
    /// reconciliation — processed in exactly that order. All cases are produced
    /// by a single seeded generator, so the processing order is part of the
    /// deterministic output.
    ///
    /// # Arguments
    /// * `flows` - P2P and O2C document chains.
    /// * `sourcing` - sourcing projects plus the RFx events, bids, contracts and
    ///   qualifications used to link them.
    /// * `hr` - payroll runs, payroll line items and time entries.
    /// * `manufacturing` - production orders, quality inspections and cycle counts.
    /// * `banking` - banking customers, accounts and transactions.
    /// * `audit` - engagements, workpapers, evidence, risk assessments, findings
    ///   and judgments.
    /// * `financial_reporting` - bank reconciliations.
    ///
    /// # Returns
    /// An [`OcpmSnapshot`] holding the event/object/case counts taken from the
    /// log summary together with the event log itself. The visible body contains
    /// no fallible (`?`) operations, so it always returns `Ok`.
    #[allow(clippy::too_many_arguments)]
    fn generate_ocpm_events(
        &mut self,
        flows: &DocumentFlowSnapshot,
        sourcing: &SourcingSnapshot,
        hr: &HrSnapshot,
        manufacturing: &ManufacturingSnapshot,
        banking: &BankingSnapshot,
        audit: &AuditSnapshot,
        financial_reporting: &FinancialReportingSnapshot,
    ) -> SynthResult<OcpmSnapshot> {
        // Progress total = number of cases that will be generated (one per item
        // in each of the collections iterated below).
        let total_chains = flows.p2p_chains.len()
            + flows.o2c_chains.len()
            + sourcing.sourcing_projects.len()
            + hr.payroll_runs.len()
            + manufacturing.production_orders.len()
            + banking.customers.len()
            + audit.engagements.len()
            + financial_reporting.bank_reconciliations.len();
        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");

        // Create OCPM event log with standard types
        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();

        // Configure the OCPM generator.
        // P2P and O2C are always enabled; every other process family is enabled
        // only when its input collection is non-empty.
        let ocpm_config = OcpmGeneratorConfig {
            generate_p2p: true,
            generate_o2c: true,
            generate_s2c: !sourcing.sourcing_projects.is_empty(),
            generate_h2r: !hr.payroll_runs.is_empty(),
            generate_mfg: !manufacturing.production_orders.is_empty(),
            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
            generate_bank: !banking.customers.is_empty(),
            generate_audit: !audit.engagements.is_empty(),
            happy_path_rate: 0.75,
            exception_path_rate: 0.20,
            error_path_rate: 0.05,
            add_duration_variability: true,
            duration_std_dev_factor: 0.3,
        };
        // Seeds are offset from the orchestrator seed (+3000 for the event
        // generator, +3001 for the UUID factory).
        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);

        // Get available users for resource assignment (at most the first 20 employees)
        let available_users: Vec<String> = self
            .master_data
            .employees
            .iter()
            .take(20)
            .map(|e| e.user_id.clone())
            .collect();

        // Deterministic base date from config (avoids Utc::now() non-determinism).
        // If the configured start date does not parse as %Y-%m-%d, 2024-01-01 is used.
        let fallback_date =
            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
            .unwrap_or(fallback_date);
        let base_midnight = base_date
            .and_hms_opt(0, 0, 0)
            .expect("midnight is always valid");
        let base_datetime =
            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);

        // Helper closure to add case results to event log: moves every event,
        // object, relationship and correlation event of a generated case into
        // the log, then records the case trace.
        let add_result = |event_log: &mut OcpmEventLog,
                          result: datasynth_ocpm::CaseGenerationResult| {
            for event in result.events {
                event_log.add_event(event);
            }
            for object in result.objects {
                event_log.add_object(object);
            }
            for relationship in result.relationships {
                event_log.add_relationship(relationship);
            }
            for corr in result.correlation_events {
                event_log.add_correlation_event(corr);
            }
            event_log.add_case(result.case_trace);
        };

        // Generate events from P2P chains.
        // Only the first goods receipt of a chain is linked. A missing goods
        // receipt / invoice / payment is passed as an empty document id.
        // NOTE(review): how the builder treats "" is not visible here — confirm
        // it does not create placeholder objects with empty ids.
        for chain in &flows.p2p_chains {
            let po = &chain.purchase_order;
            let documents = P2pDocuments::new(
                &po.header.document_id,
                &po.vendor_id,
                &po.header.company_code,
                po.total_net_amount,
                &po.header.currency,
                &ocpm_uuid_factory,
            )
            .with_goods_receipt(
                chain
                    .goods_receipts
                    .first()
                    .map(|gr| gr.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            )
            .with_invoice(
                chain
                    .vendor_invoice
                    .as_ref()
                    .map(|vi| vi.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            )
            .with_payment(
                chain
                    .payment
                    .as_ref()
                    .map(|p| p.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            );

            // The case starts at the purchase order's entry timestamp, interpreted as UTC.
            let start_time =
                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // Generate events from O2C chains.
        // Same conventions as P2P: first delivery only, "" for missing documents.
        for chain in &flows.o2c_chains {
            let so = &chain.sales_order;
            let documents = O2cDocuments::new(
                &so.header.document_id,
                &so.customer_id,
                &so.header.company_code,
                so.total_net_amount,
                &so.header.currency,
                &ocpm_uuid_factory,
            )
            .with_delivery(
                chain
                    .deliveries
                    .first()
                    .map(|d| d.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            )
            .with_invoice(
                chain
                    .customer_invoice
                    .as_ref()
                    .map(|ci| ci.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            )
            .with_receipt(
                chain
                    .customer_receipt
                    .as_ref()
                    .map(|r| r.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            );

            // The case starts at the sales order's entry timestamp, interpreted as UTC.
            let start_time =
                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // Generate events from S2C sourcing projects
        for project in &sourcing.sourcing_projects {
            // Find vendor from contracts or qualifications.
            // Fallback order: contract linked to this project -> first
            // qualification in the snapshot (not filtered by project) -> first
            // vendor in master data -> literal "V000".
            let vendor_id = sourcing
                .contracts
                .iter()
                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
                .map(|c| c.vendor_id.clone())
                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
                .or_else(|| {
                    self.master_data
                        .vendors
                        .first()
                        .map(|v| v.vendor_id.clone())
                })
                .unwrap_or_else(|| "V000".to_string());
            let mut docs = S2cDocuments::new(
                &project.project_id,
                &vendor_id,
                &project.company_code,
                project.estimated_annual_spend,
                &ocpm_uuid_factory,
            );
            // Link RFx if available (first RFx event belonging to this project)
            if let Some(rfx) = sourcing
                .rfx_events
                .iter()
                .find(|r| r.sourcing_project_id == project.project_id)
            {
                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
                // Link winning bid (status == Accepted)
                if let Some(bid) = sourcing.bids.iter().find(|b| {
                    b.rfx_id == rfx.rfx_id
                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
                }) {
                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
                }
            }
            // Link contract (first contract belonging to this project)
            if let Some(contract) = sourcing
                .contracts
                .iter()
                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
            {
                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
            }
            // Every S2C case starts 90 days before the configured base date.
            let start_time = base_datetime - chrono::Duration::days(90);
            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // Generate events from H2R payroll runs
        for run in &hr.payroll_runs {
            // Use first matching payroll line item's employee, or fallback
            // to the literal "EMP000" when the run has no line items.
            let employee_id = hr
                .payroll_line_items
                .iter()
                .find(|li| li.payroll_id == run.payroll_id)
                .map(|li| li.employee_id.as_str())
                .unwrap_or("EMP000");
            let docs = H2rDocuments::new(
                &run.payroll_id,
                employee_id,
                &run.company_code,
                run.total_gross,
                &ocpm_uuid_factory,
            )
            // Attach up to 5 time entries dated inside the pay period. The filter
            // checks only the date, not the employee or company.
            .with_time_entries(
                hr.time_entries
                    .iter()
                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
                    .take(5)
                    .map(|t| t.entry_id.as_str())
                    .collect(),
            );
            // Every H2R case starts 30 days before the configured base date.
            let start_time = base_datetime - chrono::Duration::days(30);
            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // Generate events from MFG production orders
        for order in &manufacturing.production_orders {
            // Operation ids are synthesized as "OP-" plus the zero-padded
            // 4-digit operation number. The formatted strings live in a
            // temporary Vec for the duration of this statement so that the
            // borrowed &str values handed to the builder stay valid.
            let mut docs = MfgDocuments::new(
                &order.order_id,
                &order.material_id,
                &order.company_code,
                order.planned_quantity,
                &ocpm_uuid_factory,
            )
            .with_operations(
                order
                    .operations
                    .iter()
                    .map(|o| format!("OP-{:04}", o.operation_number))
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            );
            // Link quality inspection if available (via reference_id matching order_id)
            if let Some(insp) = manufacturing
                .quality_inspections
                .iter()
                .find(|i| i.reference_id == order.order_id)
            {
                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
            }
            // Link cycle count if available (match by material_id in items)
            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
                cc.items
                    .iter()
                    .any(|item| item.material_id == order.material_id)
            }) {
                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
            }
            // Every MFG case starts 60 days before the configured base date.
            let start_time = base_datetime - chrono::Duration::days(60);
            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // Generate events from Banking customers
        for customer in &banking.customers {
            let customer_id_str = customer.customer_id.to_string();
            // NOTE(review): the literal "1000" is presumably a fixed company
            // code for all banking cases — confirm against BankDocuments::new.
            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
            // Link accounts (primary_owner_id matches customer_id); only the
            // first matching account is used.
            if let Some(account) = banking
                .accounts
                .iter()
                .find(|a| a.primary_owner_id == customer.customer_id)
            {
                let account_id_str = account.account_id.to_string();
                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
                // Link transactions for this account (first 10 at most).
                // Ids and amounts are collected by two identical filter/take
                // passes, so the two vectors are index-aligned.
                let txn_strs: Vec<String> = banking
                    .transactions
                    .iter()
                    .filter(|t| t.account_id == account.account_id)
                    .take(10)
                    .map(|t| t.transaction_id.to_string())
                    .collect();
                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
                let txn_amounts: Vec<rust_decimal::Decimal> = banking
                    .transactions
                    .iter()
                    .filter(|t| t.account_id == account.account_id)
                    .take(10)
                    .map(|t| t.amount)
                    .collect();
                if !txn_ids.is_empty() {
                    docs = docs.with_transactions(txn_ids, txn_amounts);
                }
            }
            // Every banking case starts 180 days before the configured base date.
            let start_time = base_datetime - chrono::Duration::days(180);
            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // Generate events from Audit engagements.
        // Related records are matched by engagement_id and capped: 10 workpapers,
        // 10 evidence items, 5 risks, 5 findings, 5 judgments. Each id list is
        // stringified into a temporary Vec<String> and then borrowed as &str.
        for engagement in &audit.engagements {
            let engagement_id_str = engagement.engagement_id.to_string();
            let docs = AuditDocuments::new(
                &engagement_id_str,
                &engagement.client_entity_id,
                &ocpm_uuid_factory,
            )
            .with_workpapers(
                audit
                    .workpapers
                    .iter()
                    .filter(|w| w.engagement_id == engagement.engagement_id)
                    .take(10)
                    .map(|w| w.workpaper_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            )
            .with_evidence(
                audit
                    .evidence
                    .iter()
                    .filter(|e| e.engagement_id == engagement.engagement_id)
                    .take(10)
                    .map(|e| e.evidence_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            )
            .with_risks(
                audit
                    .risk_assessments
                    .iter()
                    .filter(|r| r.engagement_id == engagement.engagement_id)
                    .take(5)
                    .map(|r| r.risk_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            )
            .with_findings(
                audit
                    .findings
                    .iter()
                    .filter(|f| f.engagement_id == engagement.engagement_id)
                    .take(5)
                    .map(|f| f.finding_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            )
            .with_judgments(
                audit
                    .judgments
                    .iter()
                    .filter(|j| j.engagement_id == engagement.engagement_id)
                    .take(5)
                    .map(|j| j.judgment_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            );
            // Every audit case starts 120 days before the configured base date.
            let start_time = base_datetime - chrono::Duration::days(120);
            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // Generate events from Bank Reconciliations.
        // At most 20 statement lines and 10 reconciling items are linked per case.
        for recon in &financial_reporting.bank_reconciliations {
            let docs = BankReconDocuments::new(
                &recon.reconciliation_id,
                &recon.bank_account_id,
                &recon.company_code,
                recon.bank_ending_balance,
                &ocpm_uuid_factory,
            )
            .with_statement_lines(
                recon
                    .statement_lines
                    .iter()
                    .take(20)
                    .map(|l| l.line_id.as_str())
                    .collect(),
            )
            .with_reconciling_items(
                recon
                    .reconciling_items
                    .iter()
                    .take(10)
                    .map(|i| i.item_id.as_str())
                    .collect(),
            );
            // Every bank reconciliation case starts 30 days before the configured base date.
            let start_time = base_datetime - chrono::Duration::days(30);
            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // Compute process variants
        event_log.compute_variants();

        // Counts reported to the caller come from the log's own summary.
        let summary = event_log.summary();

        if let Some(pb) = pb {
            pb.finish_with_message(format!(
                "Generated {} OCPM events, {} objects",
                summary.event_count, summary.object_count
            ));
        }

        Ok(OcpmSnapshot {
            event_count: summary.event_count,
            object_count: summary.object_count,
            case_count: summary.case_count,
            event_log: Some(event_log),
        })
    }
11459
11460    /// Inject anomalies into journal entries.
11461    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
11462        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
11463
11464        // Read anomaly rates from config instead of using hardcoded values.
11465        // Priority: anomaly_injection config > fraud config > default 0.02
11466        let total_rate = if self.config.anomaly_injection.enabled {
11467            self.config.anomaly_injection.rates.total_rate
11468        } else if self.config.fraud.enabled {
11469            self.config.fraud.fraud_rate
11470        } else {
11471            0.02
11472        };
11473
11474        let fraud_rate = if self.config.anomaly_injection.enabled {
11475            self.config.anomaly_injection.rates.fraud_rate
11476        } else {
11477            AnomalyRateConfig::default().fraud_rate
11478        };
11479
11480        let error_rate = if self.config.anomaly_injection.enabled {
11481            self.config.anomaly_injection.rates.error_rate
11482        } else {
11483            AnomalyRateConfig::default().error_rate
11484        };
11485
11486        let process_issue_rate = if self.config.anomaly_injection.enabled {
11487            self.config.anomaly_injection.rates.process_rate
11488        } else {
11489            AnomalyRateConfig::default().process_issue_rate
11490        };
11491
11492        let anomaly_config = AnomalyInjectorConfig {
11493            rates: AnomalyRateConfig {
11494                total_rate,
11495                fraud_rate,
11496                error_rate,
11497                process_issue_rate,
11498                ..Default::default()
11499            },
11500            seed: self.seed + 5000,
11501            ..Default::default()
11502        };
11503
11504        let mut injector = AnomalyInjector::new(anomaly_config);
11505        let result = injector.process_entries(entries);
11506
11507        if let Some(pb) = &pb {
11508            pb.inc(entries.len() as u64);
11509            pb.finish_with_message("Anomaly injection complete");
11510        }
11511
11512        let mut by_type = HashMap::new();
11513        for label in &result.labels {
11514            *by_type
11515                .entry(format!("{:?}", label.anomaly_type))
11516                .or_insert(0) += 1;
11517        }
11518
11519        Ok(AnomalyLabels {
11520            labels: result.labels,
11521            summary: Some(result.summary),
11522            by_type,
11523        })
11524    }
11525
11526    /// Validate journal entries using running balance tracker.
11527    ///
11528    /// Applies all entries to the balance tracker and validates:
11529    /// - Each entry is internally balanced (debits = credits)
11530    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
11531    ///
11532    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
11533    /// excluded from balance validation as they may be intentionally unbalanced.
11534    fn validate_journal_entries(
11535        &mut self,
11536        entries: &[JournalEntry],
11537    ) -> SynthResult<BalanceValidationResult> {
11538        // Filter out entries with human errors as they may be intentionally unbalanced
11539        let clean_entries: Vec<&JournalEntry> = entries
11540            .iter()
11541            .filter(|e| {
11542                e.header
11543                    .header_text
11544                    .as_ref()
11545                    .map(|t| !t.contains("[HUMAN_ERROR:"))
11546                    .unwrap_or(true)
11547            })
11548            .collect();
11549
11550        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
11551
11552        // Configure tracker to not fail on errors (collect them instead)
11553        let config = BalanceTrackerConfig {
11554            validate_on_each_entry: false,   // We'll validate at the end
11555            track_history: false,            // Skip history for performance
11556            fail_on_validation_error: false, // Collect errors, don't fail
11557            ..Default::default()
11558        };
11559        let validation_currency = self
11560            .config
11561            .companies
11562            .first()
11563            .map(|c| c.currency.clone())
11564            .unwrap_or_else(|| "USD".to_string());
11565
11566        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
11567
11568        // Apply clean entries (without human errors)
11569        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
11570        let errors = tracker.apply_entries(&clean_refs);
11571
11572        if let Some(pb) = &pb {
11573            pb.inc(entries.len() as u64);
11574        }
11575
11576        // Check if any entries were unbalanced
11577        // Note: When fail_on_validation_error is false, errors are stored in tracker
11578        let has_unbalanced = tracker
11579            .get_validation_errors()
11580            .iter()
11581            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
11582
11583        // Validate balance sheet for each company
11584        // Include both returned errors and collected validation errors
11585        let mut all_errors = errors;
11586        all_errors.extend(tracker.get_validation_errors().iter().cloned());
11587        let company_codes: Vec<String> = self
11588            .config
11589            .companies
11590            .iter()
11591            .map(|c| c.code.clone())
11592            .collect();
11593
11594        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11595            .map(|d| d + chrono::Months::new(self.config.global.period_months))
11596            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11597
11598        for company_code in &company_codes {
11599            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
11600                all_errors.push(e);
11601            }
11602        }
11603
11604        // Get statistics after all mutable operations are done
11605        let stats = tracker.get_statistics();
11606
11607        // Determine if balanced overall
11608        let is_balanced = all_errors.is_empty();
11609
11610        if let Some(pb) = pb {
11611            let msg = if is_balanced {
11612                "Balance validation passed"
11613            } else {
11614                "Balance validation completed with errors"
11615            };
11616            pb.finish_with_message(msg);
11617        }
11618
11619        Ok(BalanceValidationResult {
11620            validated: true,
11621            is_balanced,
11622            entries_processed: stats.entries_processed,
11623            total_debits: stats.total_debits,
11624            total_credits: stats.total_credits,
11625            accounts_tracked: stats.accounts_tracked,
11626            companies_tracked: stats.companies_tracked,
11627            validation_errors: all_errors,
11628            has_unbalanced_entries: has_unbalanced,
11629        })
11630    }
11631
11632    /// Inject data quality variations into journal entries.
11633    ///
11634    /// Applies typos, missing values, and format variations to make
11635    /// the synthetic data more realistic for testing data cleaning pipelines.
11636    fn inject_data_quality(
11637        &mut self,
11638        entries: &mut [JournalEntry],
11639    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
11640        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
11641
11642        // Build config from user-specified schema settings when data_quality is enabled;
11643        // otherwise fall back to the low-rate minimal() preset.
11644        let config = if self.config.data_quality.enabled {
11645            let dq = &self.config.data_quality;
11646            DataQualityConfig {
11647                enable_missing_values: dq.missing_values.enabled,
11648                missing_values: datasynth_generators::MissingValueConfig {
11649                    global_rate: dq.effective_missing_rate(),
11650                    ..Default::default()
11651                },
11652                enable_format_variations: dq.format_variations.enabled,
11653                format_variations: datasynth_generators::FormatVariationConfig {
11654                    date_variation_rate: dq.format_variations.dates.rate,
11655                    amount_variation_rate: dq.format_variations.amounts.rate,
11656                    identifier_variation_rate: dq.format_variations.identifiers.rate,
11657                    ..Default::default()
11658                },
11659                enable_duplicates: dq.duplicates.enabled,
11660                duplicates: datasynth_generators::DuplicateConfig {
11661                    duplicate_rate: dq.effective_duplicate_rate(),
11662                    ..Default::default()
11663                },
11664                enable_typos: dq.typos.enabled,
11665                typos: datasynth_generators::TypoConfig {
11666                    char_error_rate: dq.effective_typo_rate(),
11667                    ..Default::default()
11668                },
11669                enable_encoding_issues: dq.encoding_issues.enabled,
11670                encoding_issue_rate: dq.encoding_issues.rate,
11671                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
11672                track_statistics: true,
11673            }
11674        } else {
11675            DataQualityConfig::minimal()
11676        };
11677        let mut injector = DataQualityInjector::new(config);
11678
11679        // Wire country pack for locale-aware format baselines
11680        injector.set_country_pack(self.primary_pack().clone());
11681
11682        // Build context for missing value decisions
11683        let context = HashMap::new();
11684
11685        for entry in entries.iter_mut() {
11686            // Process header_text field (common target for typos)
11687            if let Some(text) = &entry.header.header_text {
11688                let processed = injector.process_text_field(
11689                    "header_text",
11690                    text,
11691                    &entry.header.document_id.to_string(),
11692                    &context,
11693                );
11694                match processed {
11695                    Some(new_text) if new_text != *text => {
11696                        entry.header.header_text = Some(new_text);
11697                    }
11698                    None => {
11699                        entry.header.header_text = None; // Missing value
11700                    }
11701                    _ => {}
11702                }
11703            }
11704
11705            // Process reference field
11706            if let Some(ref_text) = &entry.header.reference {
11707                let processed = injector.process_text_field(
11708                    "reference",
11709                    ref_text,
11710                    &entry.header.document_id.to_string(),
11711                    &context,
11712                );
11713                match processed {
11714                    Some(new_text) if new_text != *ref_text => {
11715                        entry.header.reference = Some(new_text);
11716                    }
11717                    None => {
11718                        entry.header.reference = None;
11719                    }
11720                    _ => {}
11721                }
11722            }
11723
11724            // Process user_persona field (potential for typos in user IDs)
11725            let user_persona = entry.header.user_persona.clone();
11726            if let Some(processed) = injector.process_text_field(
11727                "user_persona",
11728                &user_persona,
11729                &entry.header.document_id.to_string(),
11730                &context,
11731            ) {
11732                if processed != user_persona {
11733                    entry.header.user_persona = processed;
11734                }
11735            }
11736
11737            // Process line items
11738            for line in &mut entry.lines {
11739                // Process line description if present
11740                if let Some(ref text) = line.line_text {
11741                    let processed = injector.process_text_field(
11742                        "line_text",
11743                        text,
11744                        &entry.header.document_id.to_string(),
11745                        &context,
11746                    );
11747                    match processed {
11748                        Some(new_text) if new_text != *text => {
11749                            line.line_text = Some(new_text);
11750                        }
11751                        None => {
11752                            line.line_text = None;
11753                        }
11754                        _ => {}
11755                    }
11756                }
11757
11758                // Process cost_center if present
11759                if let Some(cc) = &line.cost_center {
11760                    let processed = injector.process_text_field(
11761                        "cost_center",
11762                        cc,
11763                        &entry.header.document_id.to_string(),
11764                        &context,
11765                    );
11766                    match processed {
11767                        Some(new_cc) if new_cc != *cc => {
11768                            line.cost_center = Some(new_cc);
11769                        }
11770                        None => {
11771                            line.cost_center = None;
11772                        }
11773                        _ => {}
11774                    }
11775                }
11776            }
11777
11778            if let Some(pb) = &pb {
11779                pb.inc(1);
11780            }
11781        }
11782
11783        if let Some(pb) = pb {
11784            pb.finish_with_message("Data quality injection complete");
11785        }
11786
11787        let quality_issues = injector.issues().to_vec();
11788        Ok((injector.stats().clone(), quality_issues))
11789    }
11790
11791    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
11792    ///
11793    /// Creates complete audit documentation for each company in the configuration,
11794    /// following ISA standards:
11795    /// - ISA 210/220: Engagement acceptance and terms
11796    /// - ISA 230: Audit documentation (workpapers)
11797    /// - ISA 265: Control deficiencies (findings)
11798    /// - ISA 315/330: Risk assessment and response
11799    /// - ISA 500: Audit evidence
11800    /// - ISA 200: Professional judgment
11801    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
11802        // Check if FSM-driven audit generation is enabled
11803        let use_fsm = self
11804            .config
11805            .audit
11806            .fsm
11807            .as_ref()
11808            .map(|f| f.enabled)
11809            .unwrap_or(false);
11810
11811        if use_fsm {
11812            return self.generate_audit_data_with_fsm(entries);
11813        }
11814
11815        // --- Legacy (non-FSM) audit generation follows ---
11816        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11817            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11818        let fiscal_year = start_date.year() as u16;
11819        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11820
11821        // Calculate rough total revenue from entries for materiality
11822        let total_revenue: rust_decimal::Decimal = entries
11823            .iter()
11824            .flat_map(|e| e.lines.iter())
11825            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
11826            .map(|l| l.credit_amount)
11827            .sum();
11828
11829        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
11830        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
11831
11832        let mut snapshot = AuditSnapshot::default();
11833
11834        // Initialize generators
11835        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
11836        // v3.3.2: thread the user-facing audit schema config into the
11837        // engagement generator (team size range).
11838        engagement_gen.set_team_config(&self.config.audit.team);
11839
11840        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
11841        // v3.3.2: thread workpaper + review workflow schema config into
11842        // the workpaper generator (per-section count range + review
11843        // delay ranges).
11844        workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
11845        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
11846        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
11847        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
11848        // v3.2.1+: user-supplied finding titles + narratives flow through shared provider
11849        finding_gen.set_template_provider(self.template_provider.clone());
11850        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
11851        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
11852        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
11853        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
11854        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
11855        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
11856        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
11857
11858        // Get list of accounts from CoA for risk assessment
11859        let accounts: Vec<String> = self
11860            .coa
11861            .as_ref()
11862            .map(|coa| {
11863                coa.get_postable_accounts()
11864                    .iter()
11865                    .map(|acc| acc.account_code().to_string())
11866                    .collect()
11867            })
11868            .unwrap_or_default();
11869
11870        // Generate engagements for each company
11871        for (i, company) in self.config.companies.iter().enumerate() {
11872            // Calculate company-specific revenue (proportional to volume weight)
11873            let company_revenue = total_revenue
11874                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
11875
11876            // Generate engagements for this company
11877            let engagements_for_company =
11878                self.phase_config.audit_engagements / self.config.companies.len().max(1);
11879            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
11880                1
11881            } else {
11882                0
11883            };
11884
11885            for _eng_idx in 0..(engagements_for_company + extra) {
11886                // v3.3.2: draw engagement type from the user-configured
11887                // distribution instead of always using the default
11888                // (AnnualAudit). Falls back to the default when all
11889                // probabilities are zero.
11890                let eng_type =
11891                    engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
11892
11893                // Generate the engagement
11894                let mut engagement = engagement_gen.generate_engagement(
11895                    &company.code,
11896                    &company.name,
11897                    fiscal_year,
11898                    period_end,
11899                    company_revenue,
11900                    Some(eng_type),
11901                );
11902
11903                // Replace synthetic team IDs with real employee IDs from master data
11904                if !self.master_data.employees.is_empty() {
11905                    let emp_count = self.master_data.employees.len();
11906                    // Use employee IDs deterministically based on engagement index
11907                    let base = (i * 10 + _eng_idx) % emp_count;
11908                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
11909                        .employee_id
11910                        .clone();
11911                    engagement.engagement_manager_id = self.master_data.employees
11912                        [(base + 1) % emp_count]
11913                        .employee_id
11914                        .clone();
11915                    let real_team: Vec<String> = engagement
11916                        .team_member_ids
11917                        .iter()
11918                        .enumerate()
11919                        .map(|(j, _)| {
11920                            self.master_data.employees[(base + 2 + j) % emp_count]
11921                                .employee_id
11922                                .clone()
11923                        })
11924                        .collect();
11925                    engagement.team_member_ids = real_team;
11926                }
11927
11928                if let Some(pb) = &pb {
11929                    pb.inc(1);
11930                }
11931
11932                // Get team members from the engagement
11933                let team_members: Vec<String> = engagement.team_member_ids.clone();
11934
11935                // Generate workpapers for the engagement.
11936                // v3.3.2: honor `audit.generate_workpapers` — when false,
11937                // workpapers (and dependent evidence) are skipped while
11938                // the engagement itself, risk assessments, findings, etc.
11939                // still generate normally.
11940                let workpapers = if self.config.audit.generate_workpapers {
11941                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
11942                } else {
11943                    Vec::new()
11944                };
11945
11946                for wp in &workpapers {
11947                    if let Some(pb) = &pb {
11948                        pb.inc(1);
11949                    }
11950
11951                    // Generate evidence for each workpaper
11952                    let evidence = evidence_gen.generate_evidence_for_workpaper(
11953                        wp,
11954                        &team_members,
11955                        wp.preparer_date,
11956                    );
11957
11958                    for _ in &evidence {
11959                        if let Some(pb) = &pb {
11960                            pb.inc(1);
11961                        }
11962                    }
11963
11964                    snapshot.evidence.extend(evidence);
11965                }
11966
11967                // Generate risk assessments for the engagement
11968                let risks =
11969                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
11970
11971                for _ in &risks {
11972                    if let Some(pb) = &pb {
11973                        pb.inc(1);
11974                    }
11975                }
11976                snapshot.risk_assessments.extend(risks);
11977
11978                // Generate findings for the engagement
11979                let findings = finding_gen.generate_findings_for_engagement(
11980                    &engagement,
11981                    &workpapers,
11982                    &team_members,
11983                );
11984
11985                for _ in &findings {
11986                    if let Some(pb) = &pb {
11987                        pb.inc(1);
11988                    }
11989                }
11990                snapshot.findings.extend(findings);
11991
11992                // Generate professional judgments for the engagement
11993                let judgments =
11994                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
11995
11996                for _ in &judgments {
11997                    if let Some(pb) = &pb {
11998                        pb.inc(1);
11999                    }
12000                }
12001                snapshot.judgments.extend(judgments);
12002
12003                // ISA 505: External confirmations and responses
12004                let (confs, resps) =
12005                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12006                snapshot.confirmations.extend(confs);
12007                snapshot.confirmation_responses.extend(resps);
12008
12009                // ISA 330: Procedure steps per workpaper
12010                let team_pairs: Vec<(String, String)> = team_members
12011                    .iter()
12012                    .map(|id| {
12013                        let name = self
12014                            .master_data
12015                            .employees
12016                            .iter()
12017                            .find(|e| e.employee_id == *id)
12018                            .map(|e| e.display_name.clone())
12019                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12020                        (id.clone(), name)
12021                    })
12022                    .collect();
12023                for wp in &workpapers {
12024                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12025                    snapshot.procedure_steps.extend(steps);
12026                }
12027
12028                // ISA 530: Samples per workpaper
12029                for wp in &workpapers {
12030                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12031                        snapshot.samples.push(sample);
12032                    }
12033                }
12034
12035                // ISA 520: Analytical procedures
12036                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12037                snapshot.analytical_results.extend(analytical);
12038
12039                // ISA 610: Internal audit function and reports
12040                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
12041                snapshot.ia_functions.push(ia_func);
12042                snapshot.ia_reports.extend(ia_reports);
12043
12044                // ISA 550: Related parties and transactions
12045                let vendor_names: Vec<String> = self
12046                    .master_data
12047                    .vendors
12048                    .iter()
12049                    .map(|v| v.name.clone())
12050                    .collect();
12051                let customer_names: Vec<String> = self
12052                    .master_data
12053                    .customers
12054                    .iter()
12055                    .map(|c| c.name.clone())
12056                    .collect();
12057                let (parties, rp_txns) =
12058                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
12059                snapshot.related_parties.extend(parties);
12060                snapshot.related_party_transactions.extend(rp_txns);
12061
12062                // Add workpapers after findings since findings need them
12063                snapshot.workpapers.extend(workpapers);
12064
12065                // Generate audit scope record for this engagement (one per engagement)
12066                {
12067                    let scope_id = format!(
12068                        "SCOPE-{}-{}",
12069                        engagement.engagement_id.simple(),
12070                        &engagement.client_entity_id
12071                    );
12072                    let scope = datasynth_core::models::audit::AuditScope::new(
12073                        scope_id.clone(),
12074                        engagement.engagement_id.to_string(),
12075                        engagement.client_entity_id.clone(),
12076                        engagement.materiality,
12077                    );
12078                    // Wire scope_id back to engagement
12079                    let mut eng = engagement;
12080                    eng.scope_id = Some(scope_id);
12081                    snapshot.audit_scopes.push(scope);
12082                    snapshot.engagements.push(eng);
12083                }
12084            }
12085        }
12086
12087        // ----------------------------------------------------------------
12088        // ISA 600: Group audit — component auditors, plan, instructions, reports
12089        // ----------------------------------------------------------------
12090        if self.config.companies.len() > 1 {
12091            // Use materiality from the first engagement if available, otherwise
12092            // derive a reasonable figure from total revenue.
12093            let group_materiality = snapshot
12094                .engagements
12095                .first()
12096                .map(|e| e.materiality)
12097                .unwrap_or_else(|| {
12098                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
12099                    total_revenue * pct
12100                });
12101
12102            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
12103            let group_engagement_id = snapshot
12104                .engagements
12105                .first()
12106                .map(|e| e.engagement_id.to_string())
12107                .unwrap_or_else(|| "GROUP-ENG".to_string());
12108
12109            let component_snapshot = component_gen.generate(
12110                &self.config.companies,
12111                group_materiality,
12112                &group_engagement_id,
12113                period_end,
12114            );
12115
12116            snapshot.component_auditors = component_snapshot.component_auditors;
12117            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
12118            snapshot.component_instructions = component_snapshot.component_instructions;
12119            snapshot.component_reports = component_snapshot.component_reports;
12120
12121            info!(
12122                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
12123                snapshot.component_auditors.len(),
12124                snapshot.component_instructions.len(),
12125                snapshot.component_reports.len(),
12126            );
12127        }
12128
12129        // ----------------------------------------------------------------
12130        // ISA 210: Engagement letters — one per engagement
12131        // ----------------------------------------------------------------
12132        {
12133            let applicable_framework = self
12134                .config
12135                .accounting_standards
12136                .framework
12137                .as_ref()
12138                .map(|f| format!("{f:?}"))
12139                .unwrap_or_else(|| "IFRS".to_string());
12140
12141            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
12142            let entity_count = self.config.companies.len();
12143
12144            for engagement in &snapshot.engagements {
12145                let company = self
12146                    .config
12147                    .companies
12148                    .iter()
12149                    .find(|c| c.code == engagement.client_entity_id);
12150                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
12151                let letter_date = engagement.planning_start;
12152                let letter = letter_gen.generate(
12153                    &engagement.engagement_id.to_string(),
12154                    &engagement.client_name,
12155                    entity_count,
12156                    engagement.period_end_date,
12157                    currency,
12158                    &applicable_framework,
12159                    letter_date,
12160                );
12161                snapshot.engagement_letters.push(letter);
12162            }
12163
12164            info!(
12165                "ISA 210 engagement letters: {} generated",
12166                snapshot.engagement_letters.len()
12167            );
12168        }
12169
12170        // ----------------------------------------------------------------
12171        // v3.3.0: Legal documents per engagement (WI: LegalDocumentGenerator)
12172        // ----------------------------------------------------------------
12173        if self.phase_config.generate_legal_documents {
12174            use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
12175            let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
12176            for engagement in &snapshot.engagements {
12177                // Build an employee name list for signatory drawing —
12178                // prefer employees from the engaged entity, fall back to
12179                // all employees.
12180                let employee_names: Vec<String> = self
12181                    .master_data
12182                    .employees
12183                    .iter()
12184                    .filter(|e| e.company_code == engagement.client_entity_id)
12185                    .map(|e| e.display_name.clone())
12186                    .collect();
12187                let names_to_use = if !employee_names.is_empty() {
12188                    employee_names
12189                } else {
12190                    self.master_data
12191                        .employees
12192                        .iter()
12193                        .take(10)
12194                        .map(|e| e.display_name.clone())
12195                        .collect()
12196                };
12197                let docs = legal_gen.generate(
12198                    &engagement.client_entity_id,
12199                    engagement.fiscal_year as i32,
12200                    &names_to_use,
12201                );
12202                snapshot.legal_documents.extend(docs);
12203            }
12204            info!(
12205                "v3.3.0 legal documents: {} emitted across {} engagements",
12206                snapshot.legal_documents.len(),
12207                snapshot.engagements.len()
12208            );
12209        }
12210
12211        // ----------------------------------------------------------------
12212        // v3.3.0: IT general controls — access logs + change records
12213        //
12214        // `ItControlsGenerator` runs one pass per company (not per
12215        // engagement) so employee sets and system catalogs stay
12216        // coherent. We derive the period from the earliest engagement's
12217        // planning_start through the latest engagement's period_end_date
12218        // for each company.
12219        // ----------------------------------------------------------------
12220        if self.phase_config.generate_it_controls {
12221            use datasynth_generators::it_controls_generator::ItControlsGenerator;
12222            use std::collections::HashMap;
12223            let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
12224
12225            // Group engagements by company to produce one IT-controls
12226            // window per entity.
12227            let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
12228                HashMap::new();
12229            for engagement in &snapshot.engagements {
12230                let entry = by_company
12231                    .entry(engagement.client_entity_id.clone())
12232                    .or_insert((engagement.planning_start, engagement.period_end_date));
12233                if engagement.planning_start < entry.0 {
12234                    entry.0 = engagement.planning_start;
12235                }
12236                if engagement.period_end_date > entry.1 {
12237                    entry.1 = engagement.period_end_date;
12238                }
12239            }
12240
12241            // Standard system catalog — populated from known ERP / app
12242            // names. Keeps the generator's data shape stable when the
12243            // user hasn't configured IT-system naming separately.
12244            let systems: Vec<String> = vec![
12245                "SAP ECC",
12246                "SAP S/4 HANA",
12247                "Oracle EBS",
12248                "Workday",
12249                "NetSuite",
12250                "Active Directory",
12251                "SharePoint",
12252                "Salesforce",
12253                "ServiceNow",
12254                "Jira",
12255                "GitHub Enterprise",
12256                "AWS Console",
12257                "Okta",
12258            ]
12259            .into_iter()
12260            .map(String::from)
12261            .collect();
12262
12263            for (company_code, (start, end)) in by_company {
12264                let emps: Vec<(String, String)> = self
12265                    .master_data
12266                    .employees
12267                    .iter()
12268                    .filter(|e| e.company_code == company_code)
12269                    .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12270                    .collect();
12271                if emps.is_empty() {
12272                    continue;
12273                }
12274                // Compute period in months, rounded up to the nearest
12275                // whole month (min 1).
12276                let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
12277                let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
12278                let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
12279                snapshot.it_controls_access_logs.extend(access_logs);
12280                snapshot.it_controls_change_records.extend(change_records);
12281            }
12282
12283            info!(
12284                "v3.3.0 IT controls: {} access logs, {} change records",
12285                snapshot.it_controls_access_logs.len(),
12286                snapshot.it_controls_change_records.len()
12287            );
12288        }
12289
12290        // ----------------------------------------------------------------
12291        // ISA 560 / IAS 10: Subsequent events
12292        // ----------------------------------------------------------------
12293        {
12294            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
12295            let entity_codes: Vec<String> = self
12296                .config
12297                .companies
12298                .iter()
12299                .map(|c| c.code.clone())
12300                .collect();
12301            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
12302            info!(
12303                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
12304                subsequent.len(),
12305                subsequent
12306                    .iter()
12307                    .filter(|e| matches!(
12308                        e.classification,
12309                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
12310                    ))
12311                    .count(),
12312                subsequent
12313                    .iter()
12314                    .filter(|e| matches!(
12315                        e.classification,
12316                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
12317                    ))
12318                    .count(),
12319            );
12320            snapshot.subsequent_events = subsequent;
12321        }
12322
12323        // ----------------------------------------------------------------
12324        // ISA 402: Service organization controls
12325        // ----------------------------------------------------------------
12326        {
12327            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
12328            let entity_codes: Vec<String> = self
12329                .config
12330                .companies
12331                .iter()
12332                .map(|c| c.code.clone())
12333                .collect();
12334            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
12335            info!(
12336                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
12337                soc_snapshot.service_organizations.len(),
12338                soc_snapshot.soc_reports.len(),
12339                soc_snapshot.user_entity_controls.len(),
12340            );
12341            snapshot.service_organizations = soc_snapshot.service_organizations;
12342            snapshot.soc_reports = soc_snapshot.soc_reports;
12343            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
12344        }
12345
12346        // ----------------------------------------------------------------
12347        // ISA 570: Going concern assessments
12348        // ----------------------------------------------------------------
12349        {
12350            use datasynth_generators::audit::going_concern_generator::{
12351                GoingConcernGenerator, GoingConcernInput,
12352            };
12353            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
12354            let entity_codes: Vec<String> = self
12355                .config
12356                .companies
12357                .iter()
12358                .map(|c| c.code.clone())
12359                .collect();
12360            // Assessment date = period end + 75 days (typical sign-off window).
12361            let assessment_date = period_end + chrono::Duration::days(75);
12362            let period_label = format!("FY{}", period_end.year());
12363
12364            // Build financial inputs from actual journal entries.
12365            //
12366            // We derive approximate P&L, working capital, and operating cash flow
12367            // by aggregating GL account balances from the journal entry population.
12368            // Account ranges used (standard chart):
12369            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
12370            //   Expenses:        6xxx (debit-normal)
12371            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
12372            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
12373            //   Operating CF:    net income adjusted for D&A (rough proxy)
12374            let gc_inputs: Vec<GoingConcernInput> = self
12375                .config
12376                .companies
12377                .iter()
12378                .map(|company| {
12379                    let code = &company.code;
12380                    let mut revenue = rust_decimal::Decimal::ZERO;
12381                    let mut expenses = rust_decimal::Decimal::ZERO;
12382                    let mut current_assets = rust_decimal::Decimal::ZERO;
12383                    let mut current_liabs = rust_decimal::Decimal::ZERO;
12384                    let mut total_debt = rust_decimal::Decimal::ZERO;
12385
12386                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
12387                        for line in &je.lines {
12388                            let acct = line.gl_account.as_str();
12389                            let net = line.debit_amount - line.credit_amount;
12390                            if acct.starts_with('4') {
12391                                // Revenue accounts: credit-normal, so negative net = revenue earned
12392                                revenue -= net;
12393                            } else if acct.starts_with('6') {
12394                                // Expense accounts: debit-normal
12395                                expenses += net;
12396                            }
12397                            // Balance sheet accounts for working capital
12398                            if acct.starts_with('1') {
12399                                // Current asset accounts (1000–1499)
12400                                if let Ok(n) = acct.parse::<u32>() {
12401                                    if (1000..=1499).contains(&n) {
12402                                        current_assets += net;
12403                                    }
12404                                }
12405                            } else if acct.starts_with('2') {
12406                                if let Ok(n) = acct.parse::<u32>() {
12407                                    if (2000..=2499).contains(&n) {
12408                                        // Current liabilities
12409                                        current_liabs -= net; // credit-normal
12410                                    } else if (2500..=2999).contains(&n) {
12411                                        // Long-term debt
12412                                        total_debt -= net;
12413                                    }
12414                                }
12415                            }
12416                        }
12417                    }
12418
12419                    let net_income = revenue - expenses;
12420                    let working_capital = current_assets - current_liabs;
12421                    // Rough operating CF proxy: net income (full accrual CF calculation
12422                    // is done separately in the cash flow statement generator)
12423                    let operating_cash_flow = net_income;
12424
12425                    GoingConcernInput {
12426                        entity_code: code.clone(),
12427                        net_income,
12428                        working_capital,
12429                        operating_cash_flow,
12430                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
12431                        assessment_date,
12432                    }
12433                })
12434                .collect();
12435
12436            let assessments = if gc_inputs.is_empty() {
12437                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
12438            } else {
12439                gc_gen.generate_for_entities_with_inputs(
12440                    &entity_codes,
12441                    &gc_inputs,
12442                    assessment_date,
12443                    &period_label,
12444                )
12445            };
12446            info!(
12447                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
12448                assessments.len(),
12449                assessments.iter().filter(|a| matches!(
12450                    a.auditor_conclusion,
12451                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
12452                )).count(),
12453                assessments.iter().filter(|a| matches!(
12454                    a.auditor_conclusion,
12455                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
12456                )).count(),
12457                assessments.iter().filter(|a| matches!(
12458                    a.auditor_conclusion,
12459                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
12460                )).count(),
12461            );
12462            snapshot.going_concern_assessments = assessments;
12463        }
12464
12465        // ----------------------------------------------------------------
12466        // ISA 540: Accounting estimates
12467        // ----------------------------------------------------------------
12468        {
12469            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
12470            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
12471            let entity_codes: Vec<String> = self
12472                .config
12473                .companies
12474                .iter()
12475                .map(|c| c.code.clone())
12476                .collect();
12477            let estimates = est_gen.generate_for_entities(&entity_codes);
12478            info!(
12479                "ISA 540 accounting estimates: {} estimates across {} entities \
12480                 ({} with retrospective reviews, {} with auditor point estimates)",
12481                estimates.len(),
12482                entity_codes.len(),
12483                estimates
12484                    .iter()
12485                    .filter(|e| e.retrospective_review.is_some())
12486                    .count(),
12487                estimates
12488                    .iter()
12489                    .filter(|e| e.auditor_point_estimate.is_some())
12490                    .count(),
12491            );
12492            snapshot.accounting_estimates = estimates;
12493        }
12494
12495        // ----------------------------------------------------------------
12496        // ISA 700/701/705/706: Audit opinions (one per engagement)
12497        // ----------------------------------------------------------------
12498        {
12499            use datasynth_generators::audit::audit_opinion_generator::{
12500                AuditOpinionGenerator, AuditOpinionInput,
12501            };
12502
12503            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
12504
12505            // Build inputs — one per engagement, linking findings and going concern.
12506            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
12507                .engagements
12508                .iter()
12509                .map(|eng| {
12510                    // Collect findings for this engagement.
12511                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12512                        .findings
12513                        .iter()
12514                        .filter(|f| f.engagement_id == eng.engagement_id)
12515                        .cloned()
12516                        .collect();
12517
12518                    // Going concern for this entity.
12519                    let gc = snapshot
12520                        .going_concern_assessments
12521                        .iter()
12522                        .find(|g| g.entity_code == eng.client_entity_id)
12523                        .cloned();
12524
12525                    // Component reports relevant to this engagement.
12526                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
12527                        snapshot.component_reports.clone();
12528
12529                    let auditor = self
12530                        .master_data
12531                        .employees
12532                        .first()
12533                        .map(|e| e.display_name.clone())
12534                        .unwrap_or_else(|| "Global Audit LLP".into());
12535
12536                    let partner = self
12537                        .master_data
12538                        .employees
12539                        .get(1)
12540                        .map(|e| e.display_name.clone())
12541                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
12542
12543                    AuditOpinionInput {
12544                        entity_code: eng.client_entity_id.clone(),
12545                        entity_name: eng.client_name.clone(),
12546                        engagement_id: eng.engagement_id,
12547                        period_end: eng.period_end_date,
12548                        findings: eng_findings,
12549                        going_concern: gc,
12550                        component_reports: comp_reports,
12551                        // Mark as US-listed when audit standards include PCAOB.
12552                        is_us_listed: {
12553                            let fw = &self.config.audit_standards.isa_compliance.framework;
12554                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
12555                        },
12556                        auditor_name: auditor,
12557                        engagement_partner: partner,
12558                    }
12559                })
12560                .collect();
12561
12562            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
12563
12564            for go in &generated_opinions {
12565                snapshot
12566                    .key_audit_matters
12567                    .extend(go.key_audit_matters.clone());
12568            }
12569            snapshot.audit_opinions = generated_opinions
12570                .into_iter()
12571                .map(|go| go.opinion)
12572                .collect();
12573
12574            info!(
12575                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
12576                snapshot.audit_opinions.len(),
12577                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
12578                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
12579                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
12580                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
12581            );
12582        }
12583
12584        // ----------------------------------------------------------------
12585        // SOX 302 / 404 assessments
12586        // ----------------------------------------------------------------
12587        {
12588            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
12589
12590            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
12591
12592            for (i, company) in self.config.companies.iter().enumerate() {
12593                // Collect findings for this company's engagements.
12594                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
12595                    .engagements
12596                    .iter()
12597                    .filter(|e| e.client_entity_id == company.code)
12598                    .map(|e| e.engagement_id)
12599                    .collect();
12600
12601                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12602                    .findings
12603                    .iter()
12604                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
12605                    .cloned()
12606                    .collect();
12607
12608                // Derive executive names from employee list.
12609                let emp_count = self.master_data.employees.len();
12610                let ceo_name = if emp_count > 0 {
12611                    self.master_data.employees[i % emp_count]
12612                        .display_name
12613                        .clone()
12614                } else {
12615                    format!("CEO of {}", company.name)
12616                };
12617                let cfo_name = if emp_count > 1 {
12618                    self.master_data.employees[(i + 1) % emp_count]
12619                        .display_name
12620                        .clone()
12621                } else {
12622                    format!("CFO of {}", company.name)
12623                };
12624
12625                // Use engagement materiality if available.
12626                let materiality = snapshot
12627                    .engagements
12628                    .iter()
12629                    .find(|e| e.client_entity_id == company.code)
12630                    .map(|e| e.materiality)
12631                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
12632
12633                let input = SoxGeneratorInput {
12634                    company_code: company.code.clone(),
12635                    company_name: company.name.clone(),
12636                    fiscal_year,
12637                    period_end,
12638                    findings: company_findings,
12639                    ceo_name,
12640                    cfo_name,
12641                    materiality_threshold: materiality,
12642                    revenue_percent: rust_decimal::Decimal::from(100),
12643                    assets_percent: rust_decimal::Decimal::from(100),
12644                    significant_accounts: vec![
12645                        "Revenue".into(),
12646                        "Accounts Receivable".into(),
12647                        "Inventory".into(),
12648                        "Fixed Assets".into(),
12649                        "Accounts Payable".into(),
12650                    ],
12651                };
12652
12653                let (certs, assessment) = sox_gen.generate(&input);
12654                snapshot.sox_302_certifications.extend(certs);
12655                snapshot.sox_404_assessments.push(assessment);
12656            }
12657
12658            info!(
12659                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
12660                snapshot.sox_302_certifications.len(),
12661                snapshot.sox_404_assessments.len(),
12662                snapshot
12663                    .sox_404_assessments
12664                    .iter()
12665                    .filter(|a| a.icfr_effective)
12666                    .count(),
12667                snapshot
12668                    .sox_404_assessments
12669                    .iter()
12670                    .filter(|a| !a.icfr_effective)
12671                    .count(),
12672            );
12673        }
12674
12675        // ----------------------------------------------------------------
12676        // ISA 320: Materiality calculations (one per entity)
12677        // ----------------------------------------------------------------
12678        {
12679            use datasynth_generators::audit::materiality_generator::{
12680                MaterialityGenerator, MaterialityInput,
12681            };
12682
12683            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
12684
12685            // Compute per-company financials from JEs.
12686            // Asset accounts start with '1', revenue with '4',
12687            // expense accounts with '5' or '6'.
12688            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
12689
12690            for company in &self.config.companies {
12691                let company_code = company.code.clone();
12692
12693                // Revenue: credit-side entries on 4xxx accounts
12694                let company_revenue: rust_decimal::Decimal = entries
12695                    .iter()
12696                    .filter(|e| e.company_code() == company_code)
12697                    .flat_map(|e| e.lines.iter())
12698                    .filter(|l| l.account_code.starts_with('4'))
12699                    .map(|l| l.credit_amount)
12700                    .sum();
12701
12702                // Total assets: debit balances on 1xxx accounts
12703                let total_assets: rust_decimal::Decimal = entries
12704                    .iter()
12705                    .filter(|e| e.company_code() == company_code)
12706                    .flat_map(|e| e.lines.iter())
12707                    .filter(|l| l.account_code.starts_with('1'))
12708                    .map(|l| l.debit_amount)
12709                    .sum();
12710
12711                // Expenses: debit-side entries on 5xxx/6xxx accounts
12712                let total_expenses: rust_decimal::Decimal = entries
12713                    .iter()
12714                    .filter(|e| e.company_code() == company_code)
12715                    .flat_map(|e| e.lines.iter())
12716                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12717                    .map(|l| l.debit_amount)
12718                    .sum();
12719
12720                // Equity: credit balances on 3xxx accounts
12721                let equity: rust_decimal::Decimal = entries
12722                    .iter()
12723                    .filter(|e| e.company_code() == company_code)
12724                    .flat_map(|e| e.lines.iter())
12725                    .filter(|l| l.account_code.starts_with('3'))
12726                    .map(|l| l.credit_amount)
12727                    .sum();
12728
12729                let pretax_income = company_revenue - total_expenses;
12730
12731                // If no company-specific data, fall back to proportional share
12732                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
12733                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
12734                        .unwrap_or(rust_decimal::Decimal::ONE);
12735                    (
12736                        total_revenue * w,
12737                        total_revenue * w * rust_decimal::Decimal::from(3),
12738                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
12739                        total_revenue * w * rust_decimal::Decimal::from(2),
12740                    )
12741                } else {
12742                    (company_revenue, total_assets, pretax_income, equity)
12743                };
12744
12745                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
12746
12747                materiality_inputs.push(MaterialityInput {
12748                    entity_code: company_code,
12749                    period: format!("FY{}", fiscal_year),
12750                    revenue: rev,
12751                    pretax_income: pti,
12752                    total_assets: assets,
12753                    equity: eq,
12754                    gross_profit,
12755                });
12756            }
12757
12758            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
12759
12760            info!(
12761                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
12762                 {} total assets, {} equity benchmarks)",
12763                snapshot.materiality_calculations.len(),
12764                snapshot
12765                    .materiality_calculations
12766                    .iter()
12767                    .filter(|m| matches!(
12768                        m.benchmark,
12769                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
12770                    ))
12771                    .count(),
12772                snapshot
12773                    .materiality_calculations
12774                    .iter()
12775                    .filter(|m| matches!(
12776                        m.benchmark,
12777                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
12778                    ))
12779                    .count(),
12780                snapshot
12781                    .materiality_calculations
12782                    .iter()
12783                    .filter(|m| matches!(
12784                        m.benchmark,
12785                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
12786                    ))
12787                    .count(),
12788                snapshot
12789                    .materiality_calculations
12790                    .iter()
12791                    .filter(|m| matches!(
12792                        m.benchmark,
12793                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
12794                    ))
12795                    .count(),
12796            );
12797        }
12798
12799        // ----------------------------------------------------------------
12800        // ISA 315: Combined Risk Assessments (per entity, per account area)
12801        // ----------------------------------------------------------------
12802        {
12803            use datasynth_generators::audit::cra_generator::CraGenerator;
12804
12805            let mut cra_gen = CraGenerator::new(self.seed + 8315);
12806
12807            // Build entity → scope_id map from already-generated scopes
12808            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
12809                .audit_scopes
12810                .iter()
12811                .map(|s| (s.entity_code.clone(), s.id.clone()))
12812                .collect();
12813
12814            for company in &self.config.companies {
12815                let cras = cra_gen.generate_for_entity(&company.code, None);
12816                let scope_id = entity_scope_map.get(&company.code).cloned();
12817                let cras_with_scope: Vec<_> = cras
12818                    .into_iter()
12819                    .map(|mut cra| {
12820                        cra.scope_id = scope_id.clone();
12821                        cra
12822                    })
12823                    .collect();
12824                snapshot.combined_risk_assessments.extend(cras_with_scope);
12825            }
12826
12827            let significant_count = snapshot
12828                .combined_risk_assessments
12829                .iter()
12830                .filter(|c| c.significant_risk)
12831                .count();
12832            let high_cra_count = snapshot
12833                .combined_risk_assessments
12834                .iter()
12835                .filter(|c| {
12836                    matches!(
12837                        c.combined_risk,
12838                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
12839                    )
12840                })
12841                .count();
12842
12843            info!(
12844                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
12845                snapshot.combined_risk_assessments.len(),
12846                significant_count,
12847                high_cra_count,
12848            );
12849        }
12850
12851        // ----------------------------------------------------------------
12852        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
12853        // ----------------------------------------------------------------
12854        {
12855            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
12856
12857            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
12858
12859            // Group CRAs by entity and use per-entity tolerable error from materiality
12860            for company in &self.config.companies {
12861                let entity_code = company.code.clone();
12862
12863                // Find tolerable error for this entity (= performance materiality)
12864                let tolerable_error = snapshot
12865                    .materiality_calculations
12866                    .iter()
12867                    .find(|m| m.entity_code == entity_code)
12868                    .map(|m| m.tolerable_error);
12869
12870                // Collect CRAs for this entity
12871                let entity_cras: Vec<_> = snapshot
12872                    .combined_risk_assessments
12873                    .iter()
12874                    .filter(|c| c.entity_code == entity_code)
12875                    .cloned()
12876                    .collect();
12877
12878                if !entity_cras.is_empty() {
12879                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
12880                    snapshot.sampling_plans.extend(plans);
12881                    snapshot.sampled_items.extend(items);
12882                }
12883            }
12884
12885            let misstatement_count = snapshot
12886                .sampled_items
12887                .iter()
12888                .filter(|i| i.misstatement_found)
12889                .count();
12890
12891            info!(
12892                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
12893                snapshot.sampling_plans.len(),
12894                snapshot.sampled_items.len(),
12895                misstatement_count,
12896            );
12897        }
12898
12899        // ----------------------------------------------------------------
12900        // ISA 315: Significant Classes of Transactions (SCOTS)
12901        // ----------------------------------------------------------------
12902        {
12903            use datasynth_generators::audit::scots_generator::{
12904                ScotsGenerator, ScotsGeneratorConfig,
12905            };
12906
12907            let ic_enabled = self.config.intercompany.enabled;
12908
12909            let config = ScotsGeneratorConfig {
12910                intercompany_enabled: ic_enabled,
12911                ..ScotsGeneratorConfig::default()
12912            };
12913            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
12914
12915            for company in &self.config.companies {
12916                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
12917                snapshot
12918                    .significant_transaction_classes
12919                    .extend(entity_scots);
12920            }
12921
12922            let estimation_count = snapshot
12923                .significant_transaction_classes
12924                .iter()
12925                .filter(|s| {
12926                    matches!(
12927                        s.transaction_type,
12928                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
12929                    )
12930                })
12931                .count();
12932
12933            info!(
12934                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
12935                snapshot.significant_transaction_classes.len(),
12936                estimation_count,
12937            );
12938        }
12939
12940        // ----------------------------------------------------------------
12941        // ISA 520: Unusual Item Markers
12942        // ----------------------------------------------------------------
12943        {
12944            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
12945
12946            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
12947            let entity_codes: Vec<String> = self
12948                .config
12949                .companies
12950                .iter()
12951                .map(|c| c.code.clone())
12952                .collect();
12953            let unusual_flags =
12954                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
12955            info!(
12956                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
12957                unusual_flags.len(),
12958                unusual_flags
12959                    .iter()
12960                    .filter(|f| matches!(
12961                        f.severity,
12962                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
12963                    ))
12964                    .count(),
12965                unusual_flags
12966                    .iter()
12967                    .filter(|f| matches!(
12968                        f.severity,
12969                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
12970                    ))
12971                    .count(),
12972                unusual_flags
12973                    .iter()
12974                    .filter(|f| matches!(
12975                        f.severity,
12976                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
12977                    ))
12978                    .count(),
12979            );
12980            snapshot.unusual_items = unusual_flags;
12981        }
12982
12983        // ----------------------------------------------------------------
12984        // ISA 520: Analytical Relationships
12985        // ----------------------------------------------------------------
12986        {
12987            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
12988
12989            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
12990            let entity_codes: Vec<String> = self
12991                .config
12992                .companies
12993                .iter()
12994                .map(|c| c.code.clone())
12995                .collect();
12996            let current_period_label = format!("FY{fiscal_year}");
12997            let prior_period_label = format!("FY{}", fiscal_year - 1);
12998            let analytical_rels = ar_gen.generate_for_entities(
12999                &entity_codes,
13000                entries,
13001                &current_period_label,
13002                &prior_period_label,
13003            );
13004            let out_of_range = analytical_rels
13005                .iter()
13006                .filter(|r| !r.within_expected_range)
13007                .count();
13008            info!(
13009                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13010                analytical_rels.len(),
13011                out_of_range,
13012            );
13013            snapshot.analytical_relationships = analytical_rels;
13014        }
13015
13016        if let Some(pb) = pb {
13017            pb.finish_with_message(format!(
13018                "Audit data: {} engagements, {} workpapers, {} evidence, \
13019                 {} confirmations, {} procedure steps, {} samples, \
13020                 {} analytical, {} IA funcs, {} related parties, \
13021                 {} component auditors, {} letters, {} subsequent events, \
13022                 {} service orgs, {} going concern, {} accounting estimates, \
13023                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13024                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13025                 {} unusual items, {} analytical relationships",
13026                snapshot.engagements.len(),
13027                snapshot.workpapers.len(),
13028                snapshot.evidence.len(),
13029                snapshot.confirmations.len(),
13030                snapshot.procedure_steps.len(),
13031                snapshot.samples.len(),
13032                snapshot.analytical_results.len(),
13033                snapshot.ia_functions.len(),
13034                snapshot.related_parties.len(),
13035                snapshot.component_auditors.len(),
13036                snapshot.engagement_letters.len(),
13037                snapshot.subsequent_events.len(),
13038                snapshot.service_organizations.len(),
13039                snapshot.going_concern_assessments.len(),
13040                snapshot.accounting_estimates.len(),
13041                snapshot.audit_opinions.len(),
13042                snapshot.key_audit_matters.len(),
13043                snapshot.sox_302_certifications.len(),
13044                snapshot.sox_404_assessments.len(),
13045                snapshot.materiality_calculations.len(),
13046                snapshot.combined_risk_assessments.len(),
13047                snapshot.sampling_plans.len(),
13048                snapshot.significant_transaction_classes.len(),
13049                snapshot.unusual_items.len(),
13050                snapshot.analytical_relationships.len(),
13051            ));
13052        }
13053
13054        // ----------------------------------------------------------------
13055        // PCAOB-ISA cross-reference mappings
13056        // ----------------------------------------------------------------
13057        // Always include the standard PCAOB-ISA mappings when audit generation is
13058        // enabled. These are static reference data (no randomness required) so we
13059        // call standard_mappings() directly.
13060        {
13061            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13062            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13063            debug!(
13064                "PCAOB-ISA mappings generated: {} mappings",
13065                snapshot.isa_pcaob_mappings.len()
13066            );
13067        }
13068
13069        // ----------------------------------------------------------------
13070        // ISA standard reference entries
13071        // ----------------------------------------------------------------
13072        // Emit flat ISA standard reference data (number, title, series) so
13073        // consumers get a machine-readable listing of all 34 ISA standards in
13074        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
13075        {
13076            use datasynth_standards::audit::isa_reference::IsaStandard;
13077            snapshot.isa_mappings = IsaStandard::standard_entries();
13078            debug!(
13079                "ISA standard entries generated: {} standards",
13080                snapshot.isa_mappings.len()
13081            );
13082        }
13083
13084        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
13085        // For each RPT, find the chronologically closest JE for the engagement's entity.
13086        {
13087            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
13088                .engagements
13089                .iter()
13090                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
13091                .collect();
13092
13093            for rpt in &mut snapshot.related_party_transactions {
13094                if rpt.journal_entry_id.is_some() {
13095                    continue; // already set
13096                }
13097                let entity = engagement_by_id
13098                    .get(&rpt.engagement_id.to_string())
13099                    .copied()
13100                    .unwrap_or("");
13101
13102                // Find closest JE by date in the entity's company
13103                let best_je = entries
13104                    .iter()
13105                    .filter(|je| je.header.company_code == entity)
13106                    .min_by_key(|je| {
13107                        (je.header.posting_date - rpt.transaction_date)
13108                            .num_days()
13109                            .abs()
13110                    });
13111
13112                if let Some(je) = best_je {
13113                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
13114                }
13115            }
13116
13117            let linked = snapshot
13118                .related_party_transactions
13119                .iter()
13120                .filter(|t| t.journal_entry_id.is_some())
13121                .count();
13122            debug!(
13123                "Linked {}/{} related party transactions to journal entries",
13124                linked,
13125                snapshot.related_party_transactions.len()
13126            );
13127        }
13128
13129        // --- ISA 700 / 701 / 705 / 706: audit opinion + key audit matters.
13130        // One opinion per engagement, derived from that engagement's findings,
13131        // going-concern assessment, and any component-auditor reports. Fills
13132        // `audit_opinions` + a flattened `key_audit_matters` for downstream
13133        // export.
13134        if !snapshot.engagements.is_empty() {
13135            use datasynth_generators::audit_opinion_generator::{
13136                AuditOpinionGenerator, AuditOpinionInput,
13137            };
13138
13139            let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
13140            let inputs: Vec<AuditOpinionInput> = snapshot
13141                .engagements
13142                .iter()
13143                .map(|eng| {
13144                    let findings = snapshot
13145                        .findings
13146                        .iter()
13147                        .filter(|f| f.engagement_id == eng.engagement_id)
13148                        .cloned()
13149                        .collect();
13150                    let going_concern = snapshot
13151                        .going_concern_assessments
13152                        .iter()
13153                        .find(|gc| gc.entity_code == eng.client_entity_id)
13154                        .cloned();
13155                    // ComponentAuditorReport doesn't carry an engagement id, but
13156                    // component scope is keyed by `entity_code`, so filter on that.
13157                    let component_reports = snapshot
13158                        .component_reports
13159                        .iter()
13160                        .filter(|r| r.entity_code == eng.client_entity_id)
13161                        .cloned()
13162                        .collect();
13163
13164                    AuditOpinionInput {
13165                        entity_code: eng.client_entity_id.clone(),
13166                        entity_name: eng.client_name.clone(),
13167                        engagement_id: eng.engagement_id,
13168                        period_end: eng.period_end_date,
13169                        findings,
13170                        going_concern,
13171                        component_reports,
13172                        is_us_listed: matches!(
13173                            eng.engagement_type,
13174                            datasynth_core::audit::EngagementType::IntegratedAudit
13175                                | datasynth_core::audit::EngagementType::Sox404
13176                        ),
13177                        auditor_name: "DataSynth Audit LLP".to_string(),
13178                        engagement_partner: "Engagement Partner".to_string(),
13179                    }
13180                })
13181                .collect();
13182
13183            let generated = opinion_gen.generate_batch(&inputs);
13184            for g in generated {
13185                snapshot.key_audit_matters.extend(g.key_audit_matters);
13186                snapshot.audit_opinions.push(g.opinion);
13187            }
13188            debug!(
13189                "Generated {} audit opinions with {} key audit matters",
13190                snapshot.audit_opinions.len(),
13191                snapshot.key_audit_matters.len()
13192            );
13193        }
13194
13195        Ok(snapshot)
13196    }
13197
13198    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
13199    ///
13200    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
13201    /// from the current orchestrator state, runs the FSM engine, and maps the
13202    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
13203    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
13204    fn generate_audit_data_with_fsm(
13205        &mut self,
13206        entries: &[JournalEntry],
13207    ) -> SynthResult<AuditSnapshot> {
13208        use datasynth_audit_fsm::{
13209            context::EngagementContext,
13210            engine::AuditFsmEngine,
13211            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
13212        };
13213        use rand::SeedableRng;
13214        use rand_chacha::ChaCha8Rng;
13215
13216        info!("Audit FSM: generating audit data via FSM engine");
13217
13218        let fsm_config = self
13219            .config
13220            .audit
13221            .fsm
13222            .as_ref()
13223            .expect("FSM config must be present when FSM is enabled");
13224
13225        // 1. Load blueprint from config string.
13226        let bwp = match fsm_config.blueprint.as_str() {
13227            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
13228            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
13229            _ => {
13230                warn!(
13231                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
13232                    fsm_config.blueprint
13233                );
13234                BlueprintWithPreconditions::load_builtin_fsa()
13235            }
13236        }
13237        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
13238
13239        // 2. Load overlay from config string.
13240        let overlay = match fsm_config.overlay.as_str() {
13241            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
13242            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
13243            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
13244            _ => {
13245                warn!(
13246                    "Unknown FSM overlay '{}', falling back to builtin:default",
13247                    fsm_config.overlay
13248                );
13249                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
13250            }
13251        }
13252        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
13253
13254        // 3. Build EngagementContext from orchestrator state.
13255        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13256            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
13257        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
13258
13259        // Determine the engagement entity early so we can filter JEs.
13260        let company = self.config.companies.first();
13261        let company_code = company
13262            .map(|c| c.code.clone())
13263            .unwrap_or_else(|| "UNKNOWN".to_string());
13264        let company_name = company
13265            .map(|c| c.name.clone())
13266            .unwrap_or_else(|| "Unknown Company".to_string());
13267        let currency = company
13268            .map(|c| c.currency.clone())
13269            .unwrap_or_else(|| "USD".to_string());
13270
13271        // Filter JEs to the engagement entity for single-company coherence.
13272        let entity_entries: Vec<_> = entries
13273            .iter()
13274            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
13275            .cloned()
13276            .collect();
13277        let entries = &entity_entries; // Shadow the parameter for remaining usage
13278
13279        // Financial aggregates from journal entries.
13280        let total_revenue: rust_decimal::Decimal = entries
13281            .iter()
13282            .flat_map(|e| e.lines.iter())
13283            .filter(|l| l.account_code.starts_with('4'))
13284            .map(|l| l.credit_amount - l.debit_amount)
13285            .sum();
13286
13287        let total_assets: rust_decimal::Decimal = entries
13288            .iter()
13289            .flat_map(|e| e.lines.iter())
13290            .filter(|l| l.account_code.starts_with('1'))
13291            .map(|l| l.debit_amount - l.credit_amount)
13292            .sum();
13293
13294        let total_expenses: rust_decimal::Decimal = entries
13295            .iter()
13296            .flat_map(|e| e.lines.iter())
13297            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13298            .map(|l| l.debit_amount)
13299            .sum();
13300
13301        let equity: rust_decimal::Decimal = entries
13302            .iter()
13303            .flat_map(|e| e.lines.iter())
13304            .filter(|l| l.account_code.starts_with('3'))
13305            .map(|l| l.credit_amount - l.debit_amount)
13306            .sum();
13307
13308        let total_debt: rust_decimal::Decimal = entries
13309            .iter()
13310            .flat_map(|e| e.lines.iter())
13311            .filter(|l| l.account_code.starts_with('2'))
13312            .map(|l| l.credit_amount - l.debit_amount)
13313            .sum();
13314
13315        let pretax_income = total_revenue - total_expenses;
13316
13317        let cogs: rust_decimal::Decimal = entries
13318            .iter()
13319            .flat_map(|e| e.lines.iter())
13320            .filter(|l| l.account_code.starts_with('5'))
13321            .map(|l| l.debit_amount)
13322            .sum();
13323        let gross_profit = total_revenue - cogs;
13324
13325        let current_assets: rust_decimal::Decimal = entries
13326            .iter()
13327            .flat_map(|e| e.lines.iter())
13328            .filter(|l| {
13329                l.account_code.starts_with("10")
13330                    || l.account_code.starts_with("11")
13331                    || l.account_code.starts_with("12")
13332                    || l.account_code.starts_with("13")
13333            })
13334            .map(|l| l.debit_amount - l.credit_amount)
13335            .sum();
13336        let current_liabilities: rust_decimal::Decimal = entries
13337            .iter()
13338            .flat_map(|e| e.lines.iter())
13339            .filter(|l| {
13340                l.account_code.starts_with("20")
13341                    || l.account_code.starts_with("21")
13342                    || l.account_code.starts_with("22")
13343            })
13344            .map(|l| l.credit_amount - l.debit_amount)
13345            .sum();
13346        let working_capital = current_assets - current_liabilities;
13347
13348        let depreciation: rust_decimal::Decimal = entries
13349            .iter()
13350            .flat_map(|e| e.lines.iter())
13351            .filter(|l| l.account_code.starts_with("60"))
13352            .map(|l| l.debit_amount)
13353            .sum();
13354        let operating_cash_flow = pretax_income + depreciation;
13355
13356        // GL accounts for reference data.
13357        let accounts: Vec<String> = self
13358            .coa
13359            .as_ref()
13360            .map(|coa| {
13361                coa.get_postable_accounts()
13362                    .iter()
13363                    .map(|acc| acc.account_code().to_string())
13364                    .collect()
13365            })
13366            .unwrap_or_default();
13367
13368        // Team member IDs and display names from master data.
13369        let team_member_ids: Vec<String> = self
13370            .master_data
13371            .employees
13372            .iter()
13373            .take(8) // Cap team size
13374            .map(|e| e.employee_id.clone())
13375            .collect();
13376        let team_member_pairs: Vec<(String, String)> = self
13377            .master_data
13378            .employees
13379            .iter()
13380            .take(8)
13381            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13382            .collect();
13383
13384        let vendor_names: Vec<String> = self
13385            .master_data
13386            .vendors
13387            .iter()
13388            .map(|v| v.name.clone())
13389            .collect();
13390        let customer_names: Vec<String> = self
13391            .master_data
13392            .customers
13393            .iter()
13394            .map(|c| c.name.clone())
13395            .collect();
13396
13397        let entity_codes: Vec<String> = self
13398            .config
13399            .companies
13400            .iter()
13401            .map(|c| c.code.clone())
13402            .collect();
13403
13404        // Journal entry IDs for evidence tracing (sample up to 50).
13405        let journal_entry_ids: Vec<String> = entries
13406            .iter()
13407            .take(50)
13408            .map(|e| e.header.document_id.to_string())
13409            .collect();
13410
13411        // Account balances for risk weighting (aggregate debit - credit per account).
13412        let mut account_balances = std::collections::HashMap::<String, f64>::new();
13413        for entry in entries {
13414            for line in &entry.lines {
13415                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
13416                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
13417                *account_balances
13418                    .entry(line.account_code.clone())
13419                    .or_insert(0.0) += debit_f64 - credit_f64;
13420            }
13421        }
13422
13423        // Internal control IDs and anomaly refs are populated by the
13424        // caller when available; here we default to empty because the
13425        // orchestrator state may not have generated controls/anomalies
13426        // yet at this point in the pipeline.
13427        let control_ids: Vec<String> = Vec::new();
13428        let anomaly_refs: Vec<String> = Vec::new();
13429
13430        let mut context = EngagementContext {
13431            company_code,
13432            company_name,
13433            fiscal_year: start_date.year(),
13434            currency,
13435            total_revenue,
13436            total_assets,
13437            engagement_start: start_date,
13438            report_date: period_end,
13439            pretax_income,
13440            equity,
13441            gross_profit,
13442            working_capital,
13443            operating_cash_flow,
13444            total_debt,
13445            team_member_ids,
13446            team_member_pairs,
13447            accounts,
13448            vendor_names,
13449            customer_names,
13450            journal_entry_ids,
13451            account_balances,
13452            control_ids,
13453            anomaly_refs,
13454            journal_entries: entries.to_vec(),
13455            is_us_listed: false,
13456            entity_codes,
13457            auditor_firm_name: "DataSynth Audit LLP".into(),
13458            accounting_framework: self
13459                .config
13460                .accounting_standards
13461                .framework
13462                .map(|f| match f {
13463                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
13464                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
13465                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
13466                        "French GAAP"
13467                    }
13468                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
13469                        "German GAAP"
13470                    }
13471                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
13472                        "Dual Reporting"
13473                    }
13474                })
13475                .unwrap_or("IFRS")
13476                .into(),
13477        };
13478
13479        // 4. Create and run the FSM engine.
13480        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
13481        let rng = ChaCha8Rng::seed_from_u64(seed);
13482        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
13483
13484        let mut result = engine
13485            .run_engagement(&context)
13486            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
13487
13488        info!(
13489            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
13490             {} phases completed, duration {:.1}h",
13491            result.event_log.len(),
13492            result.artifacts.total_artifacts(),
13493            result.anomalies.len(),
13494            result.phases_completed.len(),
13495            result.total_duration_hours,
13496        );
13497
13498        // 4b. Populate financial data in the artifact bag for downstream consumers.
13499        let tb_entity = context.company_code.clone();
13500        let tb_fy = context.fiscal_year;
13501        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
13502        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
13503            entries,
13504            &tb_entity,
13505            tb_fy,
13506            self.coa.as_ref().map(|c| c.as_ref()),
13507        );
13508
13509        // 5. Map ArtifactBag fields to AuditSnapshot.
13510        let bag = result.artifacts;
13511        let mut snapshot = AuditSnapshot {
13512            engagements: bag.engagements,
13513            engagement_letters: bag.engagement_letters,
13514            materiality_calculations: bag.materiality_calculations,
13515            risk_assessments: bag.risk_assessments,
13516            combined_risk_assessments: bag.combined_risk_assessments,
13517            workpapers: bag.workpapers,
13518            evidence: bag.evidence,
13519            findings: bag.findings,
13520            judgments: bag.judgments,
13521            sampling_plans: bag.sampling_plans,
13522            sampled_items: bag.sampled_items,
13523            analytical_results: bag.analytical_results,
13524            going_concern_assessments: bag.going_concern_assessments,
13525            subsequent_events: bag.subsequent_events,
13526            audit_opinions: bag.audit_opinions,
13527            key_audit_matters: bag.key_audit_matters,
13528            procedure_steps: bag.procedure_steps,
13529            samples: bag.samples,
13530            confirmations: bag.confirmations,
13531            confirmation_responses: bag.confirmation_responses,
13532            // Store the event trail for downstream export.
13533            fsm_event_trail: Some(result.event_log),
13534            // Fields not produced by the FSM engine remain at their defaults.
13535            ..Default::default()
13536        };
13537
13538        // 6. Add static reference data (same as legacy path).
13539        {
13540            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13541            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13542        }
13543        {
13544            use datasynth_standards::audit::isa_reference::IsaStandard;
13545            snapshot.isa_mappings = IsaStandard::standard_entries();
13546        }
13547
13548        info!(
13549            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
13550             {} risk assessments, {} findings, {} materiality calcs",
13551            snapshot.engagements.len(),
13552            snapshot.workpapers.len(),
13553            snapshot.evidence.len(),
13554            snapshot.risk_assessments.len(),
13555            snapshot.findings.len(),
13556            snapshot.materiality_calculations.len(),
13557        );
13558
13559        Ok(snapshot)
13560    }
13561
13562    /// Export journal entries as graph data for ML training and network reconstruction.
13563    ///
13564    /// Builds a transaction graph where:
13565    /// - Nodes are GL accounts
13566    /// - Edges are money flows from credit to debit accounts
13567    /// - Edge attributes include amount, date, business process, anomaly flags
13568    fn export_graphs(
13569        &mut self,
13570        entries: &[JournalEntry],
13571        _coa: &Arc<ChartOfAccounts>,
13572        stats: &mut EnhancedGenerationStatistics,
13573    ) -> SynthResult<GraphExportSnapshot> {
13574        let pb = self.create_progress_bar(100, "Exporting Graphs");
13575
13576        let mut snapshot = GraphExportSnapshot::default();
13577
13578        // Get output directory
13579        let output_dir = self
13580            .output_path
13581            .clone()
13582            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13583        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13584
13585        // Process each graph type configuration
13586        for graph_type in &self.config.graph_export.graph_types {
13587            if let Some(pb) = &pb {
13588                pb.inc(10);
13589            }
13590
13591            // Build transaction graph
13592            let graph_config = TransactionGraphConfig {
13593                include_vendors: false,
13594                include_customers: false,
13595                create_debit_credit_edges: true,
13596                include_document_nodes: graph_type.include_document_nodes,
13597                min_edge_weight: graph_type.min_edge_weight,
13598                aggregate_parallel_edges: graph_type.aggregate_edges,
13599                framework: None,
13600            };
13601
13602            let mut builder = TransactionGraphBuilder::new(graph_config);
13603            builder.add_journal_entries(entries);
13604            let graph = builder.build();
13605
13606            // Update stats
13607            stats.graph_node_count += graph.node_count();
13608            stats.graph_edge_count += graph.edge_count();
13609
13610            if let Some(pb) = &pb {
13611                pb.inc(40);
13612            }
13613
13614            // Export to each configured format
13615            for format in &self.config.graph_export.formats {
13616                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
13617
13618                // Create output directory
13619                if let Err(e) = std::fs::create_dir_all(&format_dir) {
13620                    warn!("Failed to create graph output directory: {}", e);
13621                    continue;
13622                }
13623
13624                match format {
13625                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
13626                        let pyg_config = PyGExportConfig {
13627                            common: datasynth_graph::CommonExportConfig {
13628                                export_node_features: true,
13629                                export_edge_features: true,
13630                                export_node_labels: true,
13631                                export_edge_labels: true,
13632                                export_masks: true,
13633                                train_ratio: self.config.graph_export.train_ratio,
13634                                val_ratio: self.config.graph_export.validation_ratio,
13635                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13636                            },
13637                            one_hot_categoricals: false,
13638                        };
13639
13640                        let exporter = PyGExporter::new(pyg_config);
13641                        match exporter.export(&graph, &format_dir) {
13642                            Ok(metadata) => {
13643                                snapshot.exports.insert(
13644                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
13645                                    GraphExportInfo {
13646                                        name: graph_type.name.clone(),
13647                                        format: "pytorch_geometric".to_string(),
13648                                        output_path: format_dir.clone(),
13649                                        node_count: metadata.num_nodes,
13650                                        edge_count: metadata.num_edges,
13651                                    },
13652                                );
13653                                snapshot.graph_count += 1;
13654                            }
13655                            Err(e) => {
13656                                warn!("Failed to export PyTorch Geometric graph: {}", e);
13657                            }
13658                        }
13659                    }
13660                    datasynth_config::schema::GraphExportFormat::Neo4j => {
13661                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
13662
13663                        let neo4j_config = Neo4jExportConfig {
13664                            export_node_properties: true,
13665                            export_edge_properties: true,
13666                            export_features: true,
13667                            generate_cypher: true,
13668                            generate_admin_import: true,
13669                            database_name: "synth".to_string(),
13670                            cypher_batch_size: 1000,
13671                        };
13672
13673                        let exporter = Neo4jExporter::new(neo4j_config);
13674                        match exporter.export(&graph, &format_dir) {
13675                            Ok(metadata) => {
13676                                snapshot.exports.insert(
13677                                    format!("{}_{}", graph_type.name, "neo4j"),
13678                                    GraphExportInfo {
13679                                        name: graph_type.name.clone(),
13680                                        format: "neo4j".to_string(),
13681                                        output_path: format_dir.clone(),
13682                                        node_count: metadata.num_nodes,
13683                                        edge_count: metadata.num_edges,
13684                                    },
13685                                );
13686                                snapshot.graph_count += 1;
13687                            }
13688                            Err(e) => {
13689                                warn!("Failed to export Neo4j graph: {}", e);
13690                            }
13691                        }
13692                    }
13693                    datasynth_config::schema::GraphExportFormat::Dgl => {
13694                        use datasynth_graph::{DGLExportConfig, DGLExporter};
13695
13696                        let dgl_config = DGLExportConfig {
13697                            common: datasynth_graph::CommonExportConfig {
13698                                export_node_features: true,
13699                                export_edge_features: true,
13700                                export_node_labels: true,
13701                                export_edge_labels: true,
13702                                export_masks: true,
13703                                train_ratio: self.config.graph_export.train_ratio,
13704                                val_ratio: self.config.graph_export.validation_ratio,
13705                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13706                            },
13707                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
13708                            include_pickle_script: true, // DGL ecosystem standard helper
13709                        };
13710
13711                        let exporter = DGLExporter::new(dgl_config);
13712                        match exporter.export(&graph, &format_dir) {
13713                            Ok(metadata) => {
13714                                snapshot.exports.insert(
13715                                    format!("{}_{}", graph_type.name, "dgl"),
13716                                    GraphExportInfo {
13717                                        name: graph_type.name.clone(),
13718                                        format: "dgl".to_string(),
13719                                        output_path: format_dir.clone(),
13720                                        node_count: metadata.common.num_nodes,
13721                                        edge_count: metadata.common.num_edges,
13722                                    },
13723                                );
13724                                snapshot.graph_count += 1;
13725                            }
13726                            Err(e) => {
13727                                warn!("Failed to export DGL graph: {}", e);
13728                            }
13729                        }
13730                    }
13731                    datasynth_config::schema::GraphExportFormat::RustGraph => {
13732                        use datasynth_graph::{
13733                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
13734                        };
13735
13736                        let rustgraph_config = RustGraphExportConfig {
13737                            include_features: true,
13738                            include_temporal: true,
13739                            include_labels: true,
13740                            source_name: "datasynth".to_string(),
13741                            batch_id: None,
13742                            output_format: RustGraphOutputFormat::JsonLines,
13743                            export_node_properties: true,
13744                            export_edge_properties: true,
13745                            pretty_print: false,
13746                        };
13747
13748                        let exporter = RustGraphExporter::new(rustgraph_config);
13749                        match exporter.export(&graph, &format_dir) {
13750                            Ok(metadata) => {
13751                                snapshot.exports.insert(
13752                                    format!("{}_{}", graph_type.name, "rustgraph"),
13753                                    GraphExportInfo {
13754                                        name: graph_type.name.clone(),
13755                                        format: "rustgraph".to_string(),
13756                                        output_path: format_dir.clone(),
13757                                        node_count: metadata.num_nodes,
13758                                        edge_count: metadata.num_edges,
13759                                    },
13760                                );
13761                                snapshot.graph_count += 1;
13762                            }
13763                            Err(e) => {
13764                                warn!("Failed to export RustGraph: {}", e);
13765                            }
13766                        }
13767                    }
13768                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
13769                        // Hypergraph export is handled separately in Phase 10b
13770                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
13771                    }
13772                }
13773            }
13774
13775            if let Some(pb) = &pb {
13776                pb.inc(40);
13777            }
13778        }
13779
13780        stats.graph_export_count = snapshot.graph_count;
13781        snapshot.exported = snapshot.graph_count > 0;
13782
13783        if let Some(pb) = pb {
13784            pb.finish_with_message(format!(
13785                "Graphs exported: {} graphs ({} nodes, {} edges)",
13786                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
13787            ));
13788        }
13789
13790        Ok(snapshot)
13791    }
13792
13793    /// Build additional graph types (banking, approval, entity) when relevant data
13794    /// is available. These run as a late phase because the data they need (banking
13795    /// snapshot, intercompany snapshot) is only generated after the main graph
13796    /// export phase.
13797    fn build_additional_graphs(
13798        &self,
13799        banking: &BankingSnapshot,
13800        intercompany: &IntercompanySnapshot,
13801        entries: &[JournalEntry],
13802        stats: &mut EnhancedGenerationStatistics,
13803    ) {
13804        let output_dir = self
13805            .output_path
13806            .clone()
13807            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13808        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13809
13810        // Banking graph: build when banking customers and transactions exist
13811        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
13812            info!("Phase 10c: Building banking network graph");
13813            let config = BankingGraphConfig::default();
13814            let mut builder = BankingGraphBuilder::new(config);
13815            builder.add_customers(&banking.customers);
13816            builder.add_accounts(&banking.accounts, &banking.customers);
13817            builder.add_transactions(&banking.transactions);
13818            let graph = builder.build();
13819
13820            let node_count = graph.node_count();
13821            let edge_count = graph.edge_count();
13822            stats.graph_node_count += node_count;
13823            stats.graph_edge_count += edge_count;
13824
13825            // Export as PyG if configured
13826            for format in &self.config.graph_export.formats {
13827                if matches!(
13828                    format,
13829                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
13830                ) {
13831                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
13832                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
13833                        warn!("Failed to create banking graph output dir: {}", e);
13834                        continue;
13835                    }
13836                    let pyg_config = PyGExportConfig::default();
13837                    let exporter = PyGExporter::new(pyg_config);
13838                    if let Err(e) = exporter.export(&graph, &format_dir) {
13839                        warn!("Failed to export banking graph as PyG: {}", e);
13840                    } else {
13841                        info!(
13842                            "Banking network graph exported: {} nodes, {} edges",
13843                            node_count, edge_count
13844                        );
13845                    }
13846                }
13847            }
13848        }
13849
13850        // Approval graph: build from journal entry approval workflows
13851        let approval_entries: Vec<_> = entries
13852            .iter()
13853            .filter(|je| je.header.approval_workflow.is_some())
13854            .collect();
13855
13856        if !approval_entries.is_empty() {
13857            info!(
13858                "Phase 10c: Building approval network graph ({} entries with approvals)",
13859                approval_entries.len()
13860            );
13861            let config = ApprovalGraphConfig::default();
13862            let mut builder = ApprovalGraphBuilder::new(config);
13863
13864            for je in &approval_entries {
13865                if let Some(ref wf) = je.header.approval_workflow {
13866                    for action in &wf.actions {
13867                        let record = datasynth_core::models::ApprovalRecord {
13868                            approval_id: format!(
13869                                "APR-{}-{}",
13870                                je.header.document_id, action.approval_level
13871                            ),
13872                            document_number: je.header.document_id.to_string(),
13873                            document_type: "JE".to_string(),
13874                            company_code: je.company_code().to_string(),
13875                            requester_id: wf.preparer_id.clone(),
13876                            requester_name: Some(wf.preparer_name.clone()),
13877                            approver_id: action.actor_id.clone(),
13878                            approver_name: action.actor_name.clone(),
13879                            approval_date: je.posting_date(),
13880                            action: format!("{:?}", action.action),
13881                            amount: wf.amount,
13882                            approval_limit: None,
13883                            comments: action.comments.clone(),
13884                            delegation_from: None,
13885                            is_auto_approved: false,
13886                        };
13887                        builder.add_approval(&record);
13888                    }
13889                }
13890            }
13891
13892            let graph = builder.build();
13893            let node_count = graph.node_count();
13894            let edge_count = graph.edge_count();
13895            stats.graph_node_count += node_count;
13896            stats.graph_edge_count += edge_count;
13897
13898            // Export as PyG if configured
13899            for format in &self.config.graph_export.formats {
13900                if matches!(
13901                    format,
13902                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
13903                ) {
13904                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
13905                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
13906                        warn!("Failed to create approval graph output dir: {}", e);
13907                        continue;
13908                    }
13909                    let pyg_config = PyGExportConfig::default();
13910                    let exporter = PyGExporter::new(pyg_config);
13911                    if let Err(e) = exporter.export(&graph, &format_dir) {
13912                        warn!("Failed to export approval graph as PyG: {}", e);
13913                    } else {
13914                        info!(
13915                            "Approval network graph exported: {} nodes, {} edges",
13916                            node_count, edge_count
13917                        );
13918                    }
13919                }
13920            }
13921        }
13922
13923        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
13924        if self.config.companies.len() >= 2 {
13925            info!(
13926                "Phase 10c: Building entity relationship graph ({} companies)",
13927                self.config.companies.len()
13928            );
13929
13930            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13931                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
13932
13933            // Map CompanyConfig → Company objects
13934            let parent_code = &self.config.companies[0].code;
13935            let mut companies: Vec<datasynth_core::models::Company> =
13936                Vec::with_capacity(self.config.companies.len());
13937
13938            // First company is the parent
13939            let first = &self.config.companies[0];
13940            companies.push(datasynth_core::models::Company::parent(
13941                &first.code,
13942                &first.name,
13943                &first.country,
13944                &first.currency,
13945            ));
13946
13947            // Remaining companies are subsidiaries (100% owned by parent)
13948            for cc in self.config.companies.iter().skip(1) {
13949                companies.push(datasynth_core::models::Company::subsidiary(
13950                    &cc.code,
13951                    &cc.name,
13952                    &cc.country,
13953                    &cc.currency,
13954                    parent_code,
13955                    rust_decimal::Decimal::from(100),
13956                ));
13957            }
13958
13959            // Build IntercompanyRelationship records (same logic as phase_intercompany)
13960            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
13961                self.config
13962                    .companies
13963                    .iter()
13964                    .skip(1)
13965                    .enumerate()
13966                    .map(|(i, cc)| {
13967                        let mut rel =
13968                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
13969                                format!("REL{:03}", i + 1),
13970                                parent_code.clone(),
13971                                cc.code.clone(),
13972                                rust_decimal::Decimal::from(100),
13973                                start_date,
13974                            );
13975                        rel.functional_currency = cc.currency.clone();
13976                        rel
13977                    })
13978                    .collect();
13979
13980            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
13981            builder.add_companies(&companies);
13982            builder.add_ownership_relationships(&relationships);
13983
13984            // Thread IC matched-pair transaction edges into the entity graph
13985            for pair in &intercompany.matched_pairs {
13986                builder.add_intercompany_edge(
13987                    &pair.seller_company,
13988                    &pair.buyer_company,
13989                    pair.amount,
13990                    &format!("{:?}", pair.transaction_type),
13991                );
13992            }
13993
13994            let graph = builder.build();
13995            let node_count = graph.node_count();
13996            let edge_count = graph.edge_count();
13997            stats.graph_node_count += node_count;
13998            stats.graph_edge_count += edge_count;
13999
14000            // Export as PyG if configured
14001            for format in &self.config.graph_export.formats {
14002                if matches!(
14003                    format,
14004                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14005                ) {
14006                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14007                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14008                        warn!("Failed to create entity graph output dir: {}", e);
14009                        continue;
14010                    }
14011                    let pyg_config = PyGExportConfig::default();
14012                    let exporter = PyGExporter::new(pyg_config);
14013                    if let Err(e) = exporter.export(&graph, &format_dir) {
14014                        warn!("Failed to export entity graph as PyG: {}", e);
14015                    } else {
14016                        info!(
14017                            "Entity relationship graph exported: {} nodes, {} edges",
14018                            node_count, edge_count
14019                        );
14020                    }
14021                }
14022            }
14023        } else {
14024            debug!(
14025                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14026                self.config.companies.len()
14027            );
14028        }
14029    }
14030
14031    /// Export a multi-layer hypergraph for RustGraph integration.
14032    ///
14033    /// Builds a 3-layer hypergraph:
14034    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
14035    /// - Layer 2: Process Events (all process family document flows + OCPM events)
14036    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
14037    #[allow(clippy::too_many_arguments)]
14038    fn export_hypergraph(
14039        &self,
14040        coa: &Arc<ChartOfAccounts>,
14041        entries: &[JournalEntry],
14042        document_flows: &DocumentFlowSnapshot,
14043        sourcing: &SourcingSnapshot,
14044        hr: &HrSnapshot,
14045        manufacturing: &ManufacturingSnapshot,
14046        banking: &BankingSnapshot,
14047        audit: &AuditSnapshot,
14048        financial_reporting: &FinancialReportingSnapshot,
14049        ocpm: &OcpmSnapshot,
14050        compliance: &ComplianceRegulationsSnapshot,
14051        stats: &mut EnhancedGenerationStatistics,
14052    ) -> SynthResult<HypergraphExportInfo> {
14053        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
14054        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
14055        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
14056        use datasynth_graph::models::hypergraph::AggregationStrategy;
14057
14058        let hg_settings = &self.config.graph_export.hypergraph;
14059
14060        // Parse aggregation strategy from config string
14061        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
14062            "truncate" => AggregationStrategy::Truncate,
14063            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
14064            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
14065            "importance_sample" => AggregationStrategy::ImportanceSample,
14066            _ => AggregationStrategy::PoolByCounterparty,
14067        };
14068
14069        let builder_config = HypergraphConfig {
14070            max_nodes: hg_settings.max_nodes,
14071            aggregation_strategy,
14072            include_coso: hg_settings.governance_layer.include_coso,
14073            include_controls: hg_settings.governance_layer.include_controls,
14074            include_sox: hg_settings.governance_layer.include_sox,
14075            include_vendors: hg_settings.governance_layer.include_vendors,
14076            include_customers: hg_settings.governance_layer.include_customers,
14077            include_employees: hg_settings.governance_layer.include_employees,
14078            include_p2p: hg_settings.process_layer.include_p2p,
14079            include_o2c: hg_settings.process_layer.include_o2c,
14080            include_s2c: hg_settings.process_layer.include_s2c,
14081            include_h2r: hg_settings.process_layer.include_h2r,
14082            include_mfg: hg_settings.process_layer.include_mfg,
14083            include_bank: hg_settings.process_layer.include_bank,
14084            include_audit: hg_settings.process_layer.include_audit,
14085            include_r2r: hg_settings.process_layer.include_r2r,
14086            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
14087            docs_per_counterparty_threshold: hg_settings
14088                .process_layer
14089                .docs_per_counterparty_threshold,
14090            include_accounts: hg_settings.accounting_layer.include_accounts,
14091            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
14092            include_cross_layer_edges: hg_settings.cross_layer.enabled,
14093            include_compliance: self.config.compliance_regulations.enabled,
14094            include_tax: true,
14095            include_treasury: true,
14096            include_esg: true,
14097            include_project: true,
14098            include_intercompany: true,
14099            include_temporal_events: true,
14100        };
14101
14102        let mut builder = HypergraphBuilder::new(builder_config);
14103
14104        // Layer 1: Governance & Controls
14105        builder.add_coso_framework();
14106
14107        // Add controls if available (generated during JE generation)
14108        // Controls are generated per-company; we use the standard set
14109        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
14110            let controls = InternalControl::standard_controls();
14111            builder.add_controls(&controls);
14112        }
14113
14114        // Add master data
14115        builder.add_vendors(&self.master_data.vendors);
14116        builder.add_customers(&self.master_data.customers);
14117        builder.add_employees(&self.master_data.employees);
14118
14119        // Layer 2: Process Events (all process families)
14120        builder.add_p2p_documents(
14121            &document_flows.purchase_orders,
14122            &document_flows.goods_receipts,
14123            &document_flows.vendor_invoices,
14124            &document_flows.payments,
14125        );
14126        builder.add_o2c_documents(
14127            &document_flows.sales_orders,
14128            &document_flows.deliveries,
14129            &document_flows.customer_invoices,
14130        );
14131        builder.add_s2c_documents(
14132            &sourcing.sourcing_projects,
14133            &sourcing.qualifications,
14134            &sourcing.rfx_events,
14135            &sourcing.bids,
14136            &sourcing.bid_evaluations,
14137            &sourcing.contracts,
14138        );
14139        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
14140        builder.add_mfg_documents(
14141            &manufacturing.production_orders,
14142            &manufacturing.quality_inspections,
14143            &manufacturing.cycle_counts,
14144        );
14145        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
14146        builder.add_audit_documents(
14147            &audit.engagements,
14148            &audit.workpapers,
14149            &audit.findings,
14150            &audit.evidence,
14151            &audit.risk_assessments,
14152            &audit.judgments,
14153            &audit.materiality_calculations,
14154            &audit.audit_opinions,
14155            &audit.going_concern_assessments,
14156        );
14157        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
14158
14159        // OCPM events as hyperedges
14160        if let Some(ref event_log) = ocpm.event_log {
14161            builder.add_ocpm_events(event_log);
14162        }
14163
14164        // Compliance regulations as cross-layer nodes
14165        if self.config.compliance_regulations.enabled
14166            && hg_settings.governance_layer.include_controls
14167        {
14168            // Reconstruct ComplianceStandard objects from the registry
14169            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14170            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
14171                .standard_records
14172                .iter()
14173                .filter_map(|r| {
14174                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
14175                    registry.get(&sid).cloned()
14176                })
14177                .collect();
14178
14179            builder.add_compliance_regulations(
14180                &standards,
14181                &compliance.findings,
14182                &compliance.filings,
14183            );
14184        }
14185
14186        // Layer 3: Accounting Network
14187        builder.add_accounts(coa);
14188        builder.add_journal_entries_as_hyperedges(entries);
14189
14190        // Build the hypergraph
14191        let hypergraph = builder.build();
14192
14193        // Export
14194        let output_dir = self
14195            .output_path
14196            .clone()
14197            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14198        let hg_dir = output_dir
14199            .join(&self.config.graph_export.output_subdirectory)
14200            .join(&hg_settings.output_subdirectory);
14201
14202        // Branch on output format
14203        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
14204            "unified" => {
14205                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14206                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14207                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
14208                })?;
14209                (
14210                    metadata.num_nodes,
14211                    metadata.num_edges,
14212                    metadata.num_hyperedges,
14213                )
14214            }
14215            _ => {
14216                // "native" or any unrecognized format → use existing exporter
14217                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
14218                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14219                    SynthError::generation(format!("Hypergraph export failed: {e}"))
14220                })?;
14221                (
14222                    metadata.num_nodes,
14223                    metadata.num_edges,
14224                    metadata.num_hyperedges,
14225                )
14226            }
14227        };
14228
14229        // Stream to RustGraph ingest endpoint if configured
14230        #[cfg(feature = "streaming")]
14231        if let Some(ref target_url) = hg_settings.stream_target {
14232            use crate::stream_client::{StreamClient, StreamConfig};
14233            use std::io::Write as _;
14234
14235            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
14236            let stream_config = StreamConfig {
14237                target_url: target_url.clone(),
14238                batch_size: hg_settings.stream_batch_size,
14239                api_key,
14240                ..StreamConfig::default()
14241            };
14242
14243            match StreamClient::new(stream_config) {
14244                Ok(mut client) => {
14245                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14246                    match exporter.export_to_writer(&hypergraph, &mut client) {
14247                        Ok(_) => {
14248                            if let Err(e) = client.flush() {
14249                                warn!("Failed to flush stream client: {}", e);
14250                            } else {
14251                                info!("Streamed {} records to {}", client.total_sent(), target_url);
14252                            }
14253                        }
14254                        Err(e) => {
14255                            warn!("Streaming export failed: {}", e);
14256                        }
14257                    }
14258                }
14259                Err(e) => {
14260                    warn!("Failed to create stream client: {}", e);
14261                }
14262            }
14263        }
14264
14265        // Update stats
14266        stats.graph_node_count += num_nodes;
14267        stats.graph_edge_count += num_edges;
14268        stats.graph_export_count += 1;
14269
14270        Ok(HypergraphExportInfo {
14271            node_count: num_nodes,
14272            edge_count: num_edges,
14273            hyperedge_count: num_hyperedges,
14274            output_path: hg_dir,
14275        })
14276    }
14277
14278    /// Generate banking KYC/AML data.
14279    ///
14280    /// Creates banking customers, accounts, and transactions with AML typology injection.
14281    /// Uses the BankingOrchestrator from synth-banking crate.
14282    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
14283        let pb = self.create_progress_bar(100, "Generating Banking Data");
14284
14285        // Build the banking orchestrator from config
14286        let orchestrator = BankingOrchestratorBuilder::new()
14287            .config(self.config.banking.clone())
14288            .seed(self.seed + 9000)
14289            .country_pack(self.primary_pack().clone())
14290            .build();
14291
14292        if let Some(pb) = &pb {
14293            pb.inc(10);
14294        }
14295
14296        // Generate the banking data
14297        let result = orchestrator.generate();
14298
14299        if let Some(pb) = &pb {
14300            pb.inc(90);
14301            pb.finish_with_message(format!(
14302                "Banking: {} customers, {} transactions",
14303                result.customers.len(),
14304                result.transactions.len()
14305            ));
14306        }
14307
14308        // Cross-reference banking customers with core master data so that
14309        // banking customer names align with the enterprise customer list.
14310        // We rotate through core customers, overlaying their name and country
14311        // onto the generated banking customers where possible.
14312        let mut banking_customers = result.customers;
14313        let core_customers = &self.master_data.customers;
14314        if !core_customers.is_empty() {
14315            for (i, bc) in banking_customers.iter_mut().enumerate() {
14316                let core = &core_customers[i % core_customers.len()];
14317                bc.name = CustomerName::business(&core.name);
14318                bc.residence_country = core.country.clone();
14319                bc.enterprise_customer_id = Some(core.customer_id.clone());
14320            }
14321            debug!(
14322                "Cross-referenced {} banking customers with {} core customers",
14323                banking_customers.len(),
14324                core_customers.len()
14325            );
14326        }
14327
14328        Ok(BankingSnapshot {
14329            customers: banking_customers,
14330            accounts: result.accounts,
14331            transactions: result.transactions,
14332            transaction_labels: result.transaction_labels,
14333            customer_labels: result.customer_labels,
14334            account_labels: result.account_labels,
14335            relationship_labels: result.relationship_labels,
14336            narratives: result.narratives,
14337            suspicious_count: result.stats.suspicious_count,
14338            scenario_count: result.scenarios.len(),
14339        })
14340    }
14341
14342    /// Calculate total transactions to generate.
14343    fn calculate_total_transactions(&self) -> u64 {
14344        let months = self.config.global.period_months as f64;
14345        self.config
14346            .companies
14347            .iter()
14348            .map(|c| {
14349                let annual = c.annual_transaction_volume.count() as f64;
14350                let weighted = annual * c.volume_weight;
14351                (weighted * months / 12.0) as u64
14352            })
14353            .sum()
14354    }
14355
14356    /// Create a progress bar if progress display is enabled.
14357    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
14358        if !self.phase_config.show_progress {
14359            return None;
14360        }
14361
14362        let pb = if let Some(mp) = &self.multi_progress {
14363            mp.add(ProgressBar::new(total))
14364        } else {
14365            ProgressBar::new(total)
14366        };
14367
14368        pb.set_style(
14369            ProgressStyle::default_bar()
14370                .template(&format!(
14371                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
14372                ))
14373                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
14374                .progress_chars("#>-"),
14375        );
14376
14377        Some(pb)
14378    }
14379
14380    /// Get the generated chart of accounts.
14381    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
14382        self.coa.clone()
14383    }
14384
    /// Get the generated master data.
    ///
    /// Returns a shared reference to the orchestrator's `master_data` snapshot;
    /// nothing is cloned.
    pub fn get_master_data(&self) -> &MasterDataSnapshot {
        &self.master_data
    }
14389
14390    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
14391    fn phase_compliance_regulations(
14392        &mut self,
14393        _stats: &mut EnhancedGenerationStatistics,
14394    ) -> SynthResult<ComplianceRegulationsSnapshot> {
14395        if !self.phase_config.generate_compliance_regulations {
14396            return Ok(ComplianceRegulationsSnapshot::default());
14397        }
14398
14399        info!("Phase: Generating Compliance Regulations Data");
14400
14401        let cr_config = &self.config.compliance_regulations;
14402
14403        // Determine jurisdictions: from config or inferred from companies
14404        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
14405            self.config
14406                .companies
14407                .iter()
14408                .map(|c| c.country.clone())
14409                .collect::<std::collections::HashSet<_>>()
14410                .into_iter()
14411                .collect()
14412        } else {
14413            cr_config.jurisdictions.clone()
14414        };
14415
14416        // Determine reference date
14417        let fallback_date =
14418            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
14419        let reference_date = cr_config
14420            .reference_date
14421            .as_ref()
14422            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
14423            .unwrap_or_else(|| {
14424                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14425                    .unwrap_or(fallback_date)
14426            });
14427
14428        // Generate standards registry data
14429        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
14430        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
14431        let cross_reference_records = reg_gen.generate_cross_reference_records();
14432        let jurisdiction_records =
14433            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
14434
14435        info!(
14436            "  Standards: {} records, {} cross-references, {} jurisdictions",
14437            standard_records.len(),
14438            cross_reference_records.len(),
14439            jurisdiction_records.len()
14440        );
14441
14442        // Generate audit procedures (if enabled)
14443        let audit_procedures = if cr_config.audit_procedures.enabled {
14444            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
14445                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
14446                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
14447                confidence_level: cr_config.audit_procedures.confidence_level,
14448                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
14449            };
14450            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
14451                self.seed + 9000,
14452                proc_config,
14453            );
14454            let registry = reg_gen.registry();
14455            let mut all_procs = Vec::new();
14456            for jurisdiction in &jurisdictions {
14457                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
14458                all_procs.extend(procs);
14459            }
14460            info!("  Audit procedures: {}", all_procs.len());
14461            all_procs
14462        } else {
14463            Vec::new()
14464        };
14465
14466        // Generate compliance findings (if enabled)
14467        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
14468            let finding_config =
14469                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
14470                    finding_rate: cr_config.findings.finding_rate,
14471                    material_weakness_rate: cr_config.findings.material_weakness_rate,
14472                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
14473                    generate_remediation: cr_config.findings.generate_remediation,
14474                };
14475            let mut finding_gen =
14476                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
14477                    self.seed + 9100,
14478                    finding_config,
14479                );
14480            let mut all_findings = Vec::new();
14481            for company in &self.config.companies {
14482                let company_findings =
14483                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
14484                all_findings.extend(company_findings);
14485            }
14486            info!("  Compliance findings: {}", all_findings.len());
14487            all_findings
14488        } else {
14489            Vec::new()
14490        };
14491
14492        // Generate regulatory filings (if enabled)
14493        let filings = if cr_config.filings.enabled {
14494            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
14495                filing_types: cr_config.filings.filing_types.clone(),
14496                generate_status_progression: cr_config.filings.generate_status_progression,
14497            };
14498            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
14499                self.seed + 9200,
14500                filing_config,
14501            );
14502            let company_codes: Vec<String> = self
14503                .config
14504                .companies
14505                .iter()
14506                .map(|c| c.code.clone())
14507                .collect();
14508            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14509                .unwrap_or(fallback_date);
14510            let filings = filing_gen.generate_filings(
14511                &company_codes,
14512                &jurisdictions,
14513                start_date,
14514                self.config.global.period_months,
14515            );
14516            info!("  Regulatory filings: {}", filings.len());
14517            filings
14518        } else {
14519            Vec::new()
14520        };
14521
14522        // Build compliance graph (if enabled)
14523        let compliance_graph = if cr_config.graph.enabled {
14524            let graph_config = datasynth_graph::ComplianceGraphConfig {
14525                include_standard_nodes: cr_config.graph.include_compliance_nodes,
14526                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
14527                include_cross_references: cr_config.graph.include_cross_references,
14528                include_supersession_edges: cr_config.graph.include_supersession_edges,
14529                include_account_links: cr_config.graph.include_account_links,
14530                include_control_links: cr_config.graph.include_control_links,
14531                include_company_links: cr_config.graph.include_company_links,
14532            };
14533            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
14534
14535            // Add standard nodes
14536            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
14537                .iter()
14538                .map(|r| datasynth_graph::StandardNodeInput {
14539                    standard_id: r.standard_id.clone(),
14540                    title: r.title.clone(),
14541                    category: r.category.clone(),
14542                    domain: r.domain.clone(),
14543                    is_active: r.is_active,
14544                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
14545                    applicable_account_types: r.applicable_account_types.clone(),
14546                    applicable_processes: r.applicable_processes.clone(),
14547                })
14548                .collect();
14549            builder.add_standards(&standard_inputs);
14550
14551            // Add jurisdiction nodes
14552            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
14553                jurisdiction_records
14554                    .iter()
14555                    .map(|r| datasynth_graph::JurisdictionNodeInput {
14556                        country_code: r.country_code.clone(),
14557                        country_name: r.country_name.clone(),
14558                        framework: r.accounting_framework.clone(),
14559                        standard_count: r.standard_count,
14560                        tax_rate: r.statutory_tax_rate,
14561                    })
14562                    .collect();
14563            builder.add_jurisdictions(&jurisdiction_inputs);
14564
14565            // Add cross-reference edges
14566            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
14567                cross_reference_records
14568                    .iter()
14569                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
14570                        from_standard: r.from_standard.clone(),
14571                        to_standard: r.to_standard.clone(),
14572                        relationship: r.relationship.clone(),
14573                        convergence_level: r.convergence_level,
14574                    })
14575                    .collect();
14576            builder.add_cross_references(&xref_inputs);
14577
14578            // Add jurisdiction→standard mappings
14579            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
14580                .iter()
14581                .map(|r| datasynth_graph::JurisdictionMappingInput {
14582                    country_code: r.jurisdiction.clone(),
14583                    standard_id: r.standard_id.clone(),
14584                })
14585                .collect();
14586            builder.add_jurisdiction_mappings(&mapping_inputs);
14587
14588            // Add procedure nodes
14589            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
14590                .iter()
14591                .map(|p| datasynth_graph::ProcedureNodeInput {
14592                    procedure_id: p.procedure_id.clone(),
14593                    standard_id: p.standard_id.clone(),
14594                    procedure_type: p.procedure_type.clone(),
14595                    sample_size: p.sample_size,
14596                    confidence_level: p.confidence_level,
14597                })
14598                .collect();
14599            builder.add_procedures(&proc_inputs);
14600
14601            // Add finding nodes
14602            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
14603                .iter()
14604                .map(|f| datasynth_graph::FindingNodeInput {
14605                    finding_id: f.finding_id.to_string(),
14606                    standard_id: f
14607                        .related_standards
14608                        .first()
14609                        .map(|s| s.as_str().to_string())
14610                        .unwrap_or_default(),
14611                    severity: f.severity.to_string(),
14612                    deficiency_level: f.deficiency_level.to_string(),
14613                    severity_score: f.deficiency_level.severity_score(),
14614                    control_id: f.control_id.clone(),
14615                    affected_accounts: f.affected_accounts.clone(),
14616                })
14617                .collect();
14618            builder.add_findings(&finding_inputs);
14619
14620            // Cross-domain: link standards to accounts from chart of accounts
14621            if cr_config.graph.include_account_links {
14622                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14623                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
14624                for std_record in &standard_records {
14625                    if let Some(std_obj) =
14626                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
14627                            &std_record.standard_id,
14628                        ))
14629                    {
14630                        for acct_type in &std_obj.applicable_account_types {
14631                            account_links.push(datasynth_graph::AccountLinkInput {
14632                                standard_id: std_record.standard_id.clone(),
14633                                account_code: acct_type.clone(),
14634                                account_name: acct_type.clone(),
14635                            });
14636                        }
14637                    }
14638                }
14639                builder.add_account_links(&account_links);
14640            }
14641
14642            // Cross-domain: link standards to internal controls
14643            if cr_config.graph.include_control_links {
14644                let mut control_links = Vec::new();
14645                // SOX/PCAOB standards link to all controls
14646                let sox_like_ids: Vec<String> = standard_records
14647                    .iter()
14648                    .filter(|r| {
14649                        r.standard_id.starts_with("SOX")
14650                            || r.standard_id.starts_with("PCAOB-AS-2201")
14651                    })
14652                    .map(|r| r.standard_id.clone())
14653                    .collect();
14654                // Get control IDs from config (C001-C060 standard controls)
14655                let control_ids = [
14656                    ("C001", "Cash Controls"),
14657                    ("C002", "Large Transaction Approval"),
14658                    ("C010", "PO Approval"),
14659                    ("C011", "Three-Way Match"),
14660                    ("C020", "Revenue Recognition"),
14661                    ("C021", "Credit Check"),
14662                    ("C030", "Manual JE Approval"),
14663                    ("C031", "Period Close Review"),
14664                    ("C032", "Account Reconciliation"),
14665                    ("C040", "Payroll Processing"),
14666                    ("C050", "Fixed Asset Capitalization"),
14667                    ("C060", "Intercompany Elimination"),
14668                ];
14669                for sox_id in &sox_like_ids {
14670                    for (ctrl_id, ctrl_name) in &control_ids {
14671                        control_links.push(datasynth_graph::ControlLinkInput {
14672                            standard_id: sox_id.clone(),
14673                            control_id: ctrl_id.to_string(),
14674                            control_name: ctrl_name.to_string(),
14675                        });
14676                    }
14677                }
14678                builder.add_control_links(&control_links);
14679            }
14680
14681            // Cross-domain: filing nodes with company links
14682            if cr_config.graph.include_company_links {
14683                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
14684                    .iter()
14685                    .enumerate()
14686                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
14687                        filing_id: format!("F{:04}", i + 1),
14688                        filing_type: f.filing_type.to_string(),
14689                        company_code: f.company_code.clone(),
14690                        jurisdiction: f.jurisdiction.clone(),
14691                        status: format!("{:?}", f.status),
14692                    })
14693                    .collect();
14694                builder.add_filings(&filing_inputs);
14695            }
14696
14697            let graph = builder.build();
14698            info!(
14699                "  Compliance graph: {} nodes, {} edges",
14700                graph.nodes.len(),
14701                graph.edges.len()
14702            );
14703            Some(graph)
14704        } else {
14705            None
14706        };
14707
14708        self.check_resources_with_log("post-compliance-regulations")?;
14709
14710        Ok(ComplianceRegulationsSnapshot {
14711            standard_records,
14712            cross_reference_records,
14713            jurisdiction_records,
14714            audit_procedures,
14715            findings,
14716            filings,
14717            compliance_graph,
14718        })
14719    }
14720
14721    /// Build a lineage graph describing config → phase → output relationships.
14722    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
14723        use super::lineage::LineageGraphBuilder;
14724
14725        let mut builder = LineageGraphBuilder::new();
14726
14727        // Config sections
14728        builder.add_config_section("config:global", "Global Config");
14729        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
14730        builder.add_config_section("config:transactions", "Transaction Config");
14731
14732        // Generator phases
14733        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
14734        builder.add_generator_phase("phase:je", "Journal Entry Generation");
14735
14736        // Config → phase edges
14737        builder.configured_by("phase:coa", "config:chart_of_accounts");
14738        builder.configured_by("phase:je", "config:transactions");
14739
14740        // Output files
14741        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
14742        builder.produced_by("output:je", "phase:je");
14743
14744        // Optional phases based on config
14745        if self.phase_config.generate_master_data {
14746            builder.add_config_section("config:master_data", "Master Data Config");
14747            builder.add_generator_phase("phase:master_data", "Master Data Generation");
14748            builder.configured_by("phase:master_data", "config:master_data");
14749            builder.input_to("phase:master_data", "phase:je");
14750        }
14751
14752        if self.phase_config.generate_document_flows {
14753            builder.add_config_section("config:document_flows", "Document Flow Config");
14754            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
14755            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
14756            builder.configured_by("phase:p2p", "config:document_flows");
14757            builder.configured_by("phase:o2c", "config:document_flows");
14758
14759            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
14760            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
14761            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
14762            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
14763            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
14764
14765            builder.produced_by("output:po", "phase:p2p");
14766            builder.produced_by("output:gr", "phase:p2p");
14767            builder.produced_by("output:vi", "phase:p2p");
14768            builder.produced_by("output:so", "phase:o2c");
14769            builder.produced_by("output:ci", "phase:o2c");
14770        }
14771
14772        if self.phase_config.inject_anomalies {
14773            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
14774            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
14775            builder.configured_by("phase:anomaly", "config:fraud");
14776            builder.add_output_file(
14777                "output:labels",
14778                "Anomaly Labels",
14779                "labels/anomaly_labels.csv",
14780            );
14781            builder.produced_by("output:labels", "phase:anomaly");
14782        }
14783
14784        if self.phase_config.generate_audit {
14785            builder.add_config_section("config:audit", "Audit Config");
14786            builder.add_generator_phase("phase:audit", "Audit Data Generation");
14787            builder.configured_by("phase:audit", "config:audit");
14788        }
14789
14790        if self.phase_config.generate_banking {
14791            builder.add_config_section("config:banking", "Banking Config");
14792            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
14793            builder.configured_by("phase:banking", "config:banking");
14794        }
14795
14796        if self.config.llm.enabled {
14797            builder.add_config_section("config:llm", "LLM Enrichment Config");
14798            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
14799            builder.configured_by("phase:llm_enrichment", "config:llm");
14800        }
14801
14802        if self.config.diffusion.enabled {
14803            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
14804            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
14805            builder.configured_by("phase:diffusion", "config:diffusion");
14806        }
14807
14808        if self.config.causal.enabled {
14809            builder.add_config_section("config:causal", "Causal Generation Config");
14810            builder.add_generator_phase("phase:causal", "Causal Overlay");
14811            builder.configured_by("phase:causal", "config:causal");
14812        }
14813
14814        builder.build()
14815    }
14816
14817    // -----------------------------------------------------------------------
14818    // Trial-balance helpers used to replace hardcoded proxy values
14819    // -----------------------------------------------------------------------
14820
14821    /// Compute total revenue for a company from its journal entries.
14822    ///
14823    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
14824    /// net credits on all revenue-account lines filtered to `company_code`.
14825    fn compute_company_revenue(
14826        entries: &[JournalEntry],
14827        company_code: &str,
14828    ) -> rust_decimal::Decimal {
14829        use rust_decimal::Decimal;
14830        let mut revenue = Decimal::ZERO;
14831        for je in entries {
14832            if je.header.company_code != company_code {
14833                continue;
14834            }
14835            for line in &je.lines {
14836                if line.gl_account.starts_with('4') {
14837                    // Revenue is credit-normal
14838                    revenue += line.credit_amount - line.debit_amount;
14839                }
14840            }
14841        }
14842        revenue.max(Decimal::ZERO)
14843    }
14844
14845    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
14846    ///
14847    /// Asset accounts start with "1"; liability accounts start with "2".
14848    fn compute_entity_net_assets(
14849        entries: &[JournalEntry],
14850        entity_code: &str,
14851    ) -> rust_decimal::Decimal {
14852        use rust_decimal::Decimal;
14853        let mut asset_net = Decimal::ZERO;
14854        let mut liability_net = Decimal::ZERO;
14855        for je in entries {
14856            if je.header.company_code != entity_code {
14857                continue;
14858            }
14859            for line in &je.lines {
14860                if line.gl_account.starts_with('1') {
14861                    asset_net += line.debit_amount - line.credit_amount;
14862                } else if line.gl_account.starts_with('2') {
14863                    liability_net += line.credit_amount - line.debit_amount;
14864                }
14865            }
14866        }
14867        asset_net - liability_net
14868    }
14869
14870    /// v3.5.1+: Run the statistical validation suite configured in
14871    /// `distributions.validation.tests` over the final amount
14872    /// distribution.  Collects every non-zero line-level amount (debit +
14873    /// credit) and hands it to the runners in
14874    /// `datasynth_core::distributions::validation`.
14875    ///
14876    /// Returns `Ok(None)` when validation is disabled (the default).
14877    /// When `reporting.fail_on_error = true` and any test fails, returns
14878    /// `Err` with a concise message; otherwise attaches the report to
14879    /// the result and lets callers inspect it.
14880    fn phase_statistical_validation(
14881        &self,
14882        entries: &[JournalEntry],
14883    ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
14884        use datasynth_config::schema::StatisticalTestConfig;
14885        use datasynth_core::distributions::{
14886            run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
14887            run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
14888        };
14889        use rust_decimal::prelude::ToPrimitive;
14890
14891        let cfg = &self.config.distributions.validation;
14892        if !cfg.enabled {
14893            return Ok(None);
14894        }
14895
14896        // Collect per-line positive amounts (debit + credit is zero on the
14897        // non-posting side, so this naturally picks the magnitude).
14898        let amounts: Vec<rust_decimal::Decimal> = entries
14899            .iter()
14900            .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
14901            .filter(|a| *a > rust_decimal::Decimal::ZERO)
14902            .collect();
14903
14904        // v4.1.0+ paired (amount, line_count) per entry for correlation
14905        // checks. Amount per entry is the debit-side total (= credit-side
14906        // total for a balanced entry).
14907        let paired_amount_linecount: Vec<(f64, f64)> = entries
14908            .iter()
14909            .filter_map(|je| {
14910                let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
14911                if amt > rust_decimal::Decimal::ZERO {
14912                    amt.to_f64().map(|a| (a, je.lines.len() as f64))
14913                } else {
14914                    None
14915                }
14916            })
14917            .collect();
14918
14919        let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
14920        for test_cfg in &cfg.tests {
14921            match test_cfg {
14922                StatisticalTestConfig::BenfordFirstDigit {
14923                    threshold_mad,
14924                    warning_mad,
14925                } => {
14926                    results.push(run_benford_first_digit(
14927                        &amounts,
14928                        *threshold_mad,
14929                        *warning_mad,
14930                    ));
14931                }
14932                StatisticalTestConfig::ChiSquared { bins, significance } => {
14933                    results.push(run_chi_squared(&amounts, *bins, *significance));
14934                }
14935                StatisticalTestConfig::DistributionFit {
14936                    target: _,
14937                    ks_significance,
14938                    method: _,
14939                } => {
14940                    // v3.5.1+: log-uniformity KS check. Target-specific
14941                    // fits against Normal / Exponential land in v4.1.1+.
14942                    results.push(run_ks_uniform_log(&amounts, *ks_significance));
14943                }
14944                StatisticalTestConfig::AndersonDarling {
14945                    target: _,
14946                    significance,
14947                } => {
14948                    // v4.1.0+: A*² statistic against log-normal on the
14949                    // log-scale. Other targets follow the same pattern.
14950                    results.push(run_anderson_darling(&amounts, *significance));
14951                }
14952                StatisticalTestConfig::CorrelationCheck {
14953                    expected_correlations,
14954                } => {
14955                    // v4.1.0+: (amount, line_count) is tracked today.
14956                    // Other pairs resolve to Skipped pending richer
14957                    // per-entry attribute collection.
14958                    if expected_correlations.is_empty() {
14959                        results.push(StatisticalTestResult {
14960                            name: "correlation_check".to_string(),
14961                            outcome: TestOutcome::Skipped,
14962                            statistic: 0.0,
14963                            threshold: 0.0,
14964                            message: "no expected correlations declared".to_string(),
14965                        });
14966                    } else {
14967                        for ec in expected_correlations {
14968                            let pair_key = format!("{}_{}", ec.field1, ec.field2);
14969                            let is_amount_linecount = (ec.field1 == "amount"
14970                                && ec.field2 == "line_count")
14971                                || (ec.field1 == "line_count" && ec.field2 == "amount");
14972                            if is_amount_linecount {
14973                                let xs: Vec<f64> =
14974                                    paired_amount_linecount.iter().map(|(a, _)| *a).collect();
14975                                let ys: Vec<f64> =
14976                                    paired_amount_linecount.iter().map(|(_, l)| *l).collect();
14977                                results.push(run_correlation_check(
14978                                    &pair_key,
14979                                    &xs,
14980                                    &ys,
14981                                    ec.expected_r,
14982                                    ec.tolerance,
14983                                ));
14984                            } else {
14985                                results.push(StatisticalTestResult {
14986                                    name: format!("correlation_check_{pair_key}"),
14987                                    outcome: TestOutcome::Skipped,
14988                                    statistic: 0.0,
14989                                    threshold: ec.tolerance,
14990                                    message: format!(
14991                                        "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
14992                                        ec.field1, ec.field2
14993                                    ),
14994                                });
14995                            }
14996                        }
14997                    }
14998                }
14999            }
15000        }
15001
15002        let report = StatisticalValidationReport {
15003            sample_count: amounts.len(),
15004            results,
15005        };
15006
15007        if cfg.reporting.fail_on_error && !report.all_passed() {
15008            let failed = report.failed_names().join(", ");
15009            return Err(SynthError::validation(format!(
15010                "statistical validation failed: {failed}"
15011            )));
15012        }
15013
15014        Ok(Some(report))
15015    }
15016
15017    /// v3.3.0: analytics-metadata phase.
15018    ///
15019    /// Runs AFTER all JE-adding phases (including Phase 20b's
15020    /// fraud-bias sweep). Four sub-generators fire in sequence, each
15021    /// gated by an individual `analytics_metadata.<flag>` toggle:
15022    ///
15023    /// 1. `PriorYearGenerator` — prior-year comparatives derived from
15024    ///    current-period account balances.
15025    /// 2. `IndustryBenchmarkGenerator` — industry benchmarks for the
15026    ///    configured `global.industry`.
15027    /// 3. `ManagementReportGenerator` — management-report artefacts.
15028    /// 4. `DriftEventGenerator` — post-generation drift-event labels.
15029    fn phase_analytics_metadata(
15030        &mut self,
15031        entries: &[JournalEntry],
15032    ) -> SynthResult<AnalyticsMetadataSnapshot> {
15033        use datasynth_generators::drift_event_generator::DriftEventGenerator;
15034        use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15035        use datasynth_generators::management_report_generator::ManagementReportGenerator;
15036        use datasynth_generators::prior_year_generator::PriorYearGenerator;
15037        use std::collections::BTreeMap;
15038
15039        let mut snap = AnalyticsMetadataSnapshot::default();
15040
15041        if !self.phase_config.generate_analytics_metadata {
15042            return Ok(snap);
15043        }
15044
15045        let cfg = &self.config.analytics_metadata;
15046        let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15047            .map(|d| d.year())
15048            .unwrap_or(2025);
15049
15050        // ---- 1. Prior-year comparatives ----
15051        if cfg.prior_year {
15052            let mut gen = PriorYearGenerator::new(self.seed + 9100);
15053            for company in &self.config.companies {
15054                // Aggregate current-period balances per account code +
15055                // account name from the entries slice.
15056                let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
15057                    BTreeMap::new();
15058                for je in entries {
15059                    if je.header.company_code != company.code {
15060                        continue;
15061                    }
15062                    for line in &je.lines {
15063                        let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
15064                            (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
15065                        });
15066                        entry.1 += line.debit_amount - line.credit_amount;
15067                    }
15068                }
15069                let current: Vec<(String, String, rust_decimal::Decimal)> = balances
15070                    .into_iter()
15071                    .filter(|(_, (_, bal))| !bal.is_zero())
15072                    .map(|(code, (name, bal))| (code, name, bal))
15073                    .collect();
15074                if !current.is_empty() {
15075                    let comparatives =
15076                        gen.generate_comparatives(&company.code, fiscal_year, &current);
15077                    snap.prior_year_comparatives.extend(comparatives);
15078                }
15079            }
15080            info!(
15081                "v3.3.0 analytics: {} prior-year comparatives across {} companies",
15082                snap.prior_year_comparatives.len(),
15083                self.config.companies.len()
15084            );
15085        }
15086
15087        // ---- 2. Industry benchmarks ----
15088        if cfg.industry_benchmark {
15089            use datasynth_core::models::IndustrySector;
15090            let industry = match self.config.global.industry {
15091                IndustrySector::Manufacturing => "manufacturing",
15092                IndustrySector::Retail => "retail",
15093                IndustrySector::FinancialServices => "financial_services",
15094                IndustrySector::Technology => "technology",
15095                IndustrySector::Healthcare => "healthcare",
15096                _ => "other",
15097            };
15098            let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
15099            let benchmarks = gen.generate(industry, fiscal_year);
15100            info!(
15101                "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
15102                benchmarks.len()
15103            );
15104            snap.industry_benchmarks = benchmarks;
15105        }
15106
15107        // ---- 3. Management reports ----
15108        if cfg.management_reports {
15109            let mut gen = ManagementReportGenerator::new(self.seed + 9300);
15110            let period_months = self.config.global.period_months;
15111            for company in &self.config.companies {
15112                let reports =
15113                    gen.generate_reports(&company.code, fiscal_year as u32, period_months);
15114                snap.management_reports.extend(reports);
15115            }
15116            info!(
15117                "v3.3.0 analytics: {} management reports across {} companies",
15118                snap.management_reports.len(),
15119                self.config.companies.len()
15120            );
15121        }
15122
15123        // ---- 4. Drift-event labels ----
15124        if cfg.drift_events {
15125            let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
15126                .expect("hardcoded NaiveDate 2025-01-01 is valid");
15127            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15128                .unwrap_or(fallback_start);
15129            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
15130            let mut gen = DriftEventGenerator::new(self.seed + 9400);
15131            let drifts = gen.generate_standalone_drifts(start_date, end_date);
15132            info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
15133            snap.drift_events = drifts;
15134        }
15135        // `entries` parameter reserved for future JE-aware drift detection
15136        let _ = entries;
15137
15138        Ok(snap)
15139    }
15140}
15141
15142/// Get the directory name for a graph export format.
15143fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
15144    match format {
15145        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
15146        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
15147        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
15148        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
15149        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
15150    }
15151}
15152
15153/// Aggregate journal entry lines into per-account trial balance rows.
15154///
15155/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
15156/// debit/credit totals and a net balance (debit minus credit).
15157fn compute_trial_balance_entries(
15158    entries: &[JournalEntry],
15159    entity_code: &str,
15160    fiscal_year: i32,
15161    coa: Option<&ChartOfAccounts>,
15162) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
15163    use std::collections::BTreeMap;
15164
15165    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
15166        BTreeMap::new();
15167
15168    for je in entries {
15169        for line in &je.lines {
15170            let entry = balances.entry(line.account_code.clone()).or_default();
15171            entry.0 += line.debit_amount;
15172            entry.1 += line.credit_amount;
15173        }
15174    }
15175
15176    balances
15177        .into_iter()
15178        .map(
15179            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
15180                account_description: coa
15181                    .and_then(|c| c.get_account(&account_code))
15182                    .map(|a| a.description().to_string())
15183                    .unwrap_or_else(|| account_code.clone()),
15184                account_code,
15185                debit_balance: debit,
15186                credit_balance: credit,
15187                net_balance: debit - credit,
15188                entity_code: entity_code.to_string(),
15189                period: format!("FY{}", fiscal_year),
15190            },
15191        )
15192        .collect()
15193}
15194
15195#[cfg(test)]
15196#[allow(clippy::unwrap_used)]
15197mod tests {
15198    use super::*;
15199    use datasynth_config::schema::*;
15200
    /// Build the baseline `GeneratorConfig` shared by the tests in this module.
    ///
    /// The fixture describes a single USD manufacturing company (code
    /// `"1000"`) over a one-month period starting 2024-01-01, with a fixed
    /// seed of 42, parallelism off and a small chart of accounts. Every
    /// other section is left at its `Default` value (`country_packs` is
    /// `None`).
    fn create_test_config() -> GeneratorConfig {
        GeneratorConfig {
            // Global settings: fixed seed, one-month window, no parallelism.
            global: GlobalConfig {
                industry: IndustrySector::Manufacturing,
                start_date: "2024-01-01".to_string(),
                period_months: 1,
                seed: Some(42),
                parallel: false,
                group_currency: "USD".to_string(),
                presentation_currency: None,
                worker_threads: 0,
                memory_limit_mb: 0,
                fiscal_year_months: None,
            },
            // A single company keeps generated volumes small.
            companies: vec![CompanyConfig {
                code: "1000".to_string(),
                name: "Test Company".to_string(),
                currency: "USD".to_string(),
                functional_currency: None,
                country: "US".to_string(),
                annual_transaction_volume: TransactionVolume::TenK,
                volume_weight: 1.0,
                fiscal_year_variant: "K4".to_string(),
            }],
            chart_of_accounts: ChartOfAccountsConfig {
                complexity: CoAComplexity::Small,
                industry_specific: true,
                custom_accounts: None,
                min_hierarchy_depth: 2,
                max_hierarchy_depth: 4,
            },
            // Everything below uses the section's default configuration.
            transactions: TransactionConfig::default(),
            output: OutputConfig::default(),
            fraud: FraudConfig::default(),
            internal_controls: InternalControlsConfig::default(),
            business_processes: BusinessProcessConfig::default(),
            user_personas: UserPersonaConfig::default(),
            templates: TemplateConfig::default(),
            approval: ApprovalConfig::default(),
            departments: DepartmentConfig::default(),
            master_data: MasterDataConfig::default(),
            document_flows: DocumentFlowConfig::default(),
            intercompany: IntercompanyConfig::default(),
            balance: BalanceConfig::default(),
            ocpm: OcpmConfig::default(),
            audit: AuditGenerationConfig::default(),
            banking: datasynth_banking::BankingConfig::default(),
            data_quality: DataQualitySchemaConfig::default(),
            scenario: ScenarioConfig::default(),
            temporal: TemporalDriftConfig::default(),
            graph_export: GraphExportConfig::default(),
            streaming: StreamingSchemaConfig::default(),
            rate_limit: RateLimitSchemaConfig::default(),
            temporal_attributes: TemporalAttributeSchemaConfig::default(),
            relationships: RelationshipSchemaConfig::default(),
            accounting_standards: AccountingStandardsConfig::default(),
            audit_standards: AuditStandardsConfig::default(),
            distributions: Default::default(),
            temporal_patterns: Default::default(),
            vendor_network: VendorNetworkSchemaConfig::default(),
            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
            relationship_strength: RelationshipStrengthSchemaConfig::default(),
            cross_process_links: CrossProcessLinksSchemaConfig::default(),
            organizational_events: OrganizationalEventsSchemaConfig::default(),
            behavioral_drift: BehavioralDriftSchemaConfig::default(),
            market_drift: MarketDriftSchemaConfig::default(),
            drift_labeling: DriftLabelingSchemaConfig::default(),
            anomaly_injection: Default::default(),
            industry_specific: Default::default(),
            fingerprint_privacy: Default::default(),
            quality_gates: Default::default(),
            compliance: Default::default(),
            webhooks: Default::default(),
            llm: Default::default(),
            diffusion: Default::default(),
            causal: Default::default(),
            source_to_pay: Default::default(),
            financial_reporting: Default::default(),
            hr: Default::default(),
            manufacturing: Default::default(),
            sales_quotes: Default::default(),
            tax: Default::default(),
            treasury: Default::default(),
            project_accounting: Default::default(),
            esg: Default::default(),
            country_packs: None,
            scenarios: Default::default(),
            session: Default::default(),
            compliance_regulations: Default::default(),
            analytics_metadata: Default::default(),
        }
    }
15293
15294    #[test]
15295    fn test_enhanced_orchestrator_creation() {
15296        let config = create_test_config();
15297        let orchestrator = EnhancedOrchestrator::with_defaults(config);
15298        assert!(orchestrator.is_ok());
15299    }
15300
15301    #[test]
15302    fn test_minimal_generation() {
15303        let config = create_test_config();
15304        let phase_config = PhaseConfig {
15305            generate_master_data: false,
15306            generate_document_flows: false,
15307            generate_journal_entries: true,
15308            inject_anomalies: false,
15309            show_progress: false,
15310            ..Default::default()
15311        };
15312
15313        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15314        let result = orchestrator.generate();
15315
15316        assert!(result.is_ok());
15317        let result = result.unwrap();
15318        assert!(!result.journal_entries.is_empty());
15319    }
15320
15321    #[test]
15322    fn test_master_data_generation() {
15323        let config = create_test_config();
15324        let phase_config = PhaseConfig {
15325            generate_master_data: true,
15326            generate_document_flows: false,
15327            generate_journal_entries: false,
15328            inject_anomalies: false,
15329            show_progress: false,
15330            vendors_per_company: 5,
15331            customers_per_company: 5,
15332            materials_per_company: 10,
15333            assets_per_company: 5,
15334            employees_per_company: 10,
15335            ..Default::default()
15336        };
15337
15338        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15339        let result = orchestrator.generate().unwrap();
15340
15341        assert!(!result.master_data.vendors.is_empty());
15342        assert!(!result.master_data.customers.is_empty());
15343        assert!(!result.master_data.materials.is_empty());
15344    }
15345
15346    #[test]
15347    fn test_document_flow_generation() {
15348        let config = create_test_config();
15349        let phase_config = PhaseConfig {
15350            generate_master_data: true,
15351            generate_document_flows: true,
15352            generate_journal_entries: false,
15353            inject_anomalies: false,
15354            inject_data_quality: false,
15355            validate_balances: false,
15356            generate_ocpm_events: false,
15357            show_progress: false,
15358            vendors_per_company: 5,
15359            customers_per_company: 5,
15360            materials_per_company: 10,
15361            assets_per_company: 5,
15362            employees_per_company: 10,
15363            p2p_chains: 5,
15364            o2c_chains: 5,
15365            ..Default::default()
15366        };
15367
15368        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15369        let result = orchestrator.generate().unwrap();
15370
15371        // Should have generated P2P and O2C chains
15372        assert!(!result.document_flows.p2p_chains.is_empty());
15373        assert!(!result.document_flows.o2c_chains.is_empty());
15374
15375        // Flattened documents should be populated
15376        assert!(!result.document_flows.purchase_orders.is_empty());
15377        assert!(!result.document_flows.sales_orders.is_empty());
15378    }
15379
15380    #[test]
15381    fn test_anomaly_injection() {
15382        let config = create_test_config();
15383        let phase_config = PhaseConfig {
15384            generate_master_data: false,
15385            generate_document_flows: false,
15386            generate_journal_entries: true,
15387            inject_anomalies: true,
15388            show_progress: false,
15389            ..Default::default()
15390        };
15391
15392        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15393        let result = orchestrator.generate().unwrap();
15394
15395        // Should have journal entries
15396        assert!(!result.journal_entries.is_empty());
15397
15398        // With ~833 entries and 2% rate, expect some anomalies
15399        // Note: This is probabilistic, so we just verify the structure exists
15400        assert!(result.anomaly_labels.summary.is_some());
15401    }
15402
15403    #[test]
15404    fn test_full_generation_pipeline() {
15405        let config = create_test_config();
15406        let phase_config = PhaseConfig {
15407            generate_master_data: true,
15408            generate_document_flows: true,
15409            generate_journal_entries: true,
15410            inject_anomalies: false,
15411            inject_data_quality: false,
15412            validate_balances: true,
15413            generate_ocpm_events: false,
15414            show_progress: false,
15415            vendors_per_company: 3,
15416            customers_per_company: 3,
15417            materials_per_company: 5,
15418            assets_per_company: 3,
15419            employees_per_company: 5,
15420            p2p_chains: 3,
15421            o2c_chains: 3,
15422            ..Default::default()
15423        };
15424
15425        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15426        let result = orchestrator.generate().unwrap();
15427
15428        // All phases should have results
15429        assert!(!result.master_data.vendors.is_empty());
15430        assert!(!result.master_data.customers.is_empty());
15431        assert!(!result.document_flows.p2p_chains.is_empty());
15432        assert!(!result.document_flows.o2c_chains.is_empty());
15433        assert!(!result.journal_entries.is_empty());
15434        assert!(result.statistics.accounts_count > 0);
15435
15436        // Subledger linking should have run
15437        assert!(!result.subledger.ap_invoices.is_empty());
15438        assert!(!result.subledger.ar_invoices.is_empty());
15439
15440        // Balance validation should have run
15441        assert!(result.balance_validation.validated);
15442        assert!(result.balance_validation.entries_processed > 0);
15443    }
15444
15445    #[test]
15446    fn test_subledger_linking() {
15447        let config = create_test_config();
15448        let phase_config = PhaseConfig {
15449            generate_master_data: true,
15450            generate_document_flows: true,
15451            generate_journal_entries: false,
15452            inject_anomalies: false,
15453            inject_data_quality: false,
15454            validate_balances: false,
15455            generate_ocpm_events: false,
15456            show_progress: false,
15457            vendors_per_company: 5,
15458            customers_per_company: 5,
15459            materials_per_company: 10,
15460            assets_per_company: 3,
15461            employees_per_company: 5,
15462            p2p_chains: 5,
15463            o2c_chains: 5,
15464            ..Default::default()
15465        };
15466
15467        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15468        let result = orchestrator.generate().unwrap();
15469
15470        // Should have document flows
15471        assert!(!result.document_flows.vendor_invoices.is_empty());
15472        assert!(!result.document_flows.customer_invoices.is_empty());
15473
15474        // Subledger should be linked from document flows
15475        assert!(!result.subledger.ap_invoices.is_empty());
15476        assert!(!result.subledger.ar_invoices.is_empty());
15477
15478        // AP invoices count should match vendor invoices count
15479        assert_eq!(
15480            result.subledger.ap_invoices.len(),
15481            result.document_flows.vendor_invoices.len()
15482        );
15483
15484        // AR invoices count should match customer invoices count
15485        assert_eq!(
15486            result.subledger.ar_invoices.len(),
15487            result.document_flows.customer_invoices.len()
15488        );
15489
15490        // Statistics should reflect subledger counts
15491        assert_eq!(
15492            result.statistics.ap_invoice_count,
15493            result.subledger.ap_invoices.len()
15494        );
15495        assert_eq!(
15496            result.statistics.ar_invoice_count,
15497            result.subledger.ar_invoices.len()
15498        );
15499    }
15500
15501    #[test]
15502    fn test_balance_validation() {
15503        let config = create_test_config();
15504        let phase_config = PhaseConfig {
15505            generate_master_data: false,
15506            generate_document_flows: false,
15507            generate_journal_entries: true,
15508            inject_anomalies: false,
15509            validate_balances: true,
15510            show_progress: false,
15511            ..Default::default()
15512        };
15513
15514        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15515        let result = orchestrator.generate().unwrap();
15516
15517        // Balance validation should run
15518        assert!(result.balance_validation.validated);
15519        assert!(result.balance_validation.entries_processed > 0);
15520
15521        // Generated JEs should be balanced (no unbalanced entries)
15522        assert!(!result.balance_validation.has_unbalanced_entries);
15523
15524        // Total debits should equal total credits
15525        assert_eq!(
15526            result.balance_validation.total_debits,
15527            result.balance_validation.total_credits
15528        );
15529    }
15530
15531    #[test]
15532    fn test_statistics_accuracy() {
15533        let config = create_test_config();
15534        let phase_config = PhaseConfig {
15535            generate_master_data: true,
15536            generate_document_flows: false,
15537            generate_journal_entries: true,
15538            inject_anomalies: false,
15539            show_progress: false,
15540            vendors_per_company: 10,
15541            customers_per_company: 20,
15542            materials_per_company: 15,
15543            assets_per_company: 5,
15544            employees_per_company: 8,
15545            ..Default::default()
15546        };
15547
15548        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15549        let result = orchestrator.generate().unwrap();
15550
15551        // Statistics should match actual data
15552        assert_eq!(
15553            result.statistics.vendor_count,
15554            result.master_data.vendors.len()
15555        );
15556        assert_eq!(
15557            result.statistics.customer_count,
15558            result.master_data.customers.len()
15559        );
15560        assert_eq!(
15561            result.statistics.material_count,
15562            result.master_data.materials.len()
15563        );
15564        assert_eq!(
15565            result.statistics.total_entries as usize,
15566            result.journal_entries.len()
15567        );
15568    }
15569
15570    #[test]
15571    fn test_phase_config_defaults() {
15572        let config = PhaseConfig::default();
15573        assert!(config.generate_master_data);
15574        assert!(config.generate_document_flows);
15575        assert!(config.generate_journal_entries);
15576        assert!(!config.inject_anomalies);
15577        assert!(config.validate_balances);
15578        assert!(config.show_progress);
15579        assert!(config.vendors_per_company > 0);
15580        assert!(config.customers_per_company > 0);
15581    }
15582
15583    #[test]
15584    fn test_get_coa_before_generation() {
15585        let config = create_test_config();
15586        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
15587
15588        // Before generation, CoA should be None
15589        assert!(orchestrator.get_coa().is_none());
15590    }
15591
15592    #[test]
15593    fn test_get_coa_after_generation() {
15594        let config = create_test_config();
15595        let phase_config = PhaseConfig {
15596            generate_master_data: false,
15597            generate_document_flows: false,
15598            generate_journal_entries: true,
15599            inject_anomalies: false,
15600            show_progress: false,
15601            ..Default::default()
15602        };
15603
15604        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15605        let _ = orchestrator.generate().unwrap();
15606
15607        // After generation, CoA should be available
15608        assert!(orchestrator.get_coa().is_some());
15609    }
15610
15611    #[test]
15612    fn test_get_master_data() {
15613        let config = create_test_config();
15614        let phase_config = PhaseConfig {
15615            generate_master_data: true,
15616            generate_document_flows: false,
15617            generate_journal_entries: false,
15618            inject_anomalies: false,
15619            show_progress: false,
15620            vendors_per_company: 5,
15621            customers_per_company: 5,
15622            materials_per_company: 5,
15623            assets_per_company: 5,
15624            employees_per_company: 5,
15625            ..Default::default()
15626        };
15627
15628        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15629        let result = orchestrator.generate().unwrap();
15630
15631        // After generate(), master_data is moved into the result
15632        assert!(!result.master_data.vendors.is_empty());
15633    }
15634
15635    #[test]
15636    fn test_with_progress_builder() {
15637        let config = create_test_config();
15638        let orchestrator = EnhancedOrchestrator::with_defaults(config)
15639            .unwrap()
15640            .with_progress(false);
15641
15642        // Should still work without progress
15643        assert!(!orchestrator.phase_config.show_progress);
15644    }
15645
15646    #[test]
15647    fn test_multi_company_generation() {
15648        let mut config = create_test_config();
15649        config.companies.push(CompanyConfig {
15650            code: "2000".to_string(),
15651            name: "Subsidiary".to_string(),
15652            currency: "EUR".to_string(),
15653            functional_currency: None,
15654            country: "DE".to_string(),
15655            annual_transaction_volume: TransactionVolume::TenK,
15656            volume_weight: 0.5,
15657            fiscal_year_variant: "K4".to_string(),
15658        });
15659
15660        let phase_config = PhaseConfig {
15661            generate_master_data: true,
15662            generate_document_flows: false,
15663            generate_journal_entries: true,
15664            inject_anomalies: false,
15665            show_progress: false,
15666            vendors_per_company: 5,
15667            customers_per_company: 5,
15668            materials_per_company: 5,
15669            assets_per_company: 5,
15670            employees_per_company: 5,
15671            ..Default::default()
15672        };
15673
15674        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15675        let result = orchestrator.generate().unwrap();
15676
15677        // Should have master data for both companies
15678        assert!(result.statistics.vendor_count >= 10); // 5 per company
15679        assert!(result.statistics.customer_count >= 10);
15680        assert!(result.statistics.companies_count == 2);
15681    }
15682
15683    #[test]
15684    fn test_empty_master_data_skips_document_flows() {
15685        let config = create_test_config();
15686        let phase_config = PhaseConfig {
15687            generate_master_data: false,   // Skip master data
15688            generate_document_flows: true, // Try to generate flows
15689            generate_journal_entries: false,
15690            inject_anomalies: false,
15691            show_progress: false,
15692            ..Default::default()
15693        };
15694
15695        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15696        let result = orchestrator.generate().unwrap();
15697
15698        // Without master data, document flows should be empty
15699        assert!(result.document_flows.p2p_chains.is_empty());
15700        assert!(result.document_flows.o2c_chains.is_empty());
15701    }
15702
15703    #[test]
15704    fn test_journal_entry_line_item_count() {
15705        let config = create_test_config();
15706        let phase_config = PhaseConfig {
15707            generate_master_data: false,
15708            generate_document_flows: false,
15709            generate_journal_entries: true,
15710            inject_anomalies: false,
15711            show_progress: false,
15712            ..Default::default()
15713        };
15714
15715        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15716        let result = orchestrator.generate().unwrap();
15717
15718        // Total line items should match sum of all entry line counts
15719        let calculated_line_items: u64 = result
15720            .journal_entries
15721            .iter()
15722            .map(|e| e.line_count() as u64)
15723            .sum();
15724        assert_eq!(result.statistics.total_line_items, calculated_line_items);
15725    }
15726
15727    #[test]
15728    fn test_audit_generation() {
15729        let config = create_test_config();
15730        let phase_config = PhaseConfig {
15731            generate_master_data: false,
15732            generate_document_flows: false,
15733            generate_journal_entries: true,
15734            inject_anomalies: false,
15735            show_progress: false,
15736            generate_audit: true,
15737            audit_engagements: 2,
15738            workpapers_per_engagement: 5,
15739            evidence_per_workpaper: 2,
15740            risks_per_engagement: 3,
15741            findings_per_engagement: 2,
15742            judgments_per_engagement: 2,
15743            ..Default::default()
15744        };
15745
15746        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15747        let result = orchestrator.generate().unwrap();
15748
15749        // Should have generated audit data
15750        assert_eq!(result.audit.engagements.len(), 2);
15751        assert!(!result.audit.workpapers.is_empty());
15752        assert!(!result.audit.evidence.is_empty());
15753        assert!(!result.audit.risk_assessments.is_empty());
15754        assert!(!result.audit.findings.is_empty());
15755        assert!(!result.audit.judgments.is_empty());
15756
15757        // New ISA entity collections should also be populated
15758        assert!(
15759            !result.audit.confirmations.is_empty(),
15760            "ISA 505 confirmations should be generated"
15761        );
15762        assert!(
15763            !result.audit.confirmation_responses.is_empty(),
15764            "ISA 505 confirmation responses should be generated"
15765        );
15766        assert!(
15767            !result.audit.procedure_steps.is_empty(),
15768            "ISA 330 procedure steps should be generated"
15769        );
15770        // Samples may or may not be generated depending on workpaper sampling methods
15771        assert!(
15772            !result.audit.analytical_results.is_empty(),
15773            "ISA 520 analytical procedures should be generated"
15774        );
15775        assert!(
15776            !result.audit.ia_functions.is_empty(),
15777            "ISA 610 IA functions should be generated (one per engagement)"
15778        );
15779        assert!(
15780            !result.audit.related_parties.is_empty(),
15781            "ISA 550 related parties should be generated"
15782        );
15783
15784        // Statistics should match
15785        assert_eq!(
15786            result.statistics.audit_engagement_count,
15787            result.audit.engagements.len()
15788        );
15789        assert_eq!(
15790            result.statistics.audit_workpaper_count,
15791            result.audit.workpapers.len()
15792        );
15793        assert_eq!(
15794            result.statistics.audit_evidence_count,
15795            result.audit.evidence.len()
15796        );
15797        assert_eq!(
15798            result.statistics.audit_risk_count,
15799            result.audit.risk_assessments.len()
15800        );
15801        assert_eq!(
15802            result.statistics.audit_finding_count,
15803            result.audit.findings.len()
15804        );
15805        assert_eq!(
15806            result.statistics.audit_judgment_count,
15807            result.audit.judgments.len()
15808        );
15809        assert_eq!(
15810            result.statistics.audit_confirmation_count,
15811            result.audit.confirmations.len()
15812        );
15813        assert_eq!(
15814            result.statistics.audit_confirmation_response_count,
15815            result.audit.confirmation_responses.len()
15816        );
15817        assert_eq!(
15818            result.statistics.audit_procedure_step_count,
15819            result.audit.procedure_steps.len()
15820        );
15821        assert_eq!(
15822            result.statistics.audit_sample_count,
15823            result.audit.samples.len()
15824        );
15825        assert_eq!(
15826            result.statistics.audit_analytical_result_count,
15827            result.audit.analytical_results.len()
15828        );
15829        assert_eq!(
15830            result.statistics.audit_ia_function_count,
15831            result.audit.ia_functions.len()
15832        );
15833        assert_eq!(
15834            result.statistics.audit_ia_report_count,
15835            result.audit.ia_reports.len()
15836        );
15837        assert_eq!(
15838            result.statistics.audit_related_party_count,
15839            result.audit.related_parties.len()
15840        );
15841        assert_eq!(
15842            result.statistics.audit_related_party_transaction_count,
15843            result.audit.related_party_transactions.len()
15844        );
15845    }
15846
15847    #[test]
15848    fn test_new_phases_disabled_by_default() {
15849        let config = create_test_config();
15850        // Verify new config fields default to disabled
15851        assert!(!config.llm.enabled);
15852        assert!(!config.diffusion.enabled);
15853        assert!(!config.causal.enabled);
15854
15855        let phase_config = PhaseConfig {
15856            generate_master_data: false,
15857            generate_document_flows: false,
15858            generate_journal_entries: true,
15859            inject_anomalies: false,
15860            show_progress: false,
15861            ..Default::default()
15862        };
15863
15864        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15865        let result = orchestrator.generate().unwrap();
15866
15867        // All new phase statistics should be zero when disabled
15868        assert_eq!(result.statistics.llm_enrichment_ms, 0);
15869        assert_eq!(result.statistics.llm_vendors_enriched, 0);
15870        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
15871        assert_eq!(result.statistics.diffusion_samples_generated, 0);
15872        assert_eq!(result.statistics.causal_generation_ms, 0);
15873        assert_eq!(result.statistics.causal_samples_generated, 0);
15874        assert!(result.statistics.causal_validation_passed.is_none());
15875        assert_eq!(result.statistics.counterfactual_pair_count, 0);
15876        assert!(result.counterfactual_pairs.is_empty());
15877    }
15878
15879    #[test]
15880    fn test_counterfactual_generation_enabled() {
15881        let config = create_test_config();
15882        let phase_config = PhaseConfig {
15883            generate_master_data: false,
15884            generate_document_flows: false,
15885            generate_journal_entries: true,
15886            inject_anomalies: false,
15887            show_progress: false,
15888            generate_counterfactuals: true,
15889            generate_period_close: false, // Disable so entry count matches counterfactual pairs
15890            ..Default::default()
15891        };
15892
15893        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15894        let result = orchestrator.generate().unwrap();
15895
15896        // With JE generation enabled, counterfactual pairs should be generated
15897        if !result.journal_entries.is_empty() {
15898            assert_eq!(
15899                result.counterfactual_pairs.len(),
15900                result.journal_entries.len()
15901            );
15902            assert_eq!(
15903                result.statistics.counterfactual_pair_count,
15904                result.journal_entries.len()
15905            );
15906            // Each pair should have a distinct pair_id
15907            let ids: std::collections::HashSet<_> = result
15908                .counterfactual_pairs
15909                .iter()
15910                .map(|p| p.pair_id.clone())
15911                .collect();
15912            assert_eq!(ids.len(), result.counterfactual_pairs.len());
15913        }
15914    }
15915
15916    #[test]
15917    fn test_llm_enrichment_enabled() {
15918        let mut config = create_test_config();
15919        config.llm.enabled = true;
15920        config.llm.max_vendor_enrichments = 3;
15921
15922        let phase_config = PhaseConfig {
15923            generate_master_data: true,
15924            generate_document_flows: false,
15925            generate_journal_entries: false,
15926            inject_anomalies: false,
15927            show_progress: false,
15928            vendors_per_company: 5,
15929            customers_per_company: 3,
15930            materials_per_company: 3,
15931            assets_per_company: 3,
15932            employees_per_company: 3,
15933            ..Default::default()
15934        };
15935
15936        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15937        let result = orchestrator.generate().unwrap();
15938
15939        // LLM enrichment should have run
15940        assert!(result.statistics.llm_vendors_enriched > 0);
15941        assert!(result.statistics.llm_vendors_enriched <= 3);
15942    }
15943
15944    #[test]
15945    fn test_diffusion_enhancement_enabled() {
15946        let mut config = create_test_config();
15947        config.diffusion.enabled = true;
15948        config.diffusion.n_steps = 50;
15949        config.diffusion.sample_size = 20;
15950
15951        let phase_config = PhaseConfig {
15952            generate_master_data: false,
15953            generate_document_flows: false,
15954            generate_journal_entries: true,
15955            inject_anomalies: false,
15956            show_progress: false,
15957            ..Default::default()
15958        };
15959
15960        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15961        let result = orchestrator.generate().unwrap();
15962
15963        // Diffusion phase should have generated samples
15964        assert_eq!(result.statistics.diffusion_samples_generated, 20);
15965    }
15966
15967    #[test]
15968    fn test_causal_overlay_enabled() {
15969        let mut config = create_test_config();
15970        config.causal.enabled = true;
15971        config.causal.template = "fraud_detection".to_string();
15972        config.causal.sample_size = 100;
15973        config.causal.validate = true;
15974
15975        let phase_config = PhaseConfig {
15976            generate_master_data: false,
15977            generate_document_flows: false,
15978            generate_journal_entries: true,
15979            inject_anomalies: false,
15980            show_progress: false,
15981            ..Default::default()
15982        };
15983
15984        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15985        let result = orchestrator.generate().unwrap();
15986
15987        // Causal phase should have generated samples
15988        assert_eq!(result.statistics.causal_samples_generated, 100);
15989        // Validation should have run
15990        assert!(result.statistics.causal_validation_passed.is_some());
15991    }
15992
15993    #[test]
15994    fn test_causal_overlay_revenue_cycle_template() {
15995        let mut config = create_test_config();
15996        config.causal.enabled = true;
15997        config.causal.template = "revenue_cycle".to_string();
15998        config.causal.sample_size = 50;
15999        config.causal.validate = false;
16000
16001        let phase_config = PhaseConfig {
16002            generate_master_data: false,
16003            generate_document_flows: false,
16004            generate_journal_entries: true,
16005            inject_anomalies: false,
16006            show_progress: false,
16007            ..Default::default()
16008        };
16009
16010        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16011        let result = orchestrator.generate().unwrap();
16012
16013        // Causal phase should have generated samples
16014        assert_eq!(result.statistics.causal_samples_generated, 50);
16015        // Validation was disabled
16016        assert!(result.statistics.causal_validation_passed.is_none());
16017    }
16018
16019    #[test]
16020    fn test_all_new_phases_enabled_together() {
16021        let mut config = create_test_config();
16022        config.llm.enabled = true;
16023        config.llm.max_vendor_enrichments = 2;
16024        config.diffusion.enabled = true;
16025        config.diffusion.n_steps = 20;
16026        config.diffusion.sample_size = 10;
16027        config.causal.enabled = true;
16028        config.causal.sample_size = 50;
16029        config.causal.validate = true;
16030
16031        let phase_config = PhaseConfig {
16032            generate_master_data: true,
16033            generate_document_flows: false,
16034            generate_journal_entries: true,
16035            inject_anomalies: false,
16036            show_progress: false,
16037            vendors_per_company: 5,
16038            customers_per_company: 3,
16039            materials_per_company: 3,
16040            assets_per_company: 3,
16041            employees_per_company: 3,
16042            ..Default::default()
16043        };
16044
16045        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16046        let result = orchestrator.generate().unwrap();
16047
16048        // All three phases should have run
16049        assert!(result.statistics.llm_vendors_enriched > 0);
16050        assert_eq!(result.statistics.diffusion_samples_generated, 10);
16051        assert_eq!(result.statistics.causal_samples_generated, 50);
16052        assert!(result.statistics.causal_validation_passed.is_some());
16053    }
16054
16055    #[test]
16056    fn test_statistics_serialization_with_new_fields() {
16057        let stats = EnhancedGenerationStatistics {
16058            total_entries: 100,
16059            total_line_items: 500,
16060            llm_enrichment_ms: 42,
16061            llm_vendors_enriched: 10,
16062            diffusion_enhancement_ms: 100,
16063            diffusion_samples_generated: 50,
16064            causal_generation_ms: 200,
16065            causal_samples_generated: 100,
16066            causal_validation_passed: Some(true),
16067            ..Default::default()
16068        };
16069
16070        let json = serde_json::to_string(&stats).unwrap();
16071        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
16072
16073        assert_eq!(deserialized.llm_enrichment_ms, 42);
16074        assert_eq!(deserialized.llm_vendors_enriched, 10);
16075        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
16076        assert_eq!(deserialized.diffusion_samples_generated, 50);
16077        assert_eq!(deserialized.causal_generation_ms, 200);
16078        assert_eq!(deserialized.causal_samples_generated, 100);
16079        assert_eq!(deserialized.causal_validation_passed, Some(true));
16080    }
16081
16082    #[test]
16083    fn test_statistics_backward_compat_deserialization() {
16084        // Old JSON without the new fields should still deserialize
16085        let old_json = r#"{
16086            "total_entries": 100,
16087            "total_line_items": 500,
16088            "accounts_count": 50,
16089            "companies_count": 1,
16090            "period_months": 12,
16091            "vendor_count": 10,
16092            "customer_count": 20,
16093            "material_count": 15,
16094            "asset_count": 5,
16095            "employee_count": 8,
16096            "p2p_chain_count": 5,
16097            "o2c_chain_count": 5,
16098            "ap_invoice_count": 5,
16099            "ar_invoice_count": 5,
16100            "ocpm_event_count": 0,
16101            "ocpm_object_count": 0,
16102            "ocpm_case_count": 0,
16103            "audit_engagement_count": 0,
16104            "audit_workpaper_count": 0,
16105            "audit_evidence_count": 0,
16106            "audit_risk_count": 0,
16107            "audit_finding_count": 0,
16108            "audit_judgment_count": 0,
16109            "anomalies_injected": 0,
16110            "data_quality_issues": 0,
16111            "banking_customer_count": 0,
16112            "banking_account_count": 0,
16113            "banking_transaction_count": 0,
16114            "banking_suspicious_count": 0,
16115            "graph_export_count": 0,
16116            "graph_node_count": 0,
16117            "graph_edge_count": 0
16118        }"#;
16119
16120        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
16121
16122        // New fields should default to 0 / None
16123        assert_eq!(stats.llm_enrichment_ms, 0);
16124        assert_eq!(stats.llm_vendors_enriched, 0);
16125        assert_eq!(stats.diffusion_enhancement_ms, 0);
16126        assert_eq!(stats.diffusion_samples_generated, 0);
16127        assert_eq!(stats.causal_generation_ms, 0);
16128        assert_eq!(stats.causal_samples_generated, 0);
16129        assert!(stats.causal_validation_passed.is_none());
16130    }
16131}