Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180// ============================================================================
181// Configuration Conversion Functions
182// ============================================================================
183
184/// Convert P2P flow config from schema to generator config.
185fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186    let payment_behavior = &schema_config.payment_behavior;
187    let late_dist = &payment_behavior.late_payment_days_distribution;
188
189    P2PGeneratorConfig {
190        three_way_match_rate: schema_config.three_way_match_rate,
191        partial_delivery_rate: schema_config.partial_delivery_rate,
192        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193        price_variance_rate: schema_config.price_variance_rate,
194        max_price_variance_percent: schema_config.max_price_variance_percent,
195        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198        payment_method_distribution: vec![
199            (PaymentMethod::BankTransfer, 0.60),
200            (PaymentMethod::Check, 0.25),
201            (PaymentMethod::Wire, 0.10),
202            (PaymentMethod::CreditCard, 0.05),
203        ],
204        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205        payment_behavior: P2PPaymentBehavior {
206            late_payment_rate: payment_behavior.late_payment_rate,
207            late_payment_distribution: LatePaymentDistribution {
208                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209                late_8_to_14: late_dist.late_8_to_14,
210                very_late_15_to_30: late_dist.very_late_15_to_30,
211                severely_late_31_to_60: late_dist.severely_late_31_to_60,
212                extremely_late_over_60: late_dist.extremely_late_over_60,
213            },
214            partial_payment_rate: payment_behavior.partial_payment_rate,
215            payment_correction_rate: payment_behavior.payment_correction_rate,
216            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217        },
218    }
219}
220
221/// Convert O2C flow config from schema to generator config.
222fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223    let payment_behavior = &schema_config.payment_behavior;
224
225    O2CGeneratorConfig {
226        credit_check_failure_rate: schema_config.credit_check_failure_rate,
227        partial_shipment_rate: schema_config.partial_shipment_rate,
228        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232        bad_debt_rate: schema_config.bad_debt_rate,
233        returns_rate: schema_config.return_rate,
234        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235        payment_method_distribution: vec![
236            (PaymentMethod::BankTransfer, 0.50),
237            (PaymentMethod::Check, 0.30),
238            (PaymentMethod::Wire, 0.15),
239            (PaymentMethod::CreditCard, 0.05),
240        ],
241        payment_behavior: O2CPaymentBehavior {
242            partial_payment_rate: payment_behavior.partial_payments.rate,
243            short_payment_rate: payment_behavior.short_payments.rate,
244            max_short_percent: payment_behavior.short_payments.max_short_percent,
245            on_account_rate: payment_behavior.on_account_payments.rate,
246            payment_correction_rate: payment_behavior.payment_corrections.rate,
247            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248        },
249    }
250}
251
/// Switches and sizing knobs that decide which generation phases run.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Whether master data (vendors, customers, materials, assets, employees) is generated.
    pub generate_master_data: bool,
    /// Whether document flows (P2P, O2C) are generated.
    pub generate_document_flows: bool,
    /// Whether OCPM events are generated from the document flows.
    pub generate_ocpm_events: bool,
    /// Whether journal entries are generated.
    pub generate_journal_entries: bool,
    /// Whether anomalies are injected.
    pub inject_anomalies: bool,
    /// Whether data quality variations (typos, missing values, format variations) are injected.
    pub inject_data_quality: bool,
    /// Whether the balance sheet equation is validated after generation.
    pub validate_balances: bool,
    /// Whether progress bars are displayed.
    pub show_progress: bool,
    /// Vendor count generated for each company.
    pub vendors_per_company: usize,
    /// Customer count generated for each company.
    pub customers_per_company: usize,
    /// Material count generated for each company.
    pub materials_per_company: usize,
    /// Asset count generated for each company.
    pub assets_per_company: usize,
    /// Employee count generated for each company.
    pub employees_per_company: usize,
    /// How many P2P chains to generate.
    pub p2p_chains: usize,
    /// How many O2C chains to generate.
    pub o2c_chains: usize,
    /// Whether audit data (engagements, workpapers, evidence, risks, findings, judgments)
    /// is generated.
    pub generate_audit: bool,
    /// How many audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpaper count for each engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence item count for each workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessment count for each engagement.
    pub risks_per_engagement: usize,
    /// Finding count for each engagement.
    pub findings_per_engagement: usize,
    /// Professional judgment count for each engagement.
    pub judgments_per_engagement: usize,
    /// Whether banking KYC/AML data (customers, accounts, transactions, typologies)
    /// is generated.
    pub generate_banking: bool,
    /// Whether graph exports (accounting network for ML training) are generated.
    pub generate_graph_export: bool,
    /// Whether S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs,
    /// scorecards) is generated.
    pub generate_sourcing: bool,
    /// Whether bank reconciliations are generated from payments.
    pub generate_bank_reconciliation: bool,
    /// Whether financial statements are generated from trial balances.
    pub generate_financial_statements: bool,
    /// Whether accounting standards data (revenue recognition, impairment) is generated.
    pub generate_accounting_standards: bool,
    /// Whether manufacturing data (production orders, quality inspections, cycle counts)
    /// is generated.
    pub generate_manufacturing: bool,
    /// Whether sales quotes, management KPIs, and budgets are generated.
    pub generate_sales_kpi_budgets: bool,
    /// Whether tax jurisdictions and tax codes are generated.
    pub generate_tax: bool,
    /// Whether ESG data (emissions, energy, water, waste, social, governance) is generated.
    pub generate_esg: bool,
    /// Whether intercompany transactions and eliminations are generated.
    pub generate_intercompany: bool,
    /// Whether process evolution and organizational events are generated.
    pub generate_evolution_events: bool,
    /// Whether counterfactual (original, mutated) JE pairs are generated for ML training.
    pub generate_counterfactuals: bool,
    /// Whether compliance regulations data (standards registry, procedures, findings,
    /// filings) is generated.
    pub generate_compliance_regulations: bool,
    /// Whether period-close journal entries (tax provision, income statement close)
    /// are generated.
    pub generate_period_close: bool,
    /// Whether HR data (payroll, time entries, expenses, pensions, stock comp) is generated.
    pub generate_hr: bool,
    /// Whether treasury data (cash management, hedging, debt, pooling) is generated.
    pub generate_treasury: bool,
    /// Whether project accounting data (projects, costs, revenue, EVM, milestones)
    /// is generated.
    pub generate_project_accounting: bool,
}
336
337impl Default for PhaseConfig {
338    fn default() -> Self {
339        Self {
340            generate_master_data: true,
341            generate_document_flows: true,
342            generate_ocpm_events: false, // Off by default
343            generate_journal_entries: true,
344            inject_anomalies: false,
345            inject_data_quality: false, // Off by default (to preserve clean test data)
346            validate_balances: true,
347            show_progress: true,
348            vendors_per_company: 50,
349            customers_per_company: 100,
350            materials_per_company: 200,
351            assets_per_company: 50,
352            employees_per_company: 100,
353            p2p_chains: 100,
354            o2c_chains: 100,
355            generate_audit: false, // Off by default
356            audit_engagements: 5,
357            workpapers_per_engagement: 20,
358            evidence_per_workpaper: 5,
359            risks_per_engagement: 15,
360            findings_per_engagement: 8,
361            judgments_per_engagement: 10,
362            generate_banking: false,                // Off by default
363            generate_graph_export: false,           // Off by default
364            generate_sourcing: false,               // Off by default
365            generate_bank_reconciliation: false,    // Off by default
366            generate_financial_statements: false,   // Off by default
367            generate_accounting_standards: false,   // Off by default
368            generate_manufacturing: false,          // Off by default
369            generate_sales_kpi_budgets: false,      // Off by default
370            generate_tax: false,                    // Off by default
371            generate_esg: false,                    // Off by default
372            generate_intercompany: false,           // Off by default
373            generate_evolution_events: true,        // On by default
374            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
375            generate_compliance_regulations: false, // Off by default
376            generate_period_close: true,            // On by default
377            generate_hr: false,                     // Off by default
378            generate_treasury: false,               // Off by default
379            generate_project_accounting: false,     // Off by default
380        }
381    }
382}
383
384impl PhaseConfig {
385    /// Derive phase flags from [`GeneratorConfig`].
386    ///
387    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
388    /// CLI flags can override individual fields after calling this method.
389    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390        Self {
391            // Always-on phases
392            generate_master_data: true,
393            generate_document_flows: true,
394            generate_journal_entries: true,
395            validate_balances: true,
396            generate_period_close: true,
397            generate_evolution_events: true,
398            show_progress: true,
399
400            // Feature-gated phases — derived from config sections
401            generate_audit: cfg.audit.enabled,
402            generate_banking: cfg.banking.enabled,
403            generate_graph_export: cfg.graph_export.enabled,
404            generate_sourcing: cfg.source_to_pay.enabled,
405            generate_intercompany: cfg.intercompany.enabled,
406            generate_financial_statements: cfg.financial_reporting.enabled,
407            generate_bank_reconciliation: cfg.financial_reporting.enabled,
408            generate_accounting_standards: cfg.accounting_standards.enabled,
409            generate_manufacturing: cfg.manufacturing.enabled,
410            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411            generate_tax: cfg.tax.enabled,
412            generate_esg: cfg.esg.enabled,
413            generate_ocpm_events: cfg.ocpm.enabled,
414            generate_compliance_regulations: cfg.compliance_regulations.enabled,
415            generate_hr: cfg.hr.enabled,
416            generate_treasury: cfg.treasury.enabled,
417            generate_project_accounting: cfg.project_accounting.enabled,
418
419            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
420            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423            inject_data_quality: cfg.data_quality.enabled,
424
425            // Count defaults (CLI can override after calling this method)
426            vendors_per_company: 50,
427            customers_per_company: 100,
428            materials_per_company: 200,
429            assets_per_company: 50,
430            employees_per_company: 100,
431            p2p_chains: 100,
432            o2c_chains: 100,
433            audit_engagements: 5,
434            workpapers_per_engagement: 20,
435            evidence_per_workpaper: 5,
436            risks_per_engagement: 15,
437            findings_per_engagement: 8,
438            judgments_per_engagement: 10,
439        }
440    }
441}
442
443/// Master data snapshot containing all generated entities.
444#[derive(Debug, Clone, Default)]
445pub struct MasterDataSnapshot {
446    /// Generated vendors.
447    pub vendors: Vec<Vendor>,
448    /// Generated customers.
449    pub customers: Vec<Customer>,
450    /// Generated materials.
451    pub materials: Vec<Material>,
452    /// Generated fixed assets.
453    pub assets: Vec<FixedAsset>,
454    /// Generated employees.
455    pub employees: Vec<Employee>,
456    /// Generated cost center hierarchy (two-level: departments + sub-departments).
457    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
458    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
459    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
460}
461
/// Summary of a hypergraph export that has finished.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Directory the export was written to.
    pub output_path: PathBuf,
}
474
475/// Document flow snapshot containing all generated document chains.
476#[derive(Debug, Clone, Default)]
477pub struct DocumentFlowSnapshot {
478    /// P2P document chains.
479    pub p2p_chains: Vec<P2PDocumentChain>,
480    /// O2C document chains.
481    pub o2c_chains: Vec<O2CDocumentChain>,
482    /// All purchase orders (flattened).
483    pub purchase_orders: Vec<documents::PurchaseOrder>,
484    /// All goods receipts (flattened).
485    pub goods_receipts: Vec<documents::GoodsReceipt>,
486    /// All vendor invoices (flattened).
487    pub vendor_invoices: Vec<documents::VendorInvoice>,
488    /// All sales orders (flattened).
489    pub sales_orders: Vec<documents::SalesOrder>,
490    /// All deliveries (flattened).
491    pub deliveries: Vec<documents::Delivery>,
492    /// All customer invoices (flattened).
493    pub customer_invoices: Vec<documents::CustomerInvoice>,
494    /// All payments (flattened).
495    pub payments: Vec<documents::Payment>,
496    /// Cross-document references collected from all document headers
497    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
498    pub document_references: Vec<documents::DocumentReference>,
499}
500
501/// Subledger snapshot containing generated subledger records.
502#[derive(Debug, Clone, Default)]
503pub struct SubledgerSnapshot {
504    /// AP invoices linked from document flow vendor invoices.
505    pub ap_invoices: Vec<APInvoice>,
506    /// AR invoices linked from document flow customer invoices.
507    pub ar_invoices: Vec<ARInvoice>,
508    /// FA subledger records (asset acquisitions from FA generator).
509    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
510    /// Inventory positions from inventory generator.
511    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
512    /// Inventory movements from inventory generator.
513    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
514    /// AR aging reports, one per company, computed after payment settlement.
515    pub ar_aging_reports: Vec<ARAgingReport>,
516    /// AP aging reports, one per company, computed after payment settlement.
517    pub ap_aging_reports: Vec<APAgingReport>,
518    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
519    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
520    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
521    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
522    /// Dunning runs executed after AR aging (one per company per dunning cycle).
523    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
524    /// Dunning letters generated across all dunning runs.
525    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
526}
527
528/// OCPM snapshot containing generated OCPM event log data.
529#[derive(Debug, Clone, Default)]
530pub struct OcpmSnapshot {
531    /// OCPM event log (if generated)
532    pub event_log: Option<OcpmEventLog>,
533    /// Number of events generated
534    pub event_count: usize,
535    /// Number of objects generated
536    pub object_count: usize,
537    /// Number of cases generated
538    pub case_count: usize,
539}
540
541/// Audit data snapshot containing all generated audit-related entities.
542#[derive(Debug, Clone, Default)]
543pub struct AuditSnapshot {
544    /// Audit engagements per ISA 210/220.
545    pub engagements: Vec<AuditEngagement>,
546    /// Workpapers per ISA 230.
547    pub workpapers: Vec<Workpaper>,
548    /// Audit evidence per ISA 500.
549    pub evidence: Vec<AuditEvidence>,
550    /// Risk assessments per ISA 315/330.
551    pub risk_assessments: Vec<RiskAssessment>,
552    /// Audit findings per ISA 265.
553    pub findings: Vec<AuditFinding>,
554    /// Professional judgments per ISA 200.
555    pub judgments: Vec<ProfessionalJudgment>,
556    /// External confirmations per ISA 505.
557    pub confirmations: Vec<ExternalConfirmation>,
558    /// Confirmation responses per ISA 505.
559    pub confirmation_responses: Vec<ConfirmationResponse>,
560    /// Audit procedure steps per ISA 330/530.
561    pub procedure_steps: Vec<AuditProcedureStep>,
562    /// Audit samples per ISA 530.
563    pub samples: Vec<AuditSample>,
564    /// Analytical procedure results per ISA 520.
565    pub analytical_results: Vec<AnalyticalProcedureResult>,
566    /// Internal audit functions per ISA 610.
567    pub ia_functions: Vec<InternalAuditFunction>,
568    /// Internal audit reports per ISA 610.
569    pub ia_reports: Vec<InternalAuditReport>,
570    /// Related parties per ISA 550.
571    pub related_parties: Vec<RelatedParty>,
572    /// Related party transactions per ISA 550.
573    pub related_party_transactions: Vec<RelatedPartyTransaction>,
574    // ---- ISA 600: Group Audits ----
575    /// Component auditors assigned by jurisdiction (ISA 600).
576    pub component_auditors: Vec<ComponentAuditor>,
577    /// Group audit plan with materiality allocations (ISA 600).
578    pub group_audit_plan: Option<GroupAuditPlan>,
579    /// Component instructions issued to component auditors (ISA 600).
580    pub component_instructions: Vec<ComponentInstruction>,
581    /// Reports received from component auditors (ISA 600).
582    pub component_reports: Vec<ComponentAuditorReport>,
583    // ---- ISA 210: Engagement Letters ----
584    /// Engagement letters per ISA 210.
585    pub engagement_letters: Vec<EngagementLetter>,
586    // ---- ISA 560 / IAS 10: Subsequent Events ----
587    /// Subsequent events per ISA 560 / IAS 10.
588    pub subsequent_events: Vec<SubsequentEvent>,
589    // ---- ISA 402: Service Organization Controls ----
590    /// Service organizations identified per ISA 402.
591    pub service_organizations: Vec<ServiceOrganization>,
592    /// SOC reports obtained per ISA 402.
593    pub soc_reports: Vec<SocReport>,
594    /// User entity controls documented per ISA 402.
595    pub user_entity_controls: Vec<UserEntityControl>,
596    // ---- ISA 570: Going Concern ----
597    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
598    pub going_concern_assessments:
599        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
600    // ---- ISA 540: Accounting Estimates ----
601    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
602    pub accounting_estimates:
603        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
604    // ---- ISA 700/701/705/706: Audit Opinions ----
605    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
606    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
607    /// Key Audit Matters per ISA 701 (flattened across all opinions).
608    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
609    // ---- SOX 302 / 404 ----
610    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
611    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
612    /// SOX Section 404 ICFR assessments (one per entity per year).
613    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
614    // ---- ISA 320: Materiality ----
615    /// Materiality calculations per entity per period (ISA 320).
616    pub materiality_calculations:
617        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
618    // ---- ISA 315: Combined Risk Assessments ----
619    /// Combined Risk Assessments per account area / assertion (ISA 315).
620    pub combined_risk_assessments:
621        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
622    // ---- ISA 530: Sampling Plans ----
623    /// Sampling plans per CRA at Moderate or higher (ISA 530).
624    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
625    /// Individual sampled items (key items + representative items) per ISA 530.
626    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
627    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
628    /// Significant classes of transactions per ISA 315 (one set per entity).
629    pub significant_transaction_classes:
630        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
631    // ---- ISA 520: Unusual Item Markers ----
632    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
633    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
634    // ---- ISA 520: Analytical Relationships ----
635    /// Analytical relationships (ratios, trends, correlations) per entity.
636    pub analytical_relationships:
637        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
638    // ---- PCAOB-ISA Cross-Reference ----
639    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
640    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
641    // ---- ISA Standard Reference ----
642    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
643    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
644    // ---- ISA 220 / ISA 300: Audit Scopes ----
645    /// Audit scope records (one per engagement) describing the audit boundary.
646    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
647    // ---- FSM Event Trail ----
648    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
649    /// Contains the ordered sequence of state-transition and procedure-step events
650    /// generated by the audit FSM engine.
651    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
652}
653
654/// Banking KYC/AML data snapshot: generated banking entities, their AML labels, case narratives, and summary counts.
655#[derive(Debug, Clone, Default)]
656pub struct BankingSnapshot {
657    /// Banking customers (retail, business, trust).
658    pub customers: Vec<BankingCustomer>,
659    /// Bank accounts.
660    pub accounts: Vec<BankAccount>,
661    /// Bank transactions with AML labels.
662    pub transactions: Vec<BankTransaction>,
663    /// Transaction-level AML labels with features.
664    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
665    /// Customer-level AML labels.
666    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
667    /// Account-level AML labels.
668    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
669    /// Relationship-level AML labels.
670    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
671    /// Case narratives for AML scenarios.
672    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
673    /// Number of suspicious transactions.
674    pub suspicious_count: usize,
675    /// Number of AML scenarios generated.
676    pub scenario_count: usize,
677}
678
679/// Graph export snapshot: whether export ran, how many graphs were exported, and per-export metadata.
680#[derive(Debug, Clone, Default, Serialize)]
681pub struct GraphExportSnapshot {
682    /// Whether graph export was performed.
683    pub exported: bool,
684    /// Number of graphs exported.
685    pub graph_count: usize,
686    /// Exported graph metadata, keyed by format name.
687    pub exports: HashMap<String, GraphExportInfo>,
688}
689
690/// Metadata describing a single exported graph (name, format, output location, and size).
691#[derive(Debug, Clone, Serialize)]
692pub struct GraphExportInfo {
693    /// Graph name.
694    pub name: String,
695    /// Export format name (e.g. pytorch_geometric, neo4j, dgl); stored as a free-form string.
696    pub format: String,
697    /// Output directory path.
698    pub output_path: PathBuf,
699    /// Number of nodes in the exported graph.
700    pub node_count: usize,
701    /// Number of edges in the exported graph.
702    pub edge_count: usize,
703}
704
705/// Source-to-Contract (S2C) sourcing data snapshot.
706#[derive(Debug, Clone, Default)]
707pub struct SourcingSnapshot {
708    /// Spend analyses.
709    pub spend_analyses: Vec<SpendAnalysis>,
710    /// Sourcing projects.
711    pub sourcing_projects: Vec<SourcingProject>,
712    /// Supplier qualifications.
713    pub qualifications: Vec<SupplierQualification>,
714    /// RFx events (RFI, RFP, RFQ).
715    pub rfx_events: Vec<RfxEvent>,
716    /// Supplier bids.
717    pub bids: Vec<SupplierBid>,
718    /// Bid evaluations.
719    pub bid_evaluations: Vec<BidEvaluation>,
720    /// Procurement contracts.
721    pub contracts: Vec<ProcurementContract>,
722    /// Catalog items.
723    pub catalog_items: Vec<CatalogItem>,
724    /// Supplier scorecards.
725    pub scorecards: Vec<SupplierScorecard>,
726}
727
728/// Trial balance for a single fiscal period, together with the period's identifying metadata.
729#[derive(Debug, Clone, Serialize, Deserialize)]
730pub struct PeriodTrialBalance {
731    /// Fiscal year.
732    pub fiscal_year: u16,
733    /// Fiscal period (1-12).
734    pub fiscal_period: u8,
735    /// Period start date.
736    pub period_start: NaiveDate,
737    /// Period end date.
738    pub period_end: NaiveDate,
739    /// Trial balance entries for this period.
740    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
741}
742
743/// Financial reporting snapshot (financial statements, consolidation schedules, bank reconciliations, trial balances, segment reporting, notes).
744#[derive(Debug, Clone, Default)]
745pub struct FinancialReportingSnapshot {
746    /// Financial statements (balance sheet, income statement, cash flow).
747    /// For multi-entity configs this includes all standalone statements.
748    pub financial_statements: Vec<FinancialStatement>,
749    /// Standalone financial statements keyed by entity code.
750    /// Each entity has its own slice of statements.
751    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
752    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
753    pub consolidated_statements: Vec<FinancialStatement>,
754    /// Consolidation schedules (one per period) showing pre/post elimination detail.
755    pub consolidation_schedules: Vec<ConsolidationSchedule>,
756    /// Bank reconciliations.
757    pub bank_reconciliations: Vec<BankReconciliation>,
758    /// Period-close trial balances (one per period).
759    pub trial_balances: Vec<PeriodTrialBalance>,
760    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
761    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
762    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
763    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
764    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
765    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
766}
767
768/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions, stock-based compensation).
769#[derive(Debug, Clone, Default)]
770pub struct HrSnapshot {
771    /// Payroll runs (actual data).
772    pub payroll_runs: Vec<PayrollRun>,
773    /// Payroll line items (actual data).
774    pub payroll_line_items: Vec<PayrollLineItem>,
775    /// Time entries (actual data).
776    pub time_entries: Vec<TimeEntry>,
777    /// Expense reports (actual data).
778    pub expense_reports: Vec<ExpenseReport>,
779    /// Benefit enrollments (actual data).
780    pub benefit_enrollments: Vec<BenefitEnrollment>,
781    /// Defined benefit pension plans (IAS 19 / ASC 715).
782    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
783    /// Pension obligation (DBO) roll-forwards.
784    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
785    /// Plan asset roll-forwards.
786    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
787    /// Pension disclosures.
788    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
789    /// Journal entries generated from pension expense and OCI remeasurements.
790    pub pension_journal_entries: Vec<JournalEntry>,
791    /// Stock grants (ASC 718 / IFRS 2).
792    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
793    /// Stock-based compensation period expense records.
794    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
795    /// Journal entries generated from stock-based compensation expense.
796    pub stock_comp_journal_entries: Vec<JournalEntry>,
797    /// Payroll run count.
798    pub payroll_run_count: usize,
799    /// Payroll line item count.
800    pub payroll_line_item_count: usize,
801    /// Time entry count.
802    pub time_entry_count: usize,
803    /// Expense report count.
804    pub expense_report_count: usize,
805    /// Benefit enrollment count.
806    pub benefit_enrollment_count: usize,
807    /// Pension plan count.
808    pub pension_plan_count: usize,
809    /// Stock grant count.
810    pub stock_grant_count: usize,
811}
812
813/// Accounting standards data snapshot (revenue recognition, impairment, business combinations, ECL, provisions, currency translation).
814#[derive(Debug, Clone, Default)]
815pub struct AccountingStandardsSnapshot {
816    /// Revenue recognition contracts (actual data).
817    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
818    /// Impairment tests (actual data).
819    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
820    /// Business combinations (IFRS 3 / ASC 805).
821    pub business_combinations:
822        Vec<datasynth_core::models::business_combination::BusinessCombination>,
823    /// Journal entries generated from business combinations (Day 1 + amortization).
824    pub business_combination_journal_entries: Vec<JournalEntry>,
825    /// Expected credit loss (ECL) models (IFRS 9 / ASC 326).
826    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
827    /// ECL provision movements.
828    pub ecl_provision_movements:
829        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
830    /// Journal entries from ECL provision.
831    pub ecl_journal_entries: Vec<JournalEntry>,
832    /// Provisions (IAS 37 / ASC 450).
833    pub provisions: Vec<datasynth_core::models::provision::Provision>,
834    /// Provision movement roll-forwards (IAS 37 / ASC 450).
835    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
836    /// Contingent liabilities (IAS 37 / ASC 450).
837    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
838    /// Journal entries from provisions.
839    pub provision_journal_entries: Vec<JournalEntry>,
840    /// IAS 21 functional currency translation results (one per entity per period).
841    pub currency_translation_results:
842        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
843    /// Revenue recognition contract count.
844    pub revenue_contract_count: usize,
845    /// Impairment test count.
846    pub impairment_test_count: usize,
847    /// Business combination count.
848    pub business_combination_count: usize,
849    /// ECL model count.
850    pub ecl_model_count: usize,
851    /// Provision count.
852    pub provision_count: usize,
853    /// Currency translation result count (IAS 21).
854    pub currency_translation_count: usize,
855}
856
857/// Compliance regulations framework snapshot (standards, cross-references, jurisdictions, audit procedures, findings, filings, graph).
858#[derive(Debug, Clone, Default)]
859pub struct ComplianceRegulationsSnapshot {
860    /// Flattened standard records for output.
861    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
862    /// Cross-reference records.
863    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
864    /// Jurisdiction profile records.
865    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
866    /// Generated audit procedures.
867    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
868    /// Generated compliance findings.
869    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
870    /// Generated regulatory filings.
871    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
872    /// Compliance graph; `Some` only if graph integration is enabled.
873    pub compliance_graph: Option<datasynth_graph::Graph>,
874}
875
876/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
877#[derive(Debug, Clone, Default)]
878pub struct ManufacturingSnapshot {
879    /// Production orders (actual data).
880    pub production_orders: Vec<ProductionOrder>,
881    /// Quality inspections (actual data).
882    pub quality_inspections: Vec<QualityInspection>,
883    /// Cycle counts (actual data).
884    pub cycle_counts: Vec<CycleCount>,
885    /// Bill of materials (BOM) components (actual data).
886    pub bom_components: Vec<BomComponent>,
887    /// Inventory movements (actual data).
888    pub inventory_movements: Vec<InventoryMovement>,
889    /// Number of production orders.
890    pub production_order_count: usize,
891    /// Number of quality inspections.
892    pub quality_inspection_count: usize,
893    /// Number of cycle counts.
894    pub cycle_count_count: usize,
895    /// Number of BOM components.
896    pub bom_component_count: usize,
897    /// Number of inventory movements.
898    pub inventory_movement_count: usize,
899}
900
901/// Sales quote, management KPI, and budget data snapshot.
902#[derive(Debug, Clone, Default)]
903pub struct SalesKpiBudgetsSnapshot {
904    /// Sales quotes (actual data).
905    pub sales_quotes: Vec<SalesQuote>,
906    /// Management KPIs (actual data).
907    pub kpis: Vec<ManagementKpi>,
908    /// Budgets (actual data).
909    pub budgets: Vec<Budget>,
910    /// Sales quote count.
911    pub sales_quote_count: usize,
912    /// Management KPI count.
913    pub kpi_count: usize,
914    /// Budget line count.
915    pub budget_line_count: usize,
916}
917
918/// Anomaly labels generated during injection, with an optional summary and per-type counts.
919#[derive(Debug, Clone, Default)]
920pub struct AnomalyLabels {
921    /// All anomaly labels.
922    pub labels: Vec<LabeledAnomaly>,
923    /// Summary statistics, if available.
924    pub summary: Option<AnomalySummary>,
925    /// Count by anomaly type (type given as a string key).
926    pub by_type: HashMap<String, usize>,
927}
928
929/// Balance validation results from the running balance tracker.
930#[derive(Debug, Clone, Default)]
931pub struct BalanceValidationResult {
932    /// Whether validation was performed.
933    pub validated: bool,
934    /// Whether the balance sheet equation is satisfied.
935    pub is_balanced: bool,
936    /// Number of entries processed.
937    pub entries_processed: u64,
938    /// Total debits across all entries.
939    pub total_debits: rust_decimal::Decimal,
940    /// Total credits across all entries.
941    pub total_credits: rust_decimal::Decimal,
942    /// Number of accounts tracked.
943    pub accounts_tracked: usize,
944    /// Number of companies tracked.
945    pub companies_tracked: usize,
946    /// Validation errors encountered.
947    pub validation_errors: Vec<ValidationError>,
948    /// Whether any unbalanced entries were found.
949    pub has_unbalanced_entries: bool,
950}
951
952/// Tax data snapshot (jurisdictions, codes, tax lines, returns, provisions, withholding, anomaly labels, deferred tax, posting journal entries).
953#[derive(Debug, Clone, Default)]
954pub struct TaxSnapshot {
955    /// Tax jurisdictions.
956    pub jurisdictions: Vec<TaxJurisdiction>,
957    /// Tax codes.
958    pub codes: Vec<TaxCode>,
959    /// Tax lines computed on documents.
960    pub tax_lines: Vec<TaxLine>,
961    /// Tax returns filed per period.
962    pub tax_returns: Vec<TaxReturn>,
963    /// Tax provisions.
964    pub tax_provisions: Vec<TaxProvision>,
965    /// Withholding tax records.
966    pub withholding_records: Vec<WithholdingTaxRecord>,
967    /// Tax anomaly labels.
968    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
969    /// Tax jurisdiction count.
970    pub jurisdiction_count: usize,
971    /// Tax code count.
972    pub code_count: usize,
973    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
974    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
975    /// Journal entries posting tax payable/receivable from computed tax lines.
976    pub tax_posting_journal_entries: Vec<JournalEntry>,
977}
978
979/// Intercompany (IC) data snapshot (group structure, matched pairs, IC journal entries, eliminations, NCI measurements).
980#[derive(Debug, Clone, Default, Serialize, Deserialize)]
981pub struct IntercompanySnapshot {
982    /// Group ownership structure (parent/subsidiary/associate relationships).
983    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
984    /// IC matched pairs (transaction pairs between related entities).
985    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
986    /// IC journal entries generated from matched pairs (seller side).
987    pub seller_journal_entries: Vec<JournalEntry>,
988    /// IC journal entries generated from matched pairs (buyer side).
989    pub buyer_journal_entries: Vec<JournalEntry>,
990    /// Elimination entries for consolidation.
991    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
992    /// Non-controlling interest (NCI) measurements derived from group structure ownership percentages.
993    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
994    /// IC source document chains (seller invoices, buyer POs/GRs/VIs); excluded from serde (de)serialization via `#[serde(skip)]`.
995    #[serde(skip)]
996    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
997    /// IC matched pair count.
998    pub matched_pair_count: usize,
999    /// IC elimination entry count.
1000    pub elimination_entry_count: usize,
1001    /// IC matching rate (0.0 to 1.0).
1002    pub match_rate: f64,
1003}
1004
1005/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, materiality, disclosures, climate scenarios, anomaly labels).
1006#[derive(Debug, Clone, Default)]
1007pub struct EsgSnapshot {
1008    /// Emission records (scope 1, 2, 3).
1009    pub emissions: Vec<EmissionRecord>,
1010    /// Energy consumption records.
1011    pub energy: Vec<EnergyConsumption>,
1012    /// Water usage records.
1013    pub water: Vec<WaterUsage>,
1014    /// Waste records.
1015    pub waste: Vec<WasteRecord>,
1016    /// Workforce diversity metrics.
1017    pub diversity: Vec<WorkforceDiversityMetric>,
1018    /// Pay equity metrics.
1019    pub pay_equity: Vec<PayEquityMetric>,
1020    /// Safety incidents.
1021    pub safety_incidents: Vec<SafetyIncident>,
1022    /// Safety metrics.
1023    pub safety_metrics: Vec<SafetyMetric>,
1024    /// Governance metrics.
1025    pub governance: Vec<GovernanceMetric>,
1026    /// Supplier ESG assessments.
1027    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1028    /// Materiality assessments.
1029    pub materiality: Vec<MaterialityAssessment>,
1030    /// ESG disclosures.
1031    pub disclosures: Vec<EsgDisclosure>,
1032    /// Climate scenarios.
1033    pub climate_scenarios: Vec<ClimateScenario>,
1034    /// ESG anomaly labels.
1035    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1036    /// Total emission record count.
1037    pub emission_count: usize,
1038    /// Total disclosure count.
1039    pub disclosure_count: usize,
1040}
1041
1042/// Treasury data snapshot (cash management, pooling, hedging, debt, guarantees, netting, treasury journal entries).
1043#[derive(Debug, Clone, Default)]
1044pub struct TreasurySnapshot {
1045    /// Cash positions (daily balances per account).
1046    pub cash_positions: Vec<CashPosition>,
1047    /// Cash forecasts.
1048    pub cash_forecasts: Vec<CashForecast>,
1049    /// Cash pools.
1050    pub cash_pools: Vec<CashPool>,
1051    /// Cash pool sweep transactions.
1052    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1053    /// Hedging instruments.
1054    pub hedging_instruments: Vec<HedgingInstrument>,
1055    /// Hedge relationships (ASC 815/IFRS 9 designations).
1056    pub hedge_relationships: Vec<HedgeRelationship>,
1057    /// Debt instruments.
1058    pub debt_instruments: Vec<DebtInstrument>,
1059    /// Bank guarantees and letters of credit.
1060    pub bank_guarantees: Vec<BankGuarantee>,
1061    /// Intercompany netting runs.
1062    pub netting_runs: Vec<NettingRun>,
1063    /// Treasury anomaly labels.
1064    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1065    /// Journal entries generated from treasury instruments (debt interest accruals,
1066    /// hedge mark-to-market (MTM), cash pool sweeps).
1067    pub journal_entries: Vec<JournalEntry>,
1068}
1069
1070/// Project accounting data snapshot (projects, costs, revenue, earned value metrics, change orders, milestones).
1071#[derive(Debug, Clone, Default)]
1072pub struct ProjectAccountingSnapshot {
1073    /// Projects with WBS hierarchies.
1074    pub projects: Vec<Project>,
1075    /// Project cost lines (linked from source documents).
1076    pub cost_lines: Vec<ProjectCostLine>,
1077    /// Revenue recognition records.
1078    pub revenue_records: Vec<ProjectRevenue>,
1079    /// Earned value management (EVM) metrics.
1080    pub earned_value_metrics: Vec<EarnedValueMetric>,
1081    /// Change orders.
1082    pub change_orders: Vec<ChangeOrder>,
1083    /// Project milestones.
1084    pub milestones: Vec<ProjectMilestone>,
1085}
1086
1087/// Complete result of an enhanced generation run: one field per generated data domain plus run-level statistics.
1088#[derive(Debug, Default)]
1089pub struct EnhancedGenerationResult {
1090    /// Generated chart of accounts.
1091    pub chart_of_accounts: ChartOfAccounts,
1092    /// Master data snapshot.
1093    pub master_data: MasterDataSnapshot,
1094    /// Document flow snapshot.
1095    pub document_flows: DocumentFlowSnapshot,
1096    /// Subledger snapshot (linked from document flows).
1097    pub subledger: SubledgerSnapshot,
1098    /// OCPM event log snapshot (if OCPM generation enabled).
1099    pub ocpm: OcpmSnapshot,
1100    /// Audit data snapshot (if audit generation enabled).
1101    pub audit: AuditSnapshot,
1102    /// Banking KYC/AML data snapshot (if banking generation enabled).
1103    pub banking: BankingSnapshot,
1104    /// Graph export snapshot (if graph export enabled).
1105    pub graph_export: GraphExportSnapshot,
1106    /// S2C sourcing data snapshot (if sourcing generation enabled).
1107    pub sourcing: SourcingSnapshot,
1108    /// Financial reporting snapshot (financial statements, bank reconciliations, trial balances, segment reports, notes).
1109    pub financial_reporting: FinancialReportingSnapshot,
1110    /// HR data snapshot (payroll, time entries, expenses, benefits, pensions, stock compensation).
1111    pub hr: HrSnapshot,
1112    /// Accounting standards snapshot (revenue recognition, impairment, business combinations, ECL, provisions, currency translation).
1113    pub accounting_standards: AccountingStandardsSnapshot,
1114    /// Manufacturing snapshot (production orders, quality inspections, cycle counts, BOM components, inventory movements).
1115    pub manufacturing: ManufacturingSnapshot,
1116    /// Sales, KPI, and budget snapshot.
1117    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1118    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1119    pub tax: TaxSnapshot,
1120    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1121    pub esg: EsgSnapshot,
1122    /// Treasury data snapshot (cash management, hedging, debt).
1123    pub treasury: TreasurySnapshot,
1124    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1125    pub project_accounting: ProjectAccountingSnapshot,
1126    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1127    pub process_evolution: Vec<ProcessEvolutionEvent>,
1128    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1129    pub organizational_events: Vec<OrganizationalEvent>,
1130    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1131    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1132    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1133    pub intercompany: IntercompanySnapshot,
1134    /// Generated journal entries.
1135    pub journal_entries: Vec<JournalEntry>,
1136    /// Anomaly labels (if injection enabled).
1137    pub anomaly_labels: AnomalyLabels,
1138    /// Balance validation results (if validation enabled).
1139    pub balance_validation: BalanceValidationResult,
1140    /// Data quality statistics (if injection enabled).
1141    pub data_quality_stats: DataQualityStats,
1142    /// Data quality issue records (if injection enabled).
1143    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1144    /// Generation statistics.
1145    pub statistics: EnhancedGenerationStatistics,
1146    /// Data lineage graph (if tracking enabled).
1147    pub lineage: Option<super::lineage::LineageGraph>,
1148    /// Quality gate evaluation result, if one was produced.
1149    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1150    /// Internal controls (if controls generation enabled).
1151    pub internal_controls: Vec<InternalControl>,
1152    /// SoD (Segregation of Duties) violations identified during control application.
1153    ///
1154    /// Each record corresponds to a journal entry where `sod_violation == true`.
1155    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1156    /// Opening balances (if opening balance generation enabled).
1157    pub opening_balances: Vec<GeneratedOpeningBalance>,
1158    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1159    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1160    /// Counterfactual (original, mutated) journal entry pairs for ML training.
1161    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1162    /// Fraud red-flag indicators on P2P/O2C documents.
1163    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1164    /// Collusion rings (coordinated fraud networks).
1165    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1166    /// Bi-temporal version chains for vendor entities.
1167    pub temporal_vendor_chains:
1168        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1169    /// Entity relationship graph (nodes + edges with strength scores).
1170    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1171    /// Cross-process links (P2P ↔ O2C via inventory movements).
1172    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1173    /// Industry-specific GL accounts and metadata.
1174    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1175    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1176    pub compliance_regulations: ComplianceRegulationsSnapshot,
1177}
1178
1179/// Enhanced statistics about a generation run.
1180#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1181pub struct EnhancedGenerationStatistics {
1182    /// Total journal entries generated.
1183    pub total_entries: u64,
1184    /// Total line items generated.
1185    pub total_line_items: u64,
1186    /// Number of accounts in CoA.
1187    pub accounts_count: usize,
1188    /// Number of companies.
1189    pub companies_count: usize,
1190    /// Period in months.
1191    pub period_months: u32,
1192    /// Master data counts.
1193    pub vendor_count: usize,
1194    pub customer_count: usize,
1195    pub material_count: usize,
1196    pub asset_count: usize,
1197    pub employee_count: usize,
1198    /// Document flow counts.
1199    pub p2p_chain_count: usize,
1200    pub o2c_chain_count: usize,
1201    /// Subledger counts.
1202    pub ap_invoice_count: usize,
1203    pub ar_invoice_count: usize,
1204    /// OCPM counts.
1205    pub ocpm_event_count: usize,
1206    pub ocpm_object_count: usize,
1207    pub ocpm_case_count: usize,
1208    /// Audit counts.
1209    pub audit_engagement_count: usize,
1210    pub audit_workpaper_count: usize,
1211    pub audit_evidence_count: usize,
1212    pub audit_risk_count: usize,
1213    pub audit_finding_count: usize,
1214    pub audit_judgment_count: usize,
1215    /// ISA 505 confirmation counts.
1216    #[serde(default)]
1217    pub audit_confirmation_count: usize,
1218    #[serde(default)]
1219    pub audit_confirmation_response_count: usize,
1220    /// ISA 330/530 procedure step and sample counts.
1221    #[serde(default)]
1222    pub audit_procedure_step_count: usize,
1223    #[serde(default)]
1224    pub audit_sample_count: usize,
1225    /// ISA 520 analytical procedure counts.
1226    #[serde(default)]
1227    pub audit_analytical_result_count: usize,
1228    /// ISA 610 internal audit counts.
1229    #[serde(default)]
1230    pub audit_ia_function_count: usize,
1231    #[serde(default)]
1232    pub audit_ia_report_count: usize,
1233    /// ISA 550 related party counts.
1234    #[serde(default)]
1235    pub audit_related_party_count: usize,
1236    #[serde(default)]
1237    pub audit_related_party_transaction_count: usize,
1238    /// Anomaly counts.
1239    pub anomalies_injected: usize,
1240    /// Data quality issue counts.
1241    pub data_quality_issues: usize,
1242    /// Banking counts.
1243    pub banking_customer_count: usize,
1244    pub banking_account_count: usize,
1245    pub banking_transaction_count: usize,
1246    pub banking_suspicious_count: usize,
1247    /// Graph export counts.
1248    pub graph_export_count: usize,
1249    pub graph_node_count: usize,
1250    pub graph_edge_count: usize,
1251    /// LLM enrichment timing (milliseconds).
1252    #[serde(default)]
1253    pub llm_enrichment_ms: u64,
1254    /// Number of vendor names enriched by LLM.
1255    #[serde(default)]
1256    pub llm_vendors_enriched: usize,
1257    /// Diffusion enhancement timing (milliseconds).
1258    #[serde(default)]
1259    pub diffusion_enhancement_ms: u64,
1260    /// Number of diffusion samples generated.
1261    #[serde(default)]
1262    pub diffusion_samples_generated: usize,
1263    /// Causal generation timing (milliseconds).
1264    #[serde(default)]
1265    pub causal_generation_ms: u64,
1266    /// Number of causal samples generated.
1267    #[serde(default)]
1268    pub causal_samples_generated: usize,
1269    /// Whether causal validation passed.
1270    #[serde(default)]
1271    pub causal_validation_passed: Option<bool>,
1272    /// S2C sourcing counts.
1273    #[serde(default)]
1274    pub sourcing_project_count: usize,
1275    #[serde(default)]
1276    pub rfx_event_count: usize,
1277    #[serde(default)]
1278    pub bid_count: usize,
1279    #[serde(default)]
1280    pub contract_count: usize,
1281    #[serde(default)]
1282    pub catalog_item_count: usize,
1283    #[serde(default)]
1284    pub scorecard_count: usize,
1285    /// Financial reporting counts.
1286    #[serde(default)]
1287    pub financial_statement_count: usize,
1288    #[serde(default)]
1289    pub bank_reconciliation_count: usize,
1290    /// HR counts.
1291    #[serde(default)]
1292    pub payroll_run_count: usize,
1293    #[serde(default)]
1294    pub time_entry_count: usize,
1295    #[serde(default)]
1296    pub expense_report_count: usize,
1297    #[serde(default)]
1298    pub benefit_enrollment_count: usize,
1299    #[serde(default)]
1300    pub pension_plan_count: usize,
1301    #[serde(default)]
1302    pub stock_grant_count: usize,
1303    /// Accounting standards counts.
1304    #[serde(default)]
1305    pub revenue_contract_count: usize,
1306    #[serde(default)]
1307    pub impairment_test_count: usize,
1308    #[serde(default)]
1309    pub business_combination_count: usize,
1310    #[serde(default)]
1311    pub ecl_model_count: usize,
1312    #[serde(default)]
1313    pub provision_count: usize,
1314    /// Manufacturing counts.
1315    #[serde(default)]
1316    pub production_order_count: usize,
1317    #[serde(default)]
1318    pub quality_inspection_count: usize,
1319    #[serde(default)]
1320    pub cycle_count_count: usize,
1321    #[serde(default)]
1322    pub bom_component_count: usize,
1323    #[serde(default)]
1324    pub inventory_movement_count: usize,
1325    /// Sales & reporting counts.
1326    #[serde(default)]
1327    pub sales_quote_count: usize,
1328    #[serde(default)]
1329    pub kpi_count: usize,
1330    #[serde(default)]
1331    pub budget_line_count: usize,
1332    /// Tax counts.
1333    #[serde(default)]
1334    pub tax_jurisdiction_count: usize,
1335    #[serde(default)]
1336    pub tax_code_count: usize,
1337    /// ESG counts.
1338    #[serde(default)]
1339    pub esg_emission_count: usize,
1340    #[serde(default)]
1341    pub esg_disclosure_count: usize,
1342    /// Intercompany counts.
1343    #[serde(default)]
1344    pub ic_matched_pair_count: usize,
1345    #[serde(default)]
1346    pub ic_elimination_count: usize,
1347    /// Number of intercompany journal entries (seller + buyer side).
1348    #[serde(default)]
1349    pub ic_transaction_count: usize,
1350    /// Number of fixed asset subledger records.
1351    #[serde(default)]
1352    pub fa_subledger_count: usize,
1353    /// Number of inventory subledger records.
1354    #[serde(default)]
1355    pub inventory_subledger_count: usize,
1356    /// Treasury debt instrument count.
1357    #[serde(default)]
1358    pub treasury_debt_instrument_count: usize,
1359    /// Treasury hedging instrument count.
1360    #[serde(default)]
1361    pub treasury_hedging_instrument_count: usize,
1362    /// Project accounting project count.
1363    #[serde(default)]
1364    pub project_count: usize,
1365    /// Project accounting change order count.
1366    #[serde(default)]
1367    pub project_change_order_count: usize,
1368    /// Tax provision count.
1369    #[serde(default)]
1370    pub tax_provision_count: usize,
1371    /// Opening balance count.
1372    #[serde(default)]
1373    pub opening_balance_count: usize,
1374    /// Subledger reconciliation count.
1375    #[serde(default)]
1376    pub subledger_reconciliation_count: usize,
1377    /// Tax line count.
1378    #[serde(default)]
1379    pub tax_line_count: usize,
1380    /// Project cost line count.
1381    #[serde(default)]
1382    pub project_cost_line_count: usize,
1383    /// Cash position count.
1384    #[serde(default)]
1385    pub cash_position_count: usize,
1386    /// Cash forecast count.
1387    #[serde(default)]
1388    pub cash_forecast_count: usize,
1389    /// Cash pool count.
1390    #[serde(default)]
1391    pub cash_pool_count: usize,
1392    /// Process evolution event count.
1393    #[serde(default)]
1394    pub process_evolution_event_count: usize,
1395    /// Organizational event count.
1396    #[serde(default)]
1397    pub organizational_event_count: usize,
1398    /// Counterfactual pair count.
1399    #[serde(default)]
1400    pub counterfactual_pair_count: usize,
1401    /// Number of fraud red-flag indicators generated.
1402    #[serde(default)]
1403    pub red_flag_count: usize,
1404    /// Number of collusion rings generated.
1405    #[serde(default)]
1406    pub collusion_ring_count: usize,
1407    /// Number of bi-temporal vendor version chains generated.
1408    #[serde(default)]
1409    pub temporal_version_chain_count: usize,
1410    /// Number of nodes in the entity relationship graph.
1411    #[serde(default)]
1412    pub entity_relationship_node_count: usize,
1413    /// Number of edges in the entity relationship graph.
1414    #[serde(default)]
1415    pub entity_relationship_edge_count: usize,
1416    /// Number of cross-process links generated.
1417    #[serde(default)]
1418    pub cross_process_link_count: usize,
1419    /// Number of disruption events generated.
1420    #[serde(default)]
1421    pub disruption_event_count: usize,
1422    /// Number of industry-specific GL accounts generated.
1423    #[serde(default)]
1424    pub industry_gl_account_count: usize,
1425    /// Number of period-close journal entries generated (tax provision + closing entries).
1426    #[serde(default)]
1427    pub period_close_je_count: usize,
1428}
1429
1430/// Enhanced orchestrator with full feature integration.
1431pub struct EnhancedOrchestrator {
1432    config: GeneratorConfig,
1433    phase_config: PhaseConfig,
1434    coa: Option<Arc<ChartOfAccounts>>,
1435    master_data: MasterDataSnapshot,
1436    seed: u64,
1437    multi_progress: Option<MultiProgress>,
1438    /// Resource guard for memory, disk, and CPU monitoring
1439    resource_guard: ResourceGuard,
1440    /// Output path for disk space monitoring
1441    output_path: Option<PathBuf>,
1442    /// Copula generators for preserving correlations (from fingerprint)
1443    copula_generators: Vec<CopulaGeneratorSpec>,
1444    /// Country pack registry for localized data generation
1445    country_pack_registry: datasynth_core::CountryPackRegistry,
1446    /// Optional streaming sink for phase-by-phase output
1447    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1448}
1449
1450impl EnhancedOrchestrator {
1451    /// Create a new enhanced orchestrator.
1452    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1453        datasynth_config::validate_config(&config)?;
1454
1455        let seed = config.global.seed.unwrap_or_else(rand::random);
1456
1457        // Build resource guard from config
1458        let resource_guard = Self::build_resource_guard(&config, None);
1459
1460        // Build country pack registry from config
1461        let country_pack_registry = match &config.country_packs {
1462            Some(cp) => {
1463                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1464                    .map_err(|e| SynthError::config(e.to_string()))?
1465            }
1466            None => datasynth_core::CountryPackRegistry::builtin_only()
1467                .map_err(|e| SynthError::config(e.to_string()))?,
1468        };
1469
1470        Ok(Self {
1471            config,
1472            phase_config,
1473            coa: None,
1474            master_data: MasterDataSnapshot::default(),
1475            seed,
1476            multi_progress: None,
1477            resource_guard,
1478            output_path: None,
1479            copula_generators: Vec::new(),
1480            country_pack_registry,
1481            phase_sink: None,
1482        })
1483    }
1484
1485    /// Create with default phase config.
1486    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1487        Self::new(config, PhaseConfig::default())
1488    }
1489
1490    /// Set a streaming phase sink for real-time output (builder pattern).
1491    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1492        self.phase_sink = Some(sink);
1493        self
1494    }
1495
1496    /// Set a streaming phase sink on an existing orchestrator.
1497    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1498        self.phase_sink = Some(sink);
1499    }
1500
1501    /// Emit a batch of items to the phase sink (if configured).
1502    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1503        if let Some(ref sink) = self.phase_sink {
1504            for item in items {
1505                if let Ok(value) = serde_json::to_value(item) {
1506                    if let Err(e) = sink.emit(phase, type_name, &value) {
1507                        warn!(
1508                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1509                        );
1510                    }
1511                }
1512            }
1513            if let Err(e) = sink.phase_complete(phase) {
1514                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1515            }
1516        }
1517    }
1518
1519    /// Enable/disable progress bars.
1520    pub fn with_progress(mut self, show: bool) -> Self {
1521        self.phase_config.show_progress = show;
1522        if show {
1523            self.multi_progress = Some(MultiProgress::new());
1524        }
1525        self
1526    }
1527
1528    /// Set the output path for disk space monitoring.
1529    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1530        let path = path.into();
1531        self.output_path = Some(path.clone());
1532        // Rebuild resource guard with the output path
1533        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1534        self
1535    }
1536
1537    /// Access the country pack registry.
1538    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1539        &self.country_pack_registry
1540    }
1541
1542    /// Look up a country pack by country code string.
1543    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1544        self.country_pack_registry.get_by_str(country)
1545    }
1546
1547    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1548    /// company, defaulting to `"US"` if no companies are configured.
1549    fn primary_country_code(&self) -> &str {
1550        self.config
1551            .companies
1552            .first()
1553            .map(|c| c.country.as_str())
1554            .unwrap_or("US")
1555    }
1556
1557    /// Resolve the country pack for the primary (first) company.
1558    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1559        self.country_pack_for(self.primary_country_code())
1560    }
1561
1562    /// Resolve the CoA framework from config/country-pack.
1563    fn resolve_coa_framework(&self) -> CoAFramework {
1564        if self.config.accounting_standards.enabled {
1565            match self.config.accounting_standards.framework {
1566                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1567                    return CoAFramework::FrenchPcg;
1568                }
1569                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1570                    return CoAFramework::GermanSkr04;
1571                }
1572                _ => {}
1573            }
1574        }
1575        // Fallback: derive from country pack
1576        let pack = self.primary_pack();
1577        match pack.accounting.framework.as_str() {
1578            "french_gaap" => CoAFramework::FrenchPcg,
1579            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1580            _ => CoAFramework::UsGaap,
1581        }
1582    }
1583
1584    /// Check if copula generators are available.
1585    ///
1586    /// Returns true if the orchestrator has copula generators for preserving
1587    /// correlations (typically from fingerprint-based generation).
1588    pub fn has_copulas(&self) -> bool {
1589        !self.copula_generators.is_empty()
1590    }
1591
1592    /// Get the copula generators.
1593    ///
1594    /// Returns a reference to the copula generators for use during generation.
1595    /// These can be used to generate correlated samples that preserve the
1596    /// statistical relationships from the source data.
1597    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1598        &self.copula_generators
1599    }
1600
1601    /// Get a mutable reference to the copula generators.
1602    ///
1603    /// Allows generators to sample from copulas during data generation.
1604    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1605        &mut self.copula_generators
1606    }
1607
1608    /// Sample correlated values from a named copula.
1609    ///
1610    /// Returns None if the copula doesn't exist.
1611    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1612        self.copula_generators
1613            .iter_mut()
1614            .find(|c| c.name == copula_name)
1615            .map(|c| c.generator.sample())
1616    }
1617
1618    /// Create an orchestrator from a fingerprint file.
1619    ///
1620    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1621    /// and creates an orchestrator configured to generate data matching
1622    /// the statistical properties of the original data.
1623    ///
1624    /// # Arguments
1625    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1626    /// * `phase_config` - Phase configuration for generation
1627    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1628    ///
1629    /// # Example
1630    /// ```no_run
1631    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1632    /// use std::path::Path;
1633    ///
1634    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1635    ///     Path::new("fingerprint.dsf"),
1636    ///     PhaseConfig::default(),
1637    ///     1.0,
1638    /// ).unwrap();
1639    /// ```
1640    pub fn from_fingerprint(
1641        fingerprint_path: &std::path::Path,
1642        phase_config: PhaseConfig,
1643        scale: f64,
1644    ) -> SynthResult<Self> {
1645        info!("Loading fingerprint from: {}", fingerprint_path.display());
1646
1647        // Read the fingerprint
1648        let reader = FingerprintReader::new();
1649        let fingerprint = reader
1650            .read_from_file(fingerprint_path)
1651            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1652
1653        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1654    }
1655
1656    /// Create an orchestrator from a loaded fingerprint.
1657    ///
1658    /// # Arguments
1659    /// * `fingerprint` - The loaded fingerprint
1660    /// * `phase_config` - Phase configuration for generation
1661    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1662    pub fn from_fingerprint_data(
1663        fingerprint: Fingerprint,
1664        phase_config: PhaseConfig,
1665        scale: f64,
1666    ) -> SynthResult<Self> {
1667        info!(
1668            "Synthesizing config from fingerprint (version: {}, tables: {})",
1669            fingerprint.manifest.version,
1670            fingerprint.schema.tables.len()
1671        );
1672
1673        // Generate a seed for the synthesis
1674        let seed: u64 = rand::random();
1675        info!("Fingerprint synthesis seed: {}", seed);
1676
1677        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1678        let options = SynthesisOptions {
1679            scale,
1680            seed: Some(seed),
1681            preserve_correlations: true,
1682            inject_anomalies: true,
1683        };
1684        let synthesizer = ConfigSynthesizer::with_options(options);
1685
1686        // Synthesize full result including copula generators
1687        let synthesis_result = synthesizer
1688            .synthesize_full(&fingerprint, seed)
1689            .map_err(|e| {
1690                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1691            })?;
1692
1693        // Start with a base config from the fingerprint's industry if available
1694        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1695            Self::base_config_for_industry(industry)
1696        } else {
1697            Self::base_config_for_industry("manufacturing")
1698        };
1699
1700        // Apply the synthesized patches
1701        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1702
1703        // Log synthesis results
1704        info!(
1705            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1706            fingerprint.schema.tables.len(),
1707            scale,
1708            synthesis_result.copula_generators.len()
1709        );
1710
1711        if !synthesis_result.copula_generators.is_empty() {
1712            for spec in &synthesis_result.copula_generators {
1713                info!(
1714                    "  Copula '{}' for table '{}': {} columns",
1715                    spec.name,
1716                    spec.table,
1717                    spec.columns.len()
1718                );
1719            }
1720        }
1721
1722        // Create the orchestrator with the synthesized config
1723        let mut orchestrator = Self::new(config, phase_config)?;
1724
1725        // Store copula generators for use during generation
1726        orchestrator.copula_generators = synthesis_result.copula_generators;
1727
1728        Ok(orchestrator)
1729    }
1730
1731    /// Create a base config for a given industry.
1732    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1733        use datasynth_config::presets::create_preset;
1734        use datasynth_config::TransactionVolume;
1735        use datasynth_core::models::{CoAComplexity, IndustrySector};
1736
1737        let sector = match industry.to_lowercase().as_str() {
1738            "manufacturing" => IndustrySector::Manufacturing,
1739            "retail" => IndustrySector::Retail,
1740            "financial" | "financial_services" => IndustrySector::FinancialServices,
1741            "healthcare" => IndustrySector::Healthcare,
1742            "technology" | "tech" => IndustrySector::Technology,
1743            _ => IndustrySector::Manufacturing,
1744        };
1745
1746        // Create a preset with reasonable defaults
1747        create_preset(
1748            sector,
1749            1,  // company count
1750            12, // period months
1751            CoAComplexity::Medium,
1752            TransactionVolume::TenK,
1753        )
1754    }
1755
1756    /// Apply a config patch to a GeneratorConfig.
1757    fn apply_config_patch(
1758        mut config: GeneratorConfig,
1759        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1760    ) -> GeneratorConfig {
1761        use datasynth_fingerprint::synthesis::ConfigValue;
1762
1763        for (key, value) in patch.values() {
1764            match (key.as_str(), value) {
1765                // Transaction count is handled via TransactionVolume enum on companies
1766                // Log it but cannot directly set it (would need to modify company volumes)
1767                ("transactions.count", ConfigValue::Integer(n)) => {
1768                    info!(
1769                        "Fingerprint suggests {} transactions (apply via company volumes)",
1770                        n
1771                    );
1772                }
1773                ("global.period_months", ConfigValue::Integer(n)) => {
1774                    config.global.period_months = (*n).clamp(1, 120) as u32;
1775                }
1776                ("global.start_date", ConfigValue::String(s)) => {
1777                    config.global.start_date = s.clone();
1778                }
1779                ("global.seed", ConfigValue::Integer(n)) => {
1780                    config.global.seed = Some(*n as u64);
1781                }
1782                ("fraud.enabled", ConfigValue::Bool(b)) => {
1783                    config.fraud.enabled = *b;
1784                }
1785                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1786                    config.fraud.fraud_rate = *f;
1787                }
1788                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1789                    config.data_quality.enabled = *b;
1790                }
1791                // Handle anomaly injection paths (mapped to fraud config)
1792                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1793                    config.fraud.enabled = *b;
1794                }
1795                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1796                    config.fraud.fraud_rate = *f;
1797                }
1798                _ => {
1799                    debug!("Ignoring unknown config patch key: {}", key);
1800                }
1801            }
1802        }
1803
1804        config
1805    }
1806
1807    /// Build a resource guard from the configuration.
1808    fn build_resource_guard(
1809        config: &GeneratorConfig,
1810        output_path: Option<PathBuf>,
1811    ) -> ResourceGuard {
1812        let mut builder = ResourceGuardBuilder::new();
1813
1814        // Configure memory limit if set
1815        if config.global.memory_limit_mb > 0 {
1816            builder = builder.memory_limit(config.global.memory_limit_mb);
1817        }
1818
1819        // Configure disk monitoring for output path
1820        if let Some(path) = output_path {
1821            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1822        }
1823
1824        // Use conservative degradation settings for production safety
1825        builder = builder.conservative();
1826
1827        builder.build()
1828    }
1829
1830    /// Check resources (memory, disk, CPU) and return degradation level.
1831    ///
1832    /// Returns an error if hard limits are exceeded.
1833    /// Returns Ok(DegradationLevel) indicating current resource state.
1834    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1835        self.resource_guard.check()
1836    }
1837
1838    /// Check resources with logging.
1839    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1840        let level = self.resource_guard.check()?;
1841
1842        if level != DegradationLevel::Normal {
1843            warn!(
1844                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1845                phase,
1846                level,
1847                self.resource_guard.current_memory_mb(),
1848                self.resource_guard.available_disk_mb()
1849            );
1850        }
1851
1852        Ok(level)
1853    }
1854
1855    /// Get current degradation actions based on resource state.
1856    fn get_degradation_actions(&self) -> DegradationActions {
1857        self.resource_guard.get_actions()
1858    }
1859
1860    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1861    fn check_memory_limit(&self) -> SynthResult<()> {
1862        self.check_resources()?;
1863        Ok(())
1864    }
1865
1866    /// Run the complete generation workflow.
1867    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1868        info!("Starting enhanced generation workflow");
1869        info!(
1870            "Config: industry={:?}, period_months={}, companies={}",
1871            self.config.global.industry,
1872            self.config.global.period_months,
1873            self.config.companies.len()
1874        );
1875
1876        // Set decimal serialization mode (thread-local, affects JSON output).
1877        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
1878        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
1879        datasynth_core::serde_decimal::set_numeric_native(is_native);
1880        struct NumericModeGuard;
1881        impl Drop for NumericModeGuard {
1882            fn drop(&mut self) {
1883                datasynth_core::serde_decimal::set_numeric_native(false);
1884            }
1885        }
1886        let _numeric_guard = if is_native {
1887            Some(NumericModeGuard)
1888        } else {
1889            None
1890        };
1891
1892        // Initial resource check before starting
1893        let initial_level = self.check_resources_with_log("initial")?;
1894        if initial_level == DegradationLevel::Emergency {
1895            return Err(SynthError::resource(
1896                "Insufficient resources to start generation",
1897            ));
1898        }
1899
1900        let mut stats = EnhancedGenerationStatistics {
1901            companies_count: self.config.companies.len(),
1902            period_months: self.config.global.period_months,
1903            ..Default::default()
1904        };
1905
1906        // Phase 1: Chart of Accounts
1907        let coa = self.phase_chart_of_accounts(&mut stats)?;
1908
1909        // Phase 2: Master Data
1910        self.phase_master_data(&mut stats)?;
1911
1912        // Emit master data to stream sink
1913        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1914        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1915        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1916
1917        // Phase 3: Document Flows + Subledger Linking
1918        let (mut document_flows, mut subledger, fa_journal_entries) =
1919            self.phase_document_flows(&mut stats)?;
1920
1921        // Emit document flows to stream sink
1922        self.emit_phase_items(
1923            "document_flows",
1924            "PurchaseOrder",
1925            &document_flows.purchase_orders,
1926        );
1927        self.emit_phase_items(
1928            "document_flows",
1929            "GoodsReceipt",
1930            &document_flows.goods_receipts,
1931        );
1932        self.emit_phase_items(
1933            "document_flows",
1934            "VendorInvoice",
1935            &document_flows.vendor_invoices,
1936        );
1937        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1938        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1939
1940        // Phase 3b: Opening Balances (before JE generation)
1941        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1942
1943        // Phase 3c: Convert opening balances to journal entries and prepend them.
1944        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
1945        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
1946        // balance map type.
1947        let opening_balance_jes: Vec<JournalEntry> = opening_balances
1948            .iter()
1949            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1950            .collect();
1951        if !opening_balance_jes.is_empty() {
1952            debug!(
1953                "Prepending {} opening balance JEs to entries",
1954                opening_balance_jes.len()
1955            );
1956        }
1957
1958        // Phase 4: Journal Entries
1959        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1960
1961        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
1962        // starts from the correct initial state.
1963        if !opening_balance_jes.is_empty() {
1964            let mut combined = opening_balance_jes;
1965            combined.extend(entries);
1966            entries = combined;
1967        }
1968
1969        // Phase 4c: Append FA acquisition journal entries to main entries
1970        if !fa_journal_entries.is_empty() {
1971            debug!(
1972                "Appending {} FA acquisition JEs to main entries",
1973                fa_journal_entries.len()
1974            );
1975            entries.extend(fa_journal_entries);
1976        }
1977
1978        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
1979        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1980
1981        // Get current degradation actions for optional phases
1982        let actions = self.get_degradation_actions();
1983
1984        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1985        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1986
1987        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
1988        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
1989        if !sourcing.contracts.is_empty() {
1990            let mut linked_count = 0usize;
1991            // Collect (vendor_id, po_id) pairs from P2P chains
1992            let po_vendor_pairs: Vec<(String, String)> = document_flows
1993                .p2p_chains
1994                .iter()
1995                .map(|chain| {
1996                    (
1997                        chain.purchase_order.vendor_id.clone(),
1998                        chain.purchase_order.header.document_id.clone(),
1999                    )
2000                })
2001                .collect();
2002
2003            for chain in &mut document_flows.p2p_chains {
2004                if chain.purchase_order.contract_id.is_none() {
2005                    if let Some(contract) = sourcing
2006                        .contracts
2007                        .iter()
2008                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2009                    {
2010                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2011                        linked_count += 1;
2012                    }
2013                }
2014            }
2015
2016            // Populate reverse FK: purchase_order_ids on each contract
2017            for contract in &mut sourcing.contracts {
2018                let po_ids: Vec<String> = po_vendor_pairs
2019                    .iter()
2020                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2021                    .map(|(_, po_id)| po_id.clone())
2022                    .collect();
2023                if !po_ids.is_empty() {
2024                    contract.purchase_order_ids = po_ids;
2025                }
2026            }
2027
2028            if linked_count > 0 {
2029                debug!(
2030                    "Linked {} purchase orders to S2C contracts by vendor match",
2031                    linked_count
2032                );
2033            }
2034        }
2035
2036        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2037        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2038
2039        // Phase 5c: Append IC journal entries to main entries
2040        if !intercompany.seller_journal_entries.is_empty()
2041            || !intercompany.buyer_journal_entries.is_empty()
2042        {
2043            let ic_je_count = intercompany.seller_journal_entries.len()
2044                + intercompany.buyer_journal_entries.len();
2045            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2046            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2047            debug!(
2048                "Appended {} IC journal entries to main entries",
2049                ic_je_count
2050            );
2051        }
2052
2053        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2054        if !intercompany.elimination_entries.is_empty() {
2055            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2056                &intercompany.elimination_entries,
2057            );
2058            if !elim_jes.is_empty() {
2059                debug!(
2060                    "Appended {} elimination journal entries to main entries",
2061                    elim_jes.len()
2062                );
2063                // IC elimination net-zero assertion (v2.5 hardening)
2064                let elim_debit: rust_decimal::Decimal =
2065                    elim_jes.iter().map(|je| je.total_debit()).sum();
2066                let elim_credit: rust_decimal::Decimal =
2067                    elim_jes.iter().map(|je| je.total_credit()).sum();
2068                let elim_diff = (elim_debit - elim_credit).abs();
2069                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2070                if elim_diff > tolerance {
2071                    return Err(datasynth_core::error::SynthError::generation(format!(
2072                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2073                        elim_debit, elim_credit, elim_diff, tolerance
2074                    )));
2075                }
2076                debug!(
2077                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2078                    elim_debit, elim_credit, elim_diff
2079                );
2080                entries.extend(elim_jes);
2081            }
2082        }
2083
2084        // Phase 5e: Wire IC source documents into document flow snapshot
2085        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2086            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2087                document_flows
2088                    .customer_invoices
2089                    .extend(ic_docs.seller_invoices.iter().cloned());
2090                document_flows
2091                    .purchase_orders
2092                    .extend(ic_docs.buyer_orders.iter().cloned());
2093                document_flows
2094                    .goods_receipts
2095                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2096                document_flows
2097                    .vendor_invoices
2098                    .extend(ic_docs.buyer_invoices.iter().cloned());
2099                debug!(
2100                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2101                    ic_docs.seller_invoices.len(),
2102                    ic_docs.buyer_orders.len(),
2103                    ic_docs.buyer_goods_receipts.len(),
2104                    ic_docs.buyer_invoices.len(),
2105                );
2106            }
2107        }
2108
2109        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2110        let hr = self.phase_hr_data(&mut stats)?;
2111
2112        // Phase 6b: Generate JEs from payroll runs
2113        if !hr.payroll_runs.is_empty() {
2114            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2115            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2116            entries.extend(payroll_jes);
2117        }
2118
2119        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2120        if !hr.pension_journal_entries.is_empty() {
2121            debug!(
2122                "Generated {} JEs from pension plans",
2123                hr.pension_journal_entries.len()
2124            );
2125            entries.extend(hr.pension_journal_entries.iter().cloned());
2126        }
2127
2128        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2129        if !hr.stock_comp_journal_entries.is_empty() {
2130            debug!(
2131                "Generated {} JEs from stock-based compensation",
2132                hr.stock_comp_journal_entries.len()
2133            );
2134            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2135        }
2136
2137        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2138        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2139
2140        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2141        if !manufacturing_snap.production_orders.is_empty() {
2142            let currency = self
2143                .config
2144                .companies
2145                .first()
2146                .map(|c| c.currency.as_str())
2147                .unwrap_or("USD");
2148            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2149                &manufacturing_snap.production_orders,
2150                &manufacturing_snap.quality_inspections,
2151                currency,
2152            );
2153            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2154            entries.extend(mfg_jes);
2155        }
2156
2157        // Phase 7a-warranty: Generate warranty provisions per company
2158        if !manufacturing_snap.quality_inspections.is_empty() {
2159            let framework = match self.config.accounting_standards.framework {
2160                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2161                _ => "US_GAAP",
2162            };
2163            for company in &self.config.companies {
2164                let company_orders: Vec<_> = manufacturing_snap
2165                    .production_orders
2166                    .iter()
2167                    .filter(|o| o.company_code == company.code)
2168                    .cloned()
2169                    .collect();
2170                let company_inspections: Vec<_> = manufacturing_snap
2171                    .quality_inspections
2172                    .iter()
2173                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2174                    .cloned()
2175                    .collect();
2176                if company_inspections.is_empty() {
2177                    continue;
2178                }
2179                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2180                let warranty_result = warranty_gen.generate(
2181                    &company.code,
2182                    &company_orders,
2183                    &company_inspections,
2184                    &company.currency,
2185                    framework,
2186                );
2187                if !warranty_result.journal_entries.is_empty() {
2188                    debug!(
2189                        "Generated {} warranty provision JEs for {}",
2190                        warranty_result.journal_entries.len(),
2191                        company.code
2192                    );
2193                    entries.extend(warranty_result.journal_entries);
2194                }
2195            }
2196        }
2197
2198        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2199        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2200        {
2201            let cogs_currency = self
2202                .config
2203                .companies
2204                .first()
2205                .map(|c| c.currency.as_str())
2206                .unwrap_or("USD");
2207            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2208                &document_flows.deliveries,
2209                &manufacturing_snap.production_orders,
2210                cogs_currency,
2211            );
2212            if !cogs_jes.is_empty() {
2213                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2214                entries.extend(cogs_jes);
2215            }
2216        }
2217
2218        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2219        //
2220        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2221        // subledger inventory positions.  Here we reconcile them so that position balances
2222        // reflect the actual stock movements within the generation period.
2223        if !manufacturing_snap.inventory_movements.is_empty()
2224            && !subledger.inventory_positions.is_empty()
2225        {
2226            use datasynth_core::models::MovementType as MfgMovementType;
2227            let mut receipt_count = 0usize;
2228            let mut issue_count = 0usize;
2229            for movement in &manufacturing_snap.inventory_movements {
2230                // Find a matching position by material code and company
2231                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2232                    p.material_id == movement.material_code
2233                        && p.company_code == movement.entity_code
2234                }) {
2235                    match movement.movement_type {
2236                        MfgMovementType::GoodsReceipt => {
2237                            // Increase stock and update weighted-average cost
2238                            pos.add_quantity(
2239                                movement.quantity,
2240                                movement.value,
2241                                movement.movement_date,
2242                            );
2243                            receipt_count += 1;
2244                        }
2245                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2246                            // Decrease stock (best-effort; silently skip if insufficient)
2247                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2248                            issue_count += 1;
2249                        }
2250                        _ => {}
2251                    }
2252                }
2253            }
2254            debug!(
2255                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2256                manufacturing_snap.inventory_movements.len(),
2257                receipt_count,
2258                issue_count,
2259            );
2260        }
2261
2262        // Update final entry/line-item stats after all JE-generating phases
2263        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2264        if !entries.is_empty() {
2265            stats.total_entries = entries.len() as u64;
2266            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2267            debug!(
2268                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2269                stats.total_entries, stats.total_line_items
2270            );
2271        }
2272
2273        // Phase 7b: Apply internal controls to journal entries
2274        if self.config.internal_controls.enabled && !entries.is_empty() {
2275            info!("Phase 7b: Applying internal controls to journal entries");
2276            let control_config = ControlGeneratorConfig {
2277                exception_rate: self.config.internal_controls.exception_rate,
2278                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2279                enable_sox_marking: true,
2280                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2281                    self.config.internal_controls.sox_materiality_threshold,
2282                )
2283                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2284                ..Default::default()
2285            };
2286            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2287            for entry in &mut entries {
2288                control_gen.apply_controls(entry, &coa);
2289            }
2290            let with_controls = entries
2291                .iter()
2292                .filter(|e| !e.header.control_ids.is_empty())
2293                .count();
2294            info!(
2295                "Applied controls to {} entries ({} with control IDs assigned)",
2296                entries.len(),
2297                with_controls
2298            );
2299        }
2300
2301        // Phase 7c: Extract SoD violations from annotated journal entries.
2302        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2303        // Here we materialise those flags into standalone SodViolation records.
2304        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2305            .iter()
2306            .filter(|e| e.header.sod_violation)
2307            .filter_map(|e| {
2308                e.header.sod_conflict_type.map(|ct| {
2309                    use datasynth_core::models::{RiskLevel, SodViolation};
2310                    let severity = match ct {
2311                        datasynth_core::models::SodConflictType::PaymentReleaser
2312                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2313                            RiskLevel::Critical
2314                        }
2315                        datasynth_core::models::SodConflictType::PreparerApprover
2316                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2317                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2318                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2319                            RiskLevel::High
2320                        }
2321                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2322                            RiskLevel::Medium
2323                        }
2324                    };
2325                    let action = format!(
2326                        "SoD conflict {:?} on entry {} ({})",
2327                        ct, e.header.document_id, e.header.company_code
2328                    );
2329                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2330                })
2331            })
2332            .collect();
2333        if !sod_violations.is_empty() {
2334            info!(
2335                "Phase 7c: Extracted {} SoD violations from {} entries",
2336                sod_violations.len(),
2337                entries.len()
2338            );
2339        }
2340
2341        // Emit journal entries to stream sink (after all JE-generating phases)
2342        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2343
2344        // Phase 8: Anomaly Injection (after all JE-generating phases)
2345        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2346
2347        // Emit anomaly labels to stream sink
2348        self.emit_phase_items(
2349            "anomaly_injection",
2350            "LabeledAnomaly",
2351            &anomaly_labels.labels,
2352        );
2353
2354        // Propagate fraud labels from journal entries to source documents.
2355        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2356        // instead of tracing through document_references.json.
2357        {
2358            use std::collections::HashMap;
2359            // Build a map from document_id -> (is_fraud, fraud_type) from fraudulent JEs.
2360            //
2361            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2362            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2363            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2364            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2365            // we register BOTH the prefixed form (raw reference) AND the bare form
2366            // (post-colon portion) in the map. Also register the JE's document_id
2367            // UUID so documents that set `journal_entry_id` match via that path.
2368            //
2369            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2370            // looked up "foo", silently producing 0 propagations.
2371            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2372            for je in &entries {
2373                if je.header.is_fraud {
2374                    if let Some(ref fraud_type) = je.header.fraud_type {
2375                        if let Some(ref reference) = je.header.reference {
2376                            // Register the full reference ("GR:PO-2024-000001")
2377                            fraud_map.insert(reference.clone(), *fraud_type);
2378                            // Also register the bare document ID ("PO-2024-000001")
2379                            // by stripping the "PREFIX:" if present.
2380                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2381                                if !bare.is_empty() {
2382                                    fraud_map.insert(bare.to_string(), *fraud_type);
2383                                }
2384                            }
2385                        }
2386                        // Also tag via journal_entry_id on document headers
2387                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2388                    }
2389                }
2390            }
2391            if !fraud_map.is_empty() {
2392                let mut propagated = 0usize;
2393                // Use DocumentHeader::propagate_fraud method for each doc type
2394                macro_rules! propagate_to {
2395                    ($docs:expr) => {
2396                        // Add one to `propagated` for each `true` returned by `propagate_fraud`.
2397                        for record in &mut $docs {
2398                            let labelled = record.header.propagate_fraud(&fraud_map);
2399                            propagated += usize::from(labelled);
2400                        }
2401                    };
2402                }
2403                propagate_to!(document_flows.purchase_orders);
2404                propagate_to!(document_flows.goods_receipts);
2405                propagate_to!(document_flows.vendor_invoices);
2406                propagate_to!(document_flows.payments);
2407                propagate_to!(document_flows.sales_orders);
2408                propagate_to!(document_flows.deliveries);
2409                propagate_to!(document_flows.customer_invoices);
2410                if propagated > 0 {
2411                    info!(
2412                        "Propagated fraud labels to {} document flow records",
2413                        propagated
2414                    );
2415                }
2416            }
2417        }
2418
2419        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2420        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2421
2422        // Emit red flags to stream sink
2423        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2424
2425        // Phase 26b: Collusion Ring Generation (after red flags)
2426        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2427
2428        // Emit collusion rings to stream sink
2429        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2430
2431        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2432        let balance_validation = self.phase_balance_validation(&entries)?;
2433
2434        // Phase 9b: GL-to-Subledger Reconciliation
2435        let subledger_reconciliation =
2436            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2437
2438        // Phase 10: Data Quality Injection
2439        let (data_quality_stats, quality_issues) =
2440            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2441
2442        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2443        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2444
2445        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
2446        {
2447            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2448
2449            // Assert 1: Every non-anomaly JE must individually balance (debits = credits).
2450            // Anomaly-injected JEs are excluded since they are intentionally unbalanced (fraud).
2451            let mut unbalanced_clean = 0usize;
2452            for je in &entries {
2453                if je.header.is_fraud || je.header.is_anomaly {
2454                    continue;
2455                }
2456                let diff = (je.total_debit() - je.total_credit()).abs();
2457                if diff > tolerance {
2458                    unbalanced_clean += 1;
2459                    if unbalanced_clean <= 3 {
2460                        warn!(
2461                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2462                            je.header.document_id,
2463                            je.total_debit(),
2464                            je.total_credit(),
2465                            diff
2466                        );
2467                    }
2468                }
2469            }
2470            if unbalanced_clean > 0 {
2471                return Err(datasynth_core::error::SynthError::generation(format!(
2472                    "{} non-anomaly JEs are unbalanced (debits != credits). \
2473                     First few logged above. Tolerance={}",
2474                    unbalanced_clean, tolerance
2475                )));
2476            }
2477            debug!(
2478                "Phase 10c: All {} non-anomaly JEs individually balanced",
2479                entries
2480                    .iter()
2481                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2482                    .count()
2483            );
2484
2485            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
2486            let company_codes: Vec<String> = self
2487                .config
2488                .companies
2489                .iter()
2490                .map(|c| c.code.clone())
2491                .collect();
2492            for company_code in &company_codes {
2493                let mut assets = rust_decimal::Decimal::ZERO;
2494                let mut liab_equity = rust_decimal::Decimal::ZERO;
2495
2496                for entry in &entries {
2497                    if entry.header.company_code != *company_code {
2498                        continue;
2499                    }
2500                    for line in &entry.lines {
2501                        let acct = &line.gl_account;
2502                        let net = line.debit_amount - line.credit_amount;
2503                        // Asset accounts (1xxx): normal debit balance
2504                        if acct.starts_with('1') {
2505                            assets += net;
2506                        }
2507                        // Liability (2xxx) + Equity (3xxx): normal credit balance
2508                        else if acct.starts_with('2') || acct.starts_with('3') {
2509                            liab_equity -= net; // credit-normal, so negate debit-net
2510                        }
2511                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
2512                        // so they net to zero after closing entries
2513                    }
2514                }
2515
2516                let bs_diff = (assets - liab_equity).abs();
2517                if bs_diff > tolerance {
2518                    warn!(
2519                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
2520                         revenue/expense closing entries may not fully offset",
2521                        company_code, assets, liab_equity, bs_diff
2522                    );
2523                    // Warn rather than error: multi-period datasets may have timing
2524                    // differences from accruals/deferrals that resolve in later periods.
2525                    // The per-JE debit/credit balance check (Assert 1) is the hard gate.
2526                } else {
2527                    debug!(
2528                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
2529                        company_code, assets, liab_equity, bs_diff
2530                    );
2531                }
2532            }
2533
2534            info!("Phase 10c: All generation-time accounting assertions passed");
2535        }
2536
2537        // Phase 11: Audit Data
2538        let audit = self.phase_audit_data(&entries, &mut stats)?;
2539
2540        // Phase 12: Banking KYC/AML Data
2541        let mut banking = self.phase_banking_data(&mut stats)?;
2542
2543        // Phase 12.5: Bridge document-flow Payments → BankTransactions
2544        // Creates coherence between the accounting layer (payments, JEs) and the
2545        // banking layer (bank transactions). A vendor invoice payment now appears
2546        // on both sides with cross-references and fraud labels propagated.
2547        if self.phase_config.generate_banking
2548            && !document_flows.payments.is_empty()
2549            && !banking.accounts.is_empty()
2550        {
2551            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2552            if bridge_rate > 0.0 {
2553                let mut bridge =
2554                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2555                        self.seed,
2556                    );
2557                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2558                    &document_flows.payments,
2559                    &banking.customers,
2560                    &banking.accounts,
2561                    bridge_rate,
2562                );
2563                info!(
2564                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2565                    bridge_stats.bridged_count,
2566                    bridge_stats.transactions_emitted,
2567                    bridge_stats.fraud_propagated,
2568                );
2569                let bridged_count = bridged_txns.len();
2570                banking.transactions.extend(bridged_txns);
2571
2572                // Re-run velocity computation so bridged txns also get features
2573                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
2574                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2575                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
2576                        &mut banking.transactions,
2577                    );
2578                }
2579
2580                // Recompute suspicious count after bridging
2581                banking.suspicious_count = banking
2582                    .transactions
2583                    .iter()
2584                    .filter(|t| t.is_suspicious)
2585                    .count();
2586                stats.banking_transaction_count = banking.transactions.len();
2587                stats.banking_suspicious_count = banking.suspicious_count;
2588            }
2589        }
2590
2591        // Phase 13: Graph Export
2592        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2593
2594        // Phase 14: LLM Enrichment
2595        self.phase_llm_enrichment(&mut stats);
2596
2597        // Phase 15: Diffusion Enhancement
2598        self.phase_diffusion_enhancement(&mut stats);
2599
2600        // Phase 16: Causal Overlay
2601        self.phase_causal_overlay(&mut stats);
2602
2603        // Phase 17: Bank Reconciliation + Financial Statements
2604        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
2605        // provision data (from accounting_standards / tax snapshots) can be wired in.
2606        let mut financial_reporting = self.phase_financial_reporting(
2607            &document_flows,
2608            &entries,
2609            &coa,
2610            &hr,
2611            &audit,
2612            &mut stats,
2613        )?;
2614
2615        // BS coherence check: assets = liabilities + equity
2616        {
2617            use datasynth_core::models::StatementType;
2618            for stmt in &financial_reporting.consolidated_statements {
2619                if stmt.statement_type == StatementType::BalanceSheet {
2620                    let total_assets: rust_decimal::Decimal = stmt
2621                        .line_items
2622                        .iter()
2623                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
2624                        .map(|li| li.amount)
2625                        .sum();
2626                    let total_le: rust_decimal::Decimal = stmt
2627                        .line_items
2628                        .iter()
2629                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2630                        .map(|li| li.amount)
2631                        .sum();
2632                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2633                        warn!(
2634                            "BS equation imbalance: assets={}, L+E={}",
2635                            total_assets, total_le
2636                        );
2637                    }
2638                }
2639            }
2640        }
2641
2642        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
2643        let accounting_standards =
2644            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2645
2646        // Phase 18a: Merge ECL journal entries into main GL
2647        if !accounting_standards.ecl_journal_entries.is_empty() {
2648            debug!(
2649                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2650                accounting_standards.ecl_journal_entries.len()
2651            );
2652            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2653        }
2654
2655        // Phase 18a: Merge provision journal entries into main GL
2656        if !accounting_standards.provision_journal_entries.is_empty() {
2657            debug!(
2658                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2659                accounting_standards.provision_journal_entries.len()
2660            );
2661            entries.extend(
2662                accounting_standards
2663                    .provision_journal_entries
2664                    .iter()
2665                    .cloned(),
2666            );
2667        }
2668
2669        // Phase 18b: OCPM Events (after all process data is available)
2670        let ocpm = self.phase_ocpm_events(
2671            &document_flows,
2672            &sourcing,
2673            &hr,
2674            &manufacturing_snap,
2675            &banking,
2676            &audit,
2677            &financial_reporting,
2678            &mut stats,
2679        )?;
2680
2681        // Emit OCPM events to stream sink
2682        if let Some(ref event_log) = ocpm.event_log {
2683            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2684        }
2685
2686        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
2687        if let Some(ref event_log) = ocpm.event_log {
2688            // Build reverse index: document_ref → indices of the matching events in `event_log.events`
2689            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
2690                std::collections::HashMap::new();
2691            for (idx, event) in event_log.events.iter().enumerate() {
2692                if let Some(ref doc_ref) = event.document_ref {
2693                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
2694                }
2695            }
2696
2697            if !doc_index.is_empty() {
2698                let mut annotated = 0usize;
2699                for entry in &mut entries {
2700                    let doc_id_str = entry.header.document_id.to_string();
2701                    // Collect matching event indices from document_id and reference
2702                    let mut matched_indices: Vec<usize> = Vec::new();
2703                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
2704                        matched_indices.extend(indices);
2705                    }
2706                    if let Some(ref reference) = entry.header.reference {
2707                        let bare_ref = reference
2708                            .find(':')
2709                            .map(|i| &reference[i + 1..])
2710                            .unwrap_or(reference.as_str());
2711                        if let Some(indices) = doc_index.get(bare_ref) {
2712                            for &idx in indices {
2713                                if !matched_indices.contains(&idx) {
2714                                    matched_indices.push(idx);
2715                                }
2716                            }
2717                        }
2718                    }
2719                    // Apply matches to JE header
2720                    if !matched_indices.is_empty() {
2721                        for &idx in &matched_indices {
2722                            let event = &event_log.events[idx];
2723                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
2724                                entry.header.ocpm_event_ids.push(event.event_id);
2725                            }
2726                            for obj_ref in &event.object_refs {
2727                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
2728                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
2729                                }
2730                            }
2731                            if entry.header.ocpm_case_id.is_none() {
2732                                entry.header.ocpm_case_id = event.case_id;
2733                            }
2734                        }
2735                        annotated += 1;
2736                    }
2737                }
2738                debug!(
2739                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
2740                    annotated
2741                );
2742            }
2743        }
2744
2745        // Phase 19: Sales Quotes, Management KPIs, Budgets
2746        let sales_kpi_budgets =
2747            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2748
2749        // Phase 22: Treasury Data Generation
2750        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
2751        // are included in the pre-tax income used by phase_tax_generation.
2752        let treasury =
2753            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2754
2755        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
2756        if !treasury.journal_entries.is_empty() {
2757            debug!(
2758                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2759                treasury.journal_entries.len()
2760            );
2761            entries.extend(treasury.journal_entries.iter().cloned());
2762        }
2763
2764        // Phase 20: Tax Generation
2765        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2766
2767        // Phase 20 JEs: Merge tax posting journal entries into main GL
2768        if !tax.tax_posting_journal_entries.is_empty() {
2769            debug!(
2770                "Merging {} tax posting JEs into GL",
2771                tax.tax_posting_journal_entries.len()
2772            );
2773            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2774        }
2775
2776        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
2777        // Build supplementary cash flow items from upstream JE data (depreciation,
2778        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
2779        {
2780            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2781
2782            let framework_str = {
2783                use datasynth_config::schema::AccountingFrameworkConfig;
2784                match self
2785                    .config
2786                    .accounting_standards
2787                    .framework
2788                    .unwrap_or_default()
2789                {
2790                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2791                        "IFRS"
2792                    }
2793                    _ => "US_GAAP",
2794                }
2795            };
2796
2797            // Sum depreciation debits (account 6000) from close JEs
2798            let depreciation_total: rust_decimal::Decimal = entries
2799                .iter()
2800                .filter(|je| je.header.document_type == "CL")
2801                .flat_map(|je| je.lines.iter())
2802                .filter(|l| l.gl_account.starts_with("6000"))
2803                .map(|l| l.debit_amount)
2804                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2805
2806            // Sum interest expense debits (account 7100)
2807            let interest_paid: rust_decimal::Decimal = entries
2808                .iter()
2809                .flat_map(|je| je.lines.iter())
2810                .filter(|l| l.gl_account.starts_with("7100"))
2811                .map(|l| l.debit_amount)
2812                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2813
2814            // Sum tax expense debits (account 8000)
2815            let tax_paid: rust_decimal::Decimal = entries
2816                .iter()
2817                .flat_map(|je| je.lines.iter())
2818                .filter(|l| l.gl_account.starts_with("8000"))
2819                .map(|l| l.debit_amount)
2820                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2821
2822            // Sum capex debits on fixed assets (account 1500)
2823            let capex: rust_decimal::Decimal = entries
2824                .iter()
2825                .flat_map(|je| je.lines.iter())
2826                .filter(|l| l.gl_account.starts_with("1500"))
2827                .map(|l| l.debit_amount)
2828                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2829
2830            // Dividends paid: sum debits on dividends payable (account 2170) from payment JEs
2831            let dividends_paid: rust_decimal::Decimal = entries
2832                .iter()
2833                .flat_map(|je| je.lines.iter())
2834                .filter(|l| l.gl_account == "2170")
2835                .map(|l| l.debit_amount)
2836                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2837
2838            let cf_data = CashFlowSourceData {
2839                depreciation_total,
2840                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
2841                delta_ar: rust_decimal::Decimal::ZERO,
2842                delta_ap: rust_decimal::Decimal::ZERO,
2843                delta_inventory: rust_decimal::Decimal::ZERO,
2844                capex,
2845                debt_issuance: rust_decimal::Decimal::ZERO,
2846                debt_repayment: rust_decimal::Decimal::ZERO,
2847                interest_paid,
2848                tax_paid,
2849                dividends_paid,
2850                framework: framework_str.to_string(),
2851            };
2852
2853            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
2854            if !enhanced_cf_items.is_empty() {
2855                // Merge into ALL cash flow statements (standalone + consolidated)
2856                use datasynth_core::models::StatementType;
2857                let merge_count = enhanced_cf_items.len();
2858                for stmt in financial_reporting
2859                    .financial_statements
2860                    .iter_mut()
2861                    .chain(financial_reporting.consolidated_statements.iter_mut())
2862                    .chain(
2863                        financial_reporting
2864                            .standalone_statements
2865                            .values_mut()
2866                            .flat_map(|v| v.iter_mut()),
2867                    )
2868                {
2869                    if stmt.statement_type == StatementType::CashFlowStatement {
2870                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
2871                    }
2872                }
2873                info!(
2874                    "Enhanced cash flow: {} supplementary items merged into CF statements",
2875                    merge_count
2876                );
2877            }
2878        }
2879
2880        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
2881        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
2882        self.generate_notes_to_financial_statements(
2883            &mut financial_reporting,
2884            &accounting_standards,
2885            &tax,
2886            &hr,
2887            &audit,
2888            &treasury,
2889        );
2890
2891        // Phase 20b: Supplement segment reports from real JEs (v2.4)
2892        // When we have 2+ companies, derive segment data from actual journal entries
2893        // to complement or replace the FS-generator-based segments.
2894        if self.config.companies.len() >= 2 && !entries.is_empty() {
2895            let companies: Vec<(String, String)> = self
2896                .config
2897                .companies
2898                .iter()
2899                .map(|c| (c.code.clone(), c.name.clone()))
2900                .collect();
2901            let ic_elim: rust_decimal::Decimal =
2902                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
2903            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2904                .unwrap_or(NaiveDate::MIN);
2905            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2906            let period_label = format!(
2907                "{}-{:02}",
2908                end_date.year(),
2909                (end_date - chrono::Days::new(1)).month()
2910            );
2911
2912            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
2913            let (je_segments, je_recon) =
2914                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
2915            if !je_segments.is_empty() {
2916                info!(
2917                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
2918                    je_segments.len(),
2919                    ic_elim,
2920                );
2921                // Replace if existing segment_reports were empty; otherwise supplement
2922                if financial_reporting.segment_reports.is_empty() {
2923                    financial_reporting.segment_reports = je_segments;
2924                    financial_reporting.segment_reconciliations = vec![je_recon];
2925                } else {
2926                    financial_reporting.segment_reports.extend(je_segments);
2927                    financial_reporting.segment_reconciliations.push(je_recon);
2928                }
2929            }
2930        }
2931
2932        // Phase 21: ESG Data Generation
2933        let esg_snap =
2934            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
2935
2936        // Phase 23: Project Accounting Data Generation
2937        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2938
2939        // Phase 24: Process Evolution + Organizational Events
2940        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2941
2942        // Phase 24b: Disruption Events
2943        let disruption_events = self.phase_disruption_events(&mut stats)?;
2944
2945        // Phase 27: Bi-Temporal Vendor Version Chains
2946        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2947
2948        // Phase 28: Entity Relationship Graph + Cross-Process Links
2949        let (entity_relationship_graph, cross_process_links) =
2950            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2951
2952        // Phase 29: Industry-specific GL accounts
2953        let industry_output = self.phase_industry_data(&mut stats);
2954
2955        // Phase: Compliance regulations (must run before hypergraph so it can be included)
2956        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2957
2958        // Phase 19b: Hypergraph Export (after all data is available)
2959        self.phase_hypergraph_export(
2960            &coa,
2961            &entries,
2962            &document_flows,
2963            &sourcing,
2964            &hr,
2965            &manufacturing_snap,
2966            &banking,
2967            &audit,
2968            &financial_reporting,
2969            &ocpm,
2970            &compliance_regulations,
2971            &mut stats,
2972        )?;
2973
2974        // Phase 10c: Additional graph builders (approval, entity, banking)
2975        // These run after all data is available since they need banking/IC data.
2976        if self.phase_config.generate_graph_export {
2977            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2978        }
2979
2980        // Log informational messages for config sections not yet fully wired
2981        if self.config.streaming.enabled {
2982            info!("Note: streaming config is enabled but batch mode does not use it");
2983        }
2984        if self.config.vendor_network.enabled {
2985            debug!("Vendor network config available; relationship graph generation is partial");
2986        }
2987        if self.config.customer_segmentation.enabled {
2988            debug!("Customer segmentation config available; segment-aware generation is partial");
2989        }
2990
2991        // Log final resource statistics
2992        let resource_stats = self.resource_guard.stats();
2993        info!(
2994            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2995            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2996            resource_stats.disk.estimated_bytes_written,
2997            resource_stats.degradation_level
2998        );
2999
3000        // Flush any remaining stream sink data
3001        if let Some(ref sink) = self.phase_sink {
3002            if let Err(e) = sink.flush() {
3003                warn!("Stream sink flush failed: {e}");
3004            }
3005        }
3006
3007        // Build data lineage graph
3008        let lineage = self.build_lineage_graph();
3009
3010        // Evaluate quality gates if enabled in config
3011        let gate_result = if self.config.quality_gates.enabled {
3012            let profile_name = &self.config.quality_gates.profile;
3013            match datasynth_eval::gates::get_profile(profile_name) {
3014                Some(profile) => {
3015                    // Build an evaluation populated with actual generation metrics.
3016                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3017
3018                    // Populate balance sheet evaluation from balance validation results
3019                    if balance_validation.validated {
3020                        eval.coherence.balance =
3021                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3022                                equation_balanced: balance_validation.is_balanced,
3023                                max_imbalance: (balance_validation.total_debits
3024                                    - balance_validation.total_credits)
3025                                    .abs(),
3026                                periods_evaluated: 1,
3027                                periods_imbalanced: if balance_validation.is_balanced {
3028                                    0
3029                                } else {
3030                                    1
3031                                },
3032                                period_results: Vec::new(),
3033                                companies_evaluated: self.config.companies.len(),
3034                            });
3035                    }
3036
3037                    // Set coherence passes based on balance validation
3038                    eval.coherence.passes = balance_validation.is_balanced;
3039                    if !balance_validation.is_balanced {
3040                        eval.coherence
3041                            .failures
3042                            .push("Balance sheet equation not satisfied".to_string());
3043                    }
3044
3045                    // Set statistical score based on entry count (basic sanity)
3046                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3047                    eval.statistical.passes = !entries.is_empty();
3048
3049                    // Set quality score from data quality stats
3050                    eval.quality.overall_score = 0.9; // Default high for generated data
3051                    eval.quality.passes = true;
3052
3053                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3054                    info!(
3055                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3056                        profile_name, result.gates_passed, result.gates_total, result.summary
3057                    );
3058                    Some(result)
3059                }
3060                None => {
3061                    warn!(
3062                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3063                        profile_name
3064                    );
3065                    None
3066                }
3067            }
3068        } else {
3069            None
3070        };
3071
3072        // Generate internal controls if enabled
3073        let internal_controls = if self.config.internal_controls.enabled {
3074            InternalControl::standard_controls()
3075        } else {
3076            Vec::new()
3077        };
3078
3079        Ok(EnhancedGenerationResult {
3080            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3081            master_data: std::mem::take(&mut self.master_data),
3082            document_flows,
3083            subledger,
3084            ocpm,
3085            audit,
3086            banking,
3087            graph_export,
3088            sourcing,
3089            financial_reporting,
3090            hr,
3091            accounting_standards,
3092            manufacturing: manufacturing_snap,
3093            sales_kpi_budgets,
3094            tax,
3095            esg: esg_snap,
3096            treasury,
3097            project_accounting,
3098            process_evolution,
3099            organizational_events,
3100            disruption_events,
3101            intercompany,
3102            journal_entries: entries,
3103            anomaly_labels,
3104            balance_validation,
3105            data_quality_stats,
3106            quality_issues,
3107            statistics: stats,
3108            lineage: Some(lineage),
3109            gate_result,
3110            internal_controls,
3111            sod_violations,
3112            opening_balances,
3113            subledger_reconciliation,
3114            counterfactual_pairs,
3115            red_flags,
3116            collusion_rings,
3117            temporal_vendor_chains,
3118            entity_relationship_graph,
3119            cross_process_links,
3120            industry_output,
3121            compliance_regulations,
3122        })
3123    }
3124
3125    // ========================================================================
3126    // Generation Phase Methods
3127    // ========================================================================
3128
3129    /// Phase 1: Generate Chart of Accounts and update statistics.
3130    fn phase_chart_of_accounts(
3131        &mut self,
3132        stats: &mut EnhancedGenerationStatistics,
3133    ) -> SynthResult<Arc<ChartOfAccounts>> {
3134        info!("Phase 1: Generating Chart of Accounts");
3135        let coa = self.generate_coa()?;
3136        stats.accounts_count = coa.account_count();
3137        info!(
3138            "Chart of Accounts generated: {} accounts",
3139            stats.accounts_count
3140        );
3141        self.check_resources_with_log("post-coa")?;
3142        Ok(coa)
3143    }
3144
3145    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
3146    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3147        if self.phase_config.generate_master_data {
3148            info!("Phase 2: Generating Master Data");
3149            self.generate_master_data()?;
3150            stats.vendor_count = self.master_data.vendors.len();
3151            stats.customer_count = self.master_data.customers.len();
3152            stats.material_count = self.master_data.materials.len();
3153            stats.asset_count = self.master_data.assets.len();
3154            stats.employee_count = self.master_data.employees.len();
3155            info!(
3156                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3157                stats.vendor_count, stats.customer_count, stats.material_count,
3158                stats.asset_count, stats.employee_count
3159            );
3160            self.check_resources_with_log("post-master-data")?;
3161        } else {
3162            debug!("Phase 2: Skipped (master data generation disabled)");
3163        }
3164        Ok(())
3165    }
3166
3167    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
3168    fn phase_document_flows(
3169        &mut self,
3170        stats: &mut EnhancedGenerationStatistics,
3171    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3172        let mut document_flows = DocumentFlowSnapshot::default();
3173        let mut subledger = SubledgerSnapshot::default();
3174        // Dunning JEs (interest + charges) accumulated here and merged into the
3175        // main FA-JE list below so they appear in the GL.
3176        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3177
3178        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3179            info!("Phase 3: Generating Document Flows");
3180            self.generate_document_flows(&mut document_flows)?;
3181            stats.p2p_chain_count = document_flows.p2p_chains.len();
3182            stats.o2c_chain_count = document_flows.o2c_chains.len();
3183            info!(
3184                "Document flows generated: {} P2P chains, {} O2C chains",
3185                stats.p2p_chain_count, stats.o2c_chain_count
3186            );
3187
3188            // Phase 3b: Link document flows to subledgers (for data coherence)
3189            debug!("Phase 3b: Linking document flows to subledgers");
3190            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3191            stats.ap_invoice_count = subledger.ap_invoices.len();
3192            stats.ar_invoice_count = subledger.ar_invoices.len();
3193            debug!(
3194                "Subledgers linked: {} AP invoices, {} AR invoices",
3195                stats.ap_invoice_count, stats.ar_invoice_count
3196            );
3197
3198            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
3199            // Without this step the subledger is systematically overstated because
3200            // amount_remaining is set at invoice creation and never reduced by
3201            // the payments that were generated in the document-flow phase.
3202            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3203            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3204            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3205            debug!("Payment settlements applied to AP and AR subledgers");
3206
3207            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
3208            // The as-of date is the last day of the configured period.
3209            if let Ok(start_date) =
3210                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3211            {
3212                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3213                    - chrono::Days::new(1);
3214                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3215                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
3216                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
3217                // derived from JE-level aggregation and will typically differ. This is a known
3218                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
3219                // generated independently. A future reconciliation phase should align them by
3220                // using subledger totals as the authoritative source for BS Receivables.
3221                for company in &self.config.companies {
3222                    let ar_report = ARAgingReport::from_invoices(
3223                        company.code.clone(),
3224                        &subledger.ar_invoices,
3225                        as_of_date,
3226                    );
3227                    subledger.ar_aging_reports.push(ar_report);
3228
3229                    let ap_report = APAgingReport::from_invoices(
3230                        company.code.clone(),
3231                        &subledger.ap_invoices,
3232                        as_of_date,
3233                    );
3234                    subledger.ap_aging_reports.push(ap_report);
3235                }
3236                debug!(
3237                    "AR/AP aging reports built: {} AR, {} AP",
3238                    subledger.ar_aging_reports.len(),
3239                    subledger.ap_aging_reports.len()
3240                );
3241
3242                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
3243                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3244                {
3245                    use datasynth_generators::DunningGenerator;
3246                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3247                    for company in &self.config.companies {
3248                        let currency = company.currency.as_str();
3249                        // Collect mutable references to AR invoices for this company
3250                        // (dunning generator updates dunning_info on invoices in-place).
3251                        let mut company_invoices: Vec<
3252                            datasynth_core::models::subledger::ar::ARInvoice,
3253                        > = subledger
3254                            .ar_invoices
3255                            .iter()
3256                            .filter(|inv| inv.company_code == company.code)
3257                            .cloned()
3258                            .collect();
3259
3260                        if company_invoices.is_empty() {
3261                            continue;
3262                        }
3263
3264                        let result = dunning_gen.execute_dunning_run(
3265                            &company.code,
3266                            as_of_date,
3267                            &mut company_invoices,
3268                            currency,
3269                        );
3270
3271                        // Write back updated dunning info to the main AR invoice list
3272                        for updated in &company_invoices {
3273                            if let Some(orig) = subledger
3274                                .ar_invoices
3275                                .iter_mut()
3276                                .find(|i| i.invoice_number == updated.invoice_number)
3277                            {
3278                                orig.dunning_info = updated.dunning_info.clone();
3279                            }
3280                        }
3281
3282                        subledger.dunning_runs.push(result.dunning_run);
3283                        subledger.dunning_letters.extend(result.letters);
3284                        // Dunning JEs (interest + charges) collected into local buffer.
3285                        dunning_journal_entries.extend(result.journal_entries);
3286                    }
3287                    debug!(
3288                        "Dunning runs complete: {} runs, {} letters",
3289                        subledger.dunning_runs.len(),
3290                        subledger.dunning_letters.len()
3291                    );
3292                }
3293            }
3294
3295            self.check_resources_with_log("post-document-flows")?;
3296        } else {
3297            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3298        }
3299
3300        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
3301        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3302        if !self.master_data.assets.is_empty() {
3303            debug!("Generating FA subledger records");
3304            let company_code = self
3305                .config
3306                .companies
3307                .first()
3308                .map(|c| c.code.as_str())
3309                .unwrap_or("1000");
3310            let currency = self
3311                .config
3312                .companies
3313                .first()
3314                .map(|c| c.currency.as_str())
3315                .unwrap_or("USD");
3316
3317            let mut fa_gen = datasynth_generators::FAGenerator::new(
3318                datasynth_generators::FAGeneratorConfig::default(),
3319                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3320            );
3321
3322            for asset in &self.master_data.assets {
3323                let (record, je) = fa_gen.generate_asset_acquisition(
3324                    company_code,
3325                    &format!("{:?}", asset.asset_class),
3326                    &asset.description,
3327                    asset.acquisition_date,
3328                    currency,
3329                    asset.cost_center.as_deref(),
3330                );
3331                subledger.fa_records.push(record);
3332                fa_journal_entries.push(je);
3333            }
3334
3335            stats.fa_subledger_count = subledger.fa_records.len();
3336            debug!(
3337                "FA subledger records generated: {} (with {} acquisition JEs)",
3338                stats.fa_subledger_count,
3339                fa_journal_entries.len()
3340            );
3341        }
3342
3343        // Generate Inventory subledger records from master data materials
3344        if !self.master_data.materials.is_empty() {
3345            debug!("Generating Inventory subledger records");
3346            let first_company = self.config.companies.first();
3347            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3348            let inv_currency = first_company
3349                .map(|c| c.currency.clone())
3350                .unwrap_or_else(|| "USD".to_string());
3351
3352            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3353                datasynth_generators::InventoryGeneratorConfig::default(),
3354                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3355                inv_currency.clone(),
3356            );
3357
3358            for (i, material) in self.master_data.materials.iter().enumerate() {
3359                let plant = format!("PLANT{:02}", (i % 3) + 1);
3360                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3361                let initial_qty = rust_decimal::Decimal::from(
3362                    material
3363                        .safety_stock
3364                        .to_string()
3365                        .parse::<i64>()
3366                        .unwrap_or(100),
3367                );
3368
3369                let position = inv_gen.generate_position(
3370                    company_code,
3371                    &plant,
3372                    &storage_loc,
3373                    &material.material_id,
3374                    &material.description,
3375                    initial_qty,
3376                    Some(material.standard_cost),
3377                    &inv_currency,
3378                );
3379                subledger.inventory_positions.push(position);
3380            }
3381
3382            stats.inventory_subledger_count = subledger.inventory_positions.len();
3383            debug!(
3384                "Inventory subledger records generated: {}",
3385                stats.inventory_subledger_count
3386            );
3387        }
3388
3389        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
3390        if !subledger.fa_records.is_empty() {
3391            if let Ok(start_date) =
3392                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3393            {
3394                let company_code = self
3395                    .config
3396                    .companies
3397                    .first()
3398                    .map(|c| c.code.as_str())
3399                    .unwrap_or("1000");
3400                let fiscal_year = start_date.year();
3401                let start_period = start_date.month();
3402                let end_period =
3403                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3404
3405                let depr_cfg = FaDepreciationScheduleConfig {
3406                    fiscal_year,
3407                    start_period,
3408                    end_period,
3409                    seed_offset: 800,
3410                };
3411                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3412                let runs = depr_gen.generate(company_code, &subledger.fa_records);
3413                let run_count = runs.len();
3414                subledger.depreciation_runs = runs;
3415                debug!(
3416                    "Depreciation runs generated: {} runs for {} periods",
3417                    run_count, self.config.global.period_months
3418                );
3419            }
3420        }
3421
3422        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
3423        if !subledger.inventory_positions.is_empty() {
3424            if let Ok(start_date) =
3425                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3426            {
3427                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3428                    - chrono::Days::new(1);
3429
3430                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3431                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3432
3433                for company in &self.config.companies {
3434                    let result = inv_val_gen.generate(
3435                        &company.code,
3436                        &subledger.inventory_positions,
3437                        as_of_date,
3438                    );
3439                    subledger.inventory_valuations.push(result);
3440                }
3441                debug!(
3442                    "Inventory valuations generated: {} company reports",
3443                    subledger.inventory_valuations.len()
3444                );
3445            }
3446        }
3447
3448        Ok((document_flows, subledger, fa_journal_entries))
3449    }
3450
3451    /// Phase 3c: Generate OCPM events from document flows.
3452    #[allow(clippy::too_many_arguments)]
3453    fn phase_ocpm_events(
3454        &mut self,
3455        document_flows: &DocumentFlowSnapshot,
3456        sourcing: &SourcingSnapshot,
3457        hr: &HrSnapshot,
3458        manufacturing: &ManufacturingSnapshot,
3459        banking: &BankingSnapshot,
3460        audit: &AuditSnapshot,
3461        financial_reporting: &FinancialReportingSnapshot,
3462        stats: &mut EnhancedGenerationStatistics,
3463    ) -> SynthResult<OcpmSnapshot> {
3464        let degradation = self.check_resources()?;
3465        if degradation >= DegradationLevel::Reduced {
3466            debug!(
3467                "Phase skipped due to resource pressure (degradation: {:?})",
3468                degradation
3469            );
3470            return Ok(OcpmSnapshot::default());
3471        }
3472        if self.phase_config.generate_ocpm_events {
3473            info!("Phase 3c: Generating OCPM Events");
3474            let ocpm_snapshot = self.generate_ocpm_events(
3475                document_flows,
3476                sourcing,
3477                hr,
3478                manufacturing,
3479                banking,
3480                audit,
3481                financial_reporting,
3482            )?;
3483            stats.ocpm_event_count = ocpm_snapshot.event_count;
3484            stats.ocpm_object_count = ocpm_snapshot.object_count;
3485            stats.ocpm_case_count = ocpm_snapshot.case_count;
3486            info!(
3487                "OCPM events generated: {} events, {} objects, {} cases",
3488                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3489            );
3490            self.check_resources_with_log("post-ocpm")?;
3491            Ok(ocpm_snapshot)
3492        } else {
3493            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3494            Ok(OcpmSnapshot::default())
3495        }
3496    }
3497
3498    /// Phase 4: Generate journal entries from document flows and standalone generation.
3499    fn phase_journal_entries(
3500        &mut self,
3501        coa: &Arc<ChartOfAccounts>,
3502        document_flows: &DocumentFlowSnapshot,
3503        _stats: &mut EnhancedGenerationStatistics,
3504    ) -> SynthResult<Vec<JournalEntry>> {
3505        let mut entries = Vec::new();
3506
3507        // Phase 4a: Generate JEs from document flows (for data coherence)
3508        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3509            debug!("Phase 4a: Generating JEs from document flows");
3510            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3511            debug!("Generated {} JEs from document flows", flow_entries.len());
3512            entries.extend(flow_entries);
3513        }
3514
3515        // Phase 4b: Generate standalone journal entries
3516        if self.phase_config.generate_journal_entries {
3517            info!("Phase 4: Generating Journal Entries");
3518            let je_entries = self.generate_journal_entries(coa)?;
3519            info!("Generated {} standalone journal entries", je_entries.len());
3520            entries.extend(je_entries);
3521        } else {
3522            debug!("Phase 4: Skipped (journal entry generation disabled)");
3523        }
3524
3525        if !entries.is_empty() {
3526            // Note: stats.total_entries/total_line_items are set in generate()
3527            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
3528            self.check_resources_with_log("post-journal-entries")?;
3529        }
3530
3531        Ok(entries)
3532    }
3533
3534    /// Phase 5: Inject anomalies into journal entries.
3535    fn phase_anomaly_injection(
3536        &mut self,
3537        entries: &mut [JournalEntry],
3538        actions: &DegradationActions,
3539        stats: &mut EnhancedGenerationStatistics,
3540    ) -> SynthResult<AnomalyLabels> {
3541        if self.phase_config.inject_anomalies
3542            && !entries.is_empty()
3543            && !actions.skip_anomaly_injection
3544        {
3545            info!("Phase 5: Injecting Anomalies");
3546            let result = self.inject_anomalies(entries)?;
3547            stats.anomalies_injected = result.labels.len();
3548            info!("Injected {} anomalies", stats.anomalies_injected);
3549            self.check_resources_with_log("post-anomaly-injection")?;
3550            Ok(result)
3551        } else if actions.skip_anomaly_injection {
3552            warn!("Phase 5: Skipped due to resource degradation");
3553            Ok(AnomalyLabels::default())
3554        } else {
3555            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3556            Ok(AnomalyLabels::default())
3557        }
3558    }
3559
3560    /// Phase 6: Validate balance sheet equation on journal entries.
3561    fn phase_balance_validation(
3562        &mut self,
3563        entries: &[JournalEntry],
3564    ) -> SynthResult<BalanceValidationResult> {
3565        if self.phase_config.validate_balances && !entries.is_empty() {
3566            debug!("Phase 6: Validating Balances");
3567            let balance_validation = self.validate_journal_entries(entries)?;
3568            if balance_validation.is_balanced {
3569                debug!("Balance validation passed");
3570            } else {
3571                warn!(
3572                    "Balance validation found {} errors",
3573                    balance_validation.validation_errors.len()
3574                );
3575            }
3576            Ok(balance_validation)
3577        } else {
3578            Ok(BalanceValidationResult::default())
3579        }
3580    }
3581
3582    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
3583    fn phase_data_quality_injection(
3584        &mut self,
3585        entries: &mut [JournalEntry],
3586        actions: &DegradationActions,
3587        stats: &mut EnhancedGenerationStatistics,
3588    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3589        if self.phase_config.inject_data_quality
3590            && !entries.is_empty()
3591            && !actions.skip_data_quality
3592        {
3593            info!("Phase 7: Injecting Data Quality Variations");
3594            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3595            stats.data_quality_issues = dq_stats.records_with_issues;
3596            info!("Injected {} data quality issues", stats.data_quality_issues);
3597            self.check_resources_with_log("post-data-quality")?;
3598            Ok((dq_stats, quality_issues))
3599        } else if actions.skip_data_quality {
3600            warn!("Phase 7: Skipped due to resource degradation");
3601            Ok((DataQualityStats::default(), Vec::new()))
3602        } else {
3603            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3604            Ok((DataQualityStats::default(), Vec::new()))
3605        }
3606    }
3607
3608    /// Phase 10b: Generate period-close journal entries.
3609    ///
3610    /// Generates:
3611    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
3612    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
3613    ///    for the configured period.
3614    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
3615    /// 3. Income statement closing JE per company: transfer net income after tax to retained
3616    ///    earnings via the Income Summary (3600) clearing account.
3617    fn phase_period_close(
3618        &mut self,
3619        entries: &mut Vec<JournalEntry>,
3620        subledger: &SubledgerSnapshot,
3621        stats: &mut EnhancedGenerationStatistics,
3622    ) -> SynthResult<()> {
3623        if !self.phase_config.generate_period_close || entries.is_empty() {
3624            debug!("Phase 10b: Skipped (period close disabled or no entries)");
3625            return Ok(());
3626        }
3627
3628        info!("Phase 10b: Generating period-close journal entries");
3629
3630        use datasynth_core::accounts::{
3631            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3632        };
3633        use rust_decimal::Decimal;
3634
3635        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3636            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3637        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3638        // Posting date for close entries is the last day of the period
3639        let close_date = end_date - chrono::Days::new(1);
3640
3641        // Statutory tax rate (21% — configurable rates come in later tiers)
3642        let tax_rate = Decimal::new(21, 2); // 0.21
3643
3644        // Collect company codes from config
3645        let company_codes: Vec<String> = self
3646            .config
3647            .companies
3648            .iter()
3649            .map(|c| c.code.clone())
3650            .collect();
3651
3652        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
3653        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3654        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3655
3656        // --- Depreciation JEs (per asset) ---
3657        // Compute period depreciation for each active fixed asset using straight-line method.
3658        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
3659        let period_months = self.config.global.period_months;
3660        for asset in &subledger.fa_records {
3661            // Skip assets that are inactive / fully depreciated / non-depreciable
3662            use datasynth_core::models::subledger::fa::AssetStatus;
3663            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3664                continue;
3665            }
3666            let useful_life_months = asset.useful_life_months();
3667            if useful_life_months == 0 {
3668                // Land or CIP — not depreciated
3669                continue;
3670            }
3671            let salvage_value = asset.salvage_value();
3672            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3673            if depreciable_base == Decimal::ZERO {
3674                continue;
3675            }
3676            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3677                * Decimal::from(period_months))
3678            .round_dp(2);
3679            if period_depr <= Decimal::ZERO {
3680                continue;
3681            }
3682
3683            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3684            depr_header.document_type = "CL".to_string();
3685            depr_header.header_text = Some(format!(
3686                "Depreciation - {} {}",
3687                asset.asset_number, asset.description
3688            ));
3689            depr_header.created_by = "CLOSE_ENGINE".to_string();
3690            depr_header.source = TransactionSource::Automated;
3691            depr_header.business_process = Some(BusinessProcess::R2R);
3692
3693            let doc_id = depr_header.document_id;
3694            let mut depr_je = JournalEntry::new(depr_header);
3695
3696            // DR Depreciation Expense (6000)
3697            depr_je.add_line(JournalEntryLine::debit(
3698                doc_id,
3699                1,
3700                expense_accounts::DEPRECIATION.to_string(),
3701                period_depr,
3702            ));
3703            // CR Accumulated Depreciation (1510)
3704            depr_je.add_line(JournalEntryLine::credit(
3705                doc_id,
3706                2,
3707                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3708                period_depr,
3709            ));
3710
3711            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3712            close_jes.push(depr_je);
3713        }
3714
3715        if !subledger.fa_records.is_empty() {
3716            debug!(
3717                "Generated {} depreciation JEs from {} FA records",
3718                close_jes.len(),
3719                subledger.fa_records.len()
3720            );
3721        }
3722
3723        // --- Accrual entries (standard period-end accruals per company) ---
3724        // Generate standard accrued expense entries (utilities, rent, interest) using
3725        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
3726        {
3727            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3728            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3729
3730            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
3731            let accrual_items: &[(&str, &str, &str)] = &[
3732                ("Accrued Utilities", "6200", "2100"),
3733                ("Accrued Rent", "6300", "2100"),
3734                ("Accrued Interest", "6100", "2150"),
3735            ];
3736
3737            for company_code in &company_codes {
3738                // Estimate company revenue from existing JEs
3739                let company_revenue: Decimal = entries
3740                    .iter()
3741                    .filter(|e| e.header.company_code == *company_code)
3742                    .flat_map(|e| e.lines.iter())
3743                    .filter(|l| l.gl_account.starts_with('4'))
3744                    .map(|l| l.credit_amount - l.debit_amount)
3745                    .fold(Decimal::ZERO, |acc, v| acc + v);
3746
3747                if company_revenue <= Decimal::ZERO {
3748                    continue;
3749                }
3750
3751                // Use 0.5% of period revenue per accrual item as a proxy
3752                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3753                if accrual_base <= Decimal::ZERO {
3754                    continue;
3755                }
3756
3757                for (description, expense_acct, liability_acct) in accrual_items {
3758                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3759                        company_code,
3760                        description,
3761                        accrual_base,
3762                        expense_acct,
3763                        liability_acct,
3764                        close_date,
3765                        None,
3766                    );
3767                    close_jes.push(accrual_je);
3768                    if let Some(rev_je) = reversal_je {
3769                        close_jes.push(rev_je);
3770                    }
3771                }
3772            }
3773
3774            debug!(
3775                "Generated accrual entries for {} companies",
3776                company_codes.len()
3777            );
3778        }
3779
3780        for company_code in &company_codes {
3781            // Calculate net income for this company from existing JEs:
3782            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
3783            // Revenue (4xxx): credit-normal, so net = credits - debits
3784            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
3785            let mut total_revenue = Decimal::ZERO;
3786            let mut total_expenses = Decimal::ZERO;
3787
3788            for entry in entries.iter() {
3789                if entry.header.company_code != *company_code {
3790                    continue;
3791                }
3792                for line in &entry.lines {
3793                    let category = AccountCategory::from_account(&line.gl_account);
3794                    match category {
3795                        AccountCategory::Revenue => {
3796                            // Revenue is credit-normal: net revenue = credits - debits
3797                            total_revenue += line.credit_amount - line.debit_amount;
3798                        }
3799                        AccountCategory::Cogs
3800                        | AccountCategory::OperatingExpense
3801                        | AccountCategory::OtherIncomeExpense
3802                        | AccountCategory::Tax => {
3803                            // Expenses are debit-normal: net expense = debits - credits
3804                            total_expenses += line.debit_amount - line.credit_amount;
3805                        }
3806                        _ => {}
3807                    }
3808                }
3809            }
3810
3811            let pre_tax_income = total_revenue - total_expenses;
3812
3813            // Skip if no income statement activity
3814            if pre_tax_income == Decimal::ZERO {
3815                debug!(
3816                    "Company {}: no pre-tax income, skipping period close",
3817                    company_code
3818                );
3819                continue;
3820            }
3821
3822            // --- Tax provision / DTA JE ---
3823            if pre_tax_income > Decimal::ZERO {
3824                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
3825                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3826
3827                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3828                tax_header.document_type = "CL".to_string();
3829                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3830                tax_header.created_by = "CLOSE_ENGINE".to_string();
3831                tax_header.source = TransactionSource::Automated;
3832                tax_header.business_process = Some(BusinessProcess::R2R);
3833
3834                let doc_id = tax_header.document_id;
3835                let mut tax_je = JournalEntry::new(tax_header);
3836
3837                // DR Tax Expense (8000)
3838                tax_je.add_line(JournalEntryLine::debit(
3839                    doc_id,
3840                    1,
3841                    tax_accounts::TAX_EXPENSE.to_string(),
3842                    tax_amount,
3843                ));
3844                // CR Income Tax Payable (2130)
3845                tax_je.add_line(JournalEntryLine::credit(
3846                    doc_id,
3847                    2,
3848                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3849                    tax_amount,
3850                ));
3851
3852                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3853                close_jes.push(tax_je);
3854            } else {
3855                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
3856                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
3857                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3858                if dta_amount > Decimal::ZERO {
3859                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3860                    dta_header.document_type = "CL".to_string();
3861                    dta_header.header_text =
3862                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
3863                    dta_header.created_by = "CLOSE_ENGINE".to_string();
3864                    dta_header.source = TransactionSource::Automated;
3865                    dta_header.business_process = Some(BusinessProcess::R2R);
3866
3867                    let doc_id = dta_header.document_id;
3868                    let mut dta_je = JournalEntry::new(dta_header);
3869
3870                    // DR Deferred Tax Asset (1600)
3871                    dta_je.add_line(JournalEntryLine::debit(
3872                        doc_id,
3873                        1,
3874                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3875                        dta_amount,
3876                    ));
3877                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
3878                    // reflecting the benefit of the future deductible temporary difference.
3879                    dta_je.add_line(JournalEntryLine::credit(
3880                        doc_id,
3881                        2,
3882                        tax_accounts::TAX_EXPENSE.to_string(),
3883                        dta_amount,
3884                    ));
3885
3886                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3887                    close_jes.push(dta_je);
3888                    debug!(
3889                        "Company {}: loss year — recognised DTA of {}",
3890                        company_code, dta_amount
3891                    );
3892                }
3893            }
3894
3895            // --- Dividend JEs (v2.4) ---
3896            // If the entity is profitable after tax, declare a 10% dividend payout.
3897            // This runs AFTER tax provision so the dividend is based on post-tax income
3898            // but BEFORE the retained earnings close so the RE transfer reflects the
3899            // reduced balance.
3900            let tax_provision = if pre_tax_income > Decimal::ZERO {
3901                (pre_tax_income * tax_rate).round_dp(2)
3902            } else {
3903                Decimal::ZERO
3904            };
3905            let net_income = pre_tax_income - tax_provision;
3906
3907            if net_income > Decimal::ZERO {
3908                use datasynth_generators::DividendGenerator;
3909                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
3910                let mut div_gen = DividendGenerator::new(self.seed + 460);
3911                let currency_str = self
3912                    .config
3913                    .companies
3914                    .iter()
3915                    .find(|c| c.code == *company_code)
3916                    .map(|c| c.currency.as_str())
3917                    .unwrap_or("USD");
3918                let div_result = div_gen.generate(
3919                    company_code,
3920                    close_date,
3921                    Decimal::new(1, 0), // $1 per share placeholder
3922                    dividend_amount,
3923                    currency_str,
3924                );
3925                let div_je_count = div_result.journal_entries.len();
3926                close_jes.extend(div_result.journal_entries);
3927                debug!(
3928                    "Company {}: declared dividend of {} ({} JEs)",
3929                    company_code, dividend_amount, div_je_count
3930                );
3931            }
3932
3933            // --- Income statement closing JE ---
3934            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
3935            // For a loss year the DTA JE above already recognises the deferred benefit; here we
3936            // close the pre-tax loss into Retained Earnings as-is.
3937            if net_income != Decimal::ZERO {
3938                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3939                close_header.document_type = "CL".to_string();
3940                close_header.header_text =
3941                    Some(format!("Income statement close - {}", company_code));
3942                close_header.created_by = "CLOSE_ENGINE".to_string();
3943                close_header.source = TransactionSource::Automated;
3944                close_header.business_process = Some(BusinessProcess::R2R);
3945
3946                let doc_id = close_header.document_id;
3947                let mut close_je = JournalEntry::new(close_header);
3948
3949                let abs_net_income = net_income.abs();
3950
3951                if net_income > Decimal::ZERO {
3952                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
3953                    close_je.add_line(JournalEntryLine::debit(
3954                        doc_id,
3955                        1,
3956                        equity_accounts::INCOME_SUMMARY.to_string(),
3957                        abs_net_income,
3958                    ));
3959                    close_je.add_line(JournalEntryLine::credit(
3960                        doc_id,
3961                        2,
3962                        equity_accounts::RETAINED_EARNINGS.to_string(),
3963                        abs_net_income,
3964                    ));
3965                } else {
3966                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
3967                    close_je.add_line(JournalEntryLine::debit(
3968                        doc_id,
3969                        1,
3970                        equity_accounts::RETAINED_EARNINGS.to_string(),
3971                        abs_net_income,
3972                    ));
3973                    close_je.add_line(JournalEntryLine::credit(
3974                        doc_id,
3975                        2,
3976                        equity_accounts::INCOME_SUMMARY.to_string(),
3977                        abs_net_income,
3978                    ));
3979                }
3980
3981                debug_assert!(
3982                    close_je.is_balanced(),
3983                    "Income statement closing JE must be balanced"
3984                );
3985                close_jes.push(close_je);
3986            }
3987        }
3988
3989        let close_count = close_jes.len();
3990        if close_count > 0 {
3991            info!("Generated {} period-close journal entries", close_count);
3992            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
3993            entries.extend(close_jes);
3994            stats.period_close_je_count = close_count;
3995
3996            // Update total entry/line-item stats
3997            stats.total_entries = entries.len() as u64;
3998            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
3999        } else {
4000            debug!("No period-close entries generated (no income statement activity)");
4001        }
4002
4003        Ok(())
4004    }
4005
4006    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
4007    fn phase_audit_data(
4008        &mut self,
4009        entries: &[JournalEntry],
4010        stats: &mut EnhancedGenerationStatistics,
4011    ) -> SynthResult<AuditSnapshot> {
4012        if self.phase_config.generate_audit {
4013            info!("Phase 8: Generating Audit Data");
4014            let audit_snapshot = self.generate_audit_data(entries)?;
4015            stats.audit_engagement_count = audit_snapshot.engagements.len();
4016            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4017            stats.audit_evidence_count = audit_snapshot.evidence.len();
4018            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4019            stats.audit_finding_count = audit_snapshot.findings.len();
4020            stats.audit_judgment_count = audit_snapshot.judgments.len();
4021            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4022            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4023            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4024            stats.audit_sample_count = audit_snapshot.samples.len();
4025            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4026            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4027            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4028            stats.audit_related_party_count = audit_snapshot.related_parties.len();
4029            stats.audit_related_party_transaction_count =
4030                audit_snapshot.related_party_transactions.len();
4031            info!(
4032                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4033                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4034                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4035                 {} RP transactions",
4036                stats.audit_engagement_count,
4037                stats.audit_workpaper_count,
4038                stats.audit_evidence_count,
4039                stats.audit_risk_count,
4040                stats.audit_finding_count,
4041                stats.audit_judgment_count,
4042                stats.audit_confirmation_count,
4043                stats.audit_procedure_step_count,
4044                stats.audit_sample_count,
4045                stats.audit_analytical_result_count,
4046                stats.audit_ia_function_count,
4047                stats.audit_ia_report_count,
4048                stats.audit_related_party_count,
4049                stats.audit_related_party_transaction_count,
4050            );
4051            self.check_resources_with_log("post-audit")?;
4052            Ok(audit_snapshot)
4053        } else {
4054            debug!("Phase 8: Skipped (audit generation disabled)");
4055            Ok(AuditSnapshot::default())
4056        }
4057    }
4058
4059    /// Phase 9: Generate banking KYC/AML data.
4060    fn phase_banking_data(
4061        &mut self,
4062        stats: &mut EnhancedGenerationStatistics,
4063    ) -> SynthResult<BankingSnapshot> {
4064        if self.phase_config.generate_banking {
4065            info!("Phase 9: Generating Banking KYC/AML Data");
4066            let banking_snapshot = self.generate_banking_data()?;
4067            stats.banking_customer_count = banking_snapshot.customers.len();
4068            stats.banking_account_count = banking_snapshot.accounts.len();
4069            stats.banking_transaction_count = banking_snapshot.transactions.len();
4070            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4071            info!(
4072                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4073                stats.banking_customer_count, stats.banking_account_count,
4074                stats.banking_transaction_count, stats.banking_suspicious_count
4075            );
4076            self.check_resources_with_log("post-banking")?;
4077            Ok(banking_snapshot)
4078        } else {
4079            debug!("Phase 9: Skipped (banking generation disabled)");
4080            Ok(BankingSnapshot::default())
4081        }
4082    }
4083
4084    /// Phase 10: Export accounting network graphs for ML training.
4085    fn phase_graph_export(
4086        &mut self,
4087        entries: &[JournalEntry],
4088        coa: &Arc<ChartOfAccounts>,
4089        stats: &mut EnhancedGenerationStatistics,
4090    ) -> SynthResult<GraphExportSnapshot> {
4091        if self.phase_config.generate_graph_export && !entries.is_empty() {
4092            info!("Phase 10: Exporting Accounting Network Graphs");
4093            match self.export_graphs(entries, coa, stats) {
4094                Ok(snapshot) => {
4095                    info!(
4096                        "Graph export complete: {} graphs ({} nodes, {} edges)",
4097                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4098                    );
4099                    Ok(snapshot)
4100                }
4101                Err(e) => {
4102                    warn!("Phase 10: Graph export failed: {}", e);
4103                    Ok(GraphExportSnapshot::default())
4104                }
4105            }
4106        } else {
4107            debug!("Phase 10: Skipped (graph export disabled or no entries)");
4108            Ok(GraphExportSnapshot::default())
4109        }
4110    }
4111
4112    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
4113    #[allow(clippy::too_many_arguments)]
4114    fn phase_hypergraph_export(
4115        &self,
4116        coa: &Arc<ChartOfAccounts>,
4117        entries: &[JournalEntry],
4118        document_flows: &DocumentFlowSnapshot,
4119        sourcing: &SourcingSnapshot,
4120        hr: &HrSnapshot,
4121        manufacturing: &ManufacturingSnapshot,
4122        banking: &BankingSnapshot,
4123        audit: &AuditSnapshot,
4124        financial_reporting: &FinancialReportingSnapshot,
4125        ocpm: &OcpmSnapshot,
4126        compliance: &ComplianceRegulationsSnapshot,
4127        stats: &mut EnhancedGenerationStatistics,
4128    ) -> SynthResult<()> {
4129        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4130            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4131            match self.export_hypergraph(
4132                coa,
4133                entries,
4134                document_flows,
4135                sourcing,
4136                hr,
4137                manufacturing,
4138                banking,
4139                audit,
4140                financial_reporting,
4141                ocpm,
4142                compliance,
4143                stats,
4144            ) {
4145                Ok(info) => {
4146                    info!(
4147                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4148                        info.node_count, info.edge_count, info.hyperedge_count
4149                    );
4150                }
4151                Err(e) => {
4152                    warn!("Phase 10b: Hypergraph export failed: {}", e);
4153                }
4154            }
4155        } else {
4156            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4157        }
4158        Ok(())
4159    }
4160
4161    /// Phase 11: LLM Enrichment.
4162    ///
4163    /// Uses an LLM provider (mock by default) to enrich vendor names with
4164    /// realistic, context-aware names. This phase is non-blocking: failures
4165    /// log a warning but do not stop the generation pipeline.
4166    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4167        if !self.config.llm.enabled {
4168            debug!("Phase 11: Skipped (LLM enrichment disabled)");
4169            return;
4170        }
4171
4172        info!("Phase 11: Starting LLM Enrichment");
4173        let start = std::time::Instant::now();
4174
4175        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4176            // Select provider: use HttpLlmProvider when a non-mock provider is configured
4177            // and the corresponding API key environment variable is present.
4178            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4179                let schema_provider = &self.config.llm.provider;
4180                let api_key_env = match schema_provider.as_str() {
4181                    "openai" => Some("OPENAI_API_KEY"),
4182                    "anthropic" => Some("ANTHROPIC_API_KEY"),
4183                    "custom" => Some("LLM_API_KEY"),
4184                    _ => None,
4185                };
4186                if let Some(key_env) = api_key_env {
4187                    if std::env::var(key_env).is_ok() {
4188                        let llm_config = datasynth_core::llm::LlmConfig {
4189                            model: self.config.llm.model.clone(),
4190                            api_key_env: key_env.to_string(),
4191                            ..datasynth_core::llm::LlmConfig::default()
4192                        };
4193                        match HttpLlmProvider::new(llm_config) {
4194                            Ok(p) => Arc::new(p),
4195                            Err(e) => {
4196                                warn!(
4197                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
4198                                    e
4199                                );
4200                                Arc::new(MockLlmProvider::new(self.seed))
4201                            }
4202                        }
4203                    } else {
4204                        Arc::new(MockLlmProvider::new(self.seed))
4205                    }
4206                } else {
4207                    Arc::new(MockLlmProvider::new(self.seed))
4208                }
4209            };
4210            let enricher = VendorLlmEnricher::new(provider);
4211
4212            let industry = format!("{:?}", self.config.global.industry);
4213            let max_enrichments = self
4214                .config
4215                .llm
4216                .max_vendor_enrichments
4217                .min(self.master_data.vendors.len());
4218
4219            let mut enriched_count = 0usize;
4220            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4221                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4222                    Ok(name) => {
4223                        vendor.name = name;
4224                        enriched_count += 1;
4225                    }
4226                    Err(e) => {
4227                        warn!(
4228                            "LLM vendor enrichment failed for {}: {}",
4229                            vendor.vendor_id, e
4230                        );
4231                    }
4232                }
4233            }
4234
4235            enriched_count
4236        }));
4237
4238        match result {
4239            Ok(enriched_count) => {
4240                stats.llm_vendors_enriched = enriched_count;
4241                let elapsed = start.elapsed();
4242                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4243                info!(
4244                    "Phase 11 complete: {} vendors enriched in {}ms",
4245                    enriched_count, stats.llm_enrichment_ms
4246                );
4247            }
4248            Err(_) => {
4249                let elapsed = start.elapsed();
4250                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4251                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4252            }
4253        }
4254    }
4255
4256    /// Phase 12: Diffusion Enhancement.
4257    ///
4258    /// Generates a sample set using the statistical diffusion backend to
4259    /// demonstrate distribution-matching data generation. This phase is
4260    /// non-blocking: failures log a warning but do not stop the pipeline.
4261    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4262        if !self.config.diffusion.enabled {
4263            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4264            return;
4265        }
4266
4267        info!("Phase 12: Starting Diffusion Enhancement");
4268        let start = std::time::Instant::now();
4269
4270        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4271            // Target distribution: transaction amounts (log-normal-like)
4272            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
4273            let stds = vec![2000.0, 1.5, 1.0];
4274
4275            let diffusion_config = DiffusionConfig {
4276                n_steps: self.config.diffusion.n_steps,
4277                seed: self.seed,
4278                ..Default::default()
4279            };
4280
4281            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4282
4283            let n_samples = self.config.diffusion.sample_size;
4284            let n_features = 3; // amount, line_items, approval_level
4285            let samples = backend.generate(n_samples, n_features, self.seed);
4286
4287            samples.len()
4288        }));
4289
4290        match result {
4291            Ok(sample_count) => {
4292                stats.diffusion_samples_generated = sample_count;
4293                let elapsed = start.elapsed();
4294                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4295                info!(
4296                    "Phase 12 complete: {} diffusion samples generated in {}ms",
4297                    sample_count, stats.diffusion_enhancement_ms
4298                );
4299            }
4300            Err(_) => {
4301                let elapsed = start.elapsed();
4302                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4303                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4304            }
4305        }
4306    }
4307
4308    /// Phase 13: Causal Overlay.
4309    ///
4310    /// Builds a structural causal model from a built-in template (e.g.,
4311    /// fraud_detection) and generates causal samples. Optionally validates
4312    /// that the output respects the causal structure. This phase is
4313    /// non-blocking: failures log a warning but do not stop the pipeline.
4314    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4315        if !self.config.causal.enabled {
4316            debug!("Phase 13: Skipped (causal generation disabled)");
4317            return;
4318        }
4319
4320        info!("Phase 13: Starting Causal Overlay");
4321        let start = std::time::Instant::now();
4322
4323        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4324            // Select template based on config
4325            let graph = match self.config.causal.template.as_str() {
4326                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4327                _ => CausalGraph::fraud_detection_template(),
4328            };
4329
4330            let scm = StructuralCausalModel::new(graph.clone())
4331                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4332
4333            let n_samples = self.config.causal.sample_size;
4334            let samples = scm
4335                .generate(n_samples, self.seed)
4336                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4337
4338            // Optionally validate causal structure
4339            let validation_passed = if self.config.causal.validate {
4340                let report = CausalValidator::validate_causal_structure(&samples, &graph);
4341                if report.valid {
4342                    info!(
4343                        "Causal validation passed: all {} checks OK",
4344                        report.checks.len()
4345                    );
4346                } else {
4347                    warn!(
4348                        "Causal validation: {} violations detected: {:?}",
4349                        report.violations.len(),
4350                        report.violations
4351                    );
4352                }
4353                Some(report.valid)
4354            } else {
4355                None
4356            };
4357
4358            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4359        }));
4360
4361        match result {
4362            Ok(Ok((sample_count, validation_passed))) => {
4363                stats.causal_samples_generated = sample_count;
4364                stats.causal_validation_passed = validation_passed;
4365                let elapsed = start.elapsed();
4366                stats.causal_generation_ms = elapsed.as_millis() as u64;
4367                info!(
4368                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4369                    sample_count, stats.causal_generation_ms, validation_passed,
4370                );
4371            }
4372            Ok(Err(e)) => {
4373                let elapsed = start.elapsed();
4374                stats.causal_generation_ms = elapsed.as_millis() as u64;
4375                warn!("Phase 13: Causal generation failed: {}", e);
4376            }
4377            Err(_) => {
4378                let elapsed = start.elapsed();
4379                stats.causal_generation_ms = elapsed.as_millis() as u64;
4380                warn!("Phase 13: Causal generation failed (panic caught), continuing");
4381            }
4382        }
4383    }
4384
4385    /// Phase 14: Generate S2C sourcing data.
4386    fn phase_sourcing_data(
4387        &mut self,
4388        stats: &mut EnhancedGenerationStatistics,
4389    ) -> SynthResult<SourcingSnapshot> {
4390        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4391            debug!("Phase 14: Skipped (sourcing generation disabled)");
4392            return Ok(SourcingSnapshot::default());
4393        }
4394        let degradation = self.check_resources()?;
4395        if degradation >= DegradationLevel::Reduced {
4396            debug!(
4397                "Phase skipped due to resource pressure (degradation: {:?})",
4398                degradation
4399            );
4400            return Ok(SourcingSnapshot::default());
4401        }
4402
4403        info!("Phase 14: Generating S2C Sourcing Data");
4404        let seed = self.seed;
4405
4406        // Gather vendor data from master data
4407        let vendor_ids: Vec<String> = self
4408            .master_data
4409            .vendors
4410            .iter()
4411            .map(|v| v.vendor_id.clone())
4412            .collect();
4413        if vendor_ids.is_empty() {
4414            debug!("Phase 14: Skipped (no vendors available)");
4415            return Ok(SourcingSnapshot::default());
4416        }
4417
4418        let categories: Vec<(String, String)> = vec![
4419            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4420            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4421            ("CAT-IT".to_string(), "IT Equipment".to_string()),
4422            ("CAT-SVC".to_string(), "Professional Services".to_string()),
4423            ("CAT-LOG".to_string(), "Logistics".to_string()),
4424        ];
4425        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4426            .iter()
4427            .map(|(id, name)| {
4428                (
4429                    id.clone(),
4430                    name.clone(),
4431                    rust_decimal::Decimal::from(100_000),
4432                )
4433            })
4434            .collect();
4435
4436        let company_code = self
4437            .config
4438            .companies
4439            .first()
4440            .map(|c| c.code.as_str())
4441            .unwrap_or("1000");
4442        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4443            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4444        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4445        let fiscal_year = start_date.year() as u16;
4446        let owner_ids: Vec<String> = self
4447            .master_data
4448            .employees
4449            .iter()
4450            .take(5)
4451            .map(|e| e.employee_id.clone())
4452            .collect();
4453        let owner_id = owner_ids
4454            .first()
4455            .map(std::string::String::as_str)
4456            .unwrap_or("BUYER-001");
4457
4458        // Step 1: Spend Analysis
4459        let mut spend_gen = SpendAnalysisGenerator::new(seed);
4460        let spend_analyses =
4461            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4462
4463        // Step 2: Sourcing Projects
4464        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4465        let sourcing_projects = if owner_ids.is_empty() {
4466            Vec::new()
4467        } else {
4468            project_gen.generate(
4469                company_code,
4470                &categories_with_spend,
4471                &owner_ids,
4472                start_date,
4473                self.config.global.period_months,
4474            )
4475        };
4476        stats.sourcing_project_count = sourcing_projects.len();
4477
4478        // Step 3: Qualifications
4479        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4480        let mut qual_gen = QualificationGenerator::new(seed + 2);
4481        let qualifications = qual_gen.generate(
4482            company_code,
4483            &qual_vendor_ids,
4484            sourcing_projects.first().map(|p| p.project_id.as_str()),
4485            owner_id,
4486            start_date,
4487        );
4488
4489        // Step 4: RFx Events
4490        let mut rfx_gen = RfxGenerator::new(seed + 3);
4491        let rfx_events: Vec<RfxEvent> = sourcing_projects
4492            .iter()
4493            .map(|proj| {
4494                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4495                rfx_gen.generate(
4496                    company_code,
4497                    &proj.project_id,
4498                    &proj.category_id,
4499                    &qualified_vids,
4500                    owner_id,
4501                    start_date,
4502                    50000.0,
4503                )
4504            })
4505            .collect();
4506        stats.rfx_event_count = rfx_events.len();
4507
4508        // Step 5: Bids
4509        let mut bid_gen = BidGenerator::new(seed + 4);
4510        let mut all_bids = Vec::new();
4511        for rfx in &rfx_events {
4512            let bidder_count = vendor_ids.len().clamp(2, 5);
4513            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4514            let bids = bid_gen.generate(rfx, &responding, start_date);
4515            all_bids.extend(bids);
4516        }
4517        stats.bid_count = all_bids.len();
4518
4519        // Step 6: Bid Evaluations
4520        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4521        let bid_evaluations: Vec<BidEvaluation> = rfx_events
4522            .iter()
4523            .map(|rfx| {
4524                let rfx_bids: Vec<SupplierBid> = all_bids
4525                    .iter()
4526                    .filter(|b| b.rfx_id == rfx.rfx_id)
4527                    .cloned()
4528                    .collect();
4529                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4530            })
4531            .collect();
4532
4533        // Step 7: Contracts from winning bids
4534        let mut contract_gen = ContractGenerator::new(seed + 6);
4535        let contracts: Vec<ProcurementContract> = bid_evaluations
4536            .iter()
4537            .zip(rfx_events.iter())
4538            .filter_map(|(eval, rfx)| {
4539                eval.ranked_bids.first().and_then(|winner| {
4540                    all_bids
4541                        .iter()
4542                        .find(|b| b.bid_id == winner.bid_id)
4543                        .map(|winning_bid| {
4544                            contract_gen.generate_from_bid(
4545                                winning_bid,
4546                                Some(&rfx.sourcing_project_id),
4547                                &rfx.category_id,
4548                                owner_id,
4549                                start_date,
4550                            )
4551                        })
4552                })
4553            })
4554            .collect();
4555        stats.contract_count = contracts.len();
4556
4557        // Step 8: Catalog Items
4558        let mut catalog_gen = CatalogGenerator::new(seed + 7);
4559        let catalog_items = catalog_gen.generate(&contracts);
4560        stats.catalog_item_count = catalog_items.len();
4561
4562        // Step 9: Scorecards
4563        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4564        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4565            .iter()
4566            .fold(
4567                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4568                |mut acc, c| {
4569                    acc.entry(c.vendor_id.clone()).or_default().push(c);
4570                    acc
4571                },
4572            )
4573            .into_iter()
4574            .collect();
4575        let scorecards = scorecard_gen.generate(
4576            company_code,
4577            &vendor_contracts,
4578            start_date,
4579            end_date,
4580            owner_id,
4581        );
4582        stats.scorecard_count = scorecards.len();
4583
4584        // Back-populate cross-references on sourcing projects (Task 35)
4585        // Link each project to its RFx events, contracts, and spend analyses
4586        let mut sourcing_projects = sourcing_projects;
4587        for project in &mut sourcing_projects {
4588            // Link RFx events generated for this project
4589            project.rfx_ids = rfx_events
4590                .iter()
4591                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4592                .map(|rfx| rfx.rfx_id.clone())
4593                .collect();
4594
4595            // Link contract awarded from this project's RFx
4596            project.contract_id = contracts
4597                .iter()
4598                .find(|c| {
4599                    c.sourcing_project_id
4600                        .as_deref()
4601                        .is_some_and(|sp| sp == project.project_id)
4602                })
4603                .map(|c| c.contract_id.clone());
4604
4605            // Link spend analysis for matching category (use category_id as the reference)
4606            project.spend_analysis_id = spend_analyses
4607                .iter()
4608                .find(|sa| sa.category_id == project.category_id)
4609                .map(|sa| sa.category_id.clone());
4610        }
4611
4612        info!(
4613            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4614            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4615            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4616        );
4617        self.check_resources_with_log("post-sourcing")?;
4618
4619        Ok(SourcingSnapshot {
4620            spend_analyses,
4621            sourcing_projects,
4622            qualifications,
4623            rfx_events,
4624            bids: all_bids,
4625            bid_evaluations,
4626            contracts,
4627            catalog_items,
4628            scorecards,
4629        })
4630    }
4631
4632    /// Build a [`GroupStructure`] from the current company configuration.
4633    ///
4634    /// The first company in the configuration is treated as the ultimate parent.
4635    /// All remaining companies become wholly-owned (100 %) subsidiaries with
4636    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
4637    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4638        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4639
4640        let parent_code = self
4641            .config
4642            .companies
4643            .first()
4644            .map(|c| c.code.clone())
4645            .unwrap_or_else(|| "PARENT".to_string());
4646
4647        let mut group = GroupStructure::new(parent_code);
4648
4649        for company in self.config.companies.iter().skip(1) {
4650            let sub =
4651                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4652            group.add_subsidiary(sub);
4653        }
4654
4655        group
4656    }
4657
4658    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
4659    fn phase_intercompany(
4660        &mut self,
4661        journal_entries: &[JournalEntry],
4662        stats: &mut EnhancedGenerationStatistics,
4663    ) -> SynthResult<IntercompanySnapshot> {
4664        // Skip if intercompany is disabled in config
4665        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4666            debug!("Phase 14b: Skipped (intercompany generation disabled)");
4667            return Ok(IntercompanySnapshot::default());
4668        }
4669
4670        // Intercompany requires at least 2 companies
4671        if self.config.companies.len() < 2 {
4672            debug!(
4673                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4674                self.config.companies.len()
4675            );
4676            return Ok(IntercompanySnapshot::default());
4677        }
4678
4679        info!("Phase 14b: Generating Intercompany Transactions");
4680
4681        // Build the group structure early — used by ISA 600 component auditor scope
4682        // and consolidated financial statement generators downstream.
4683        let group_structure = self.build_group_structure();
4684        debug!(
4685            "Group structure built: parent={}, subsidiaries={}",
4686            group_structure.parent_entity,
4687            group_structure.subsidiaries.len()
4688        );
4689
4690        let seed = self.seed;
4691        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4692            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4693        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4694
4695        // Build ownership structure from company configs
4696        // First company is treated as the parent, remaining are subsidiaries
4697        let parent_code = self.config.companies[0].code.clone();
4698        let mut ownership_structure =
4699            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4700
4701        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4702            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4703                format!("REL{:03}", i + 1),
4704                parent_code.clone(),
4705                company.code.clone(),
4706                rust_decimal::Decimal::from(100), // Default 100% ownership
4707                start_date,
4708            );
4709            ownership_structure.add_relationship(relationship);
4710        }
4711
4712        // Convert config transfer pricing method to core model enum
4713        let tp_method = match self.config.intercompany.transfer_pricing_method {
4714            datasynth_config::schema::TransferPricingMethod::CostPlus => {
4715                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4716            }
4717            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4718                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4719            }
4720            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4721                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4722            }
4723            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4724                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4725            }
4726            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4727                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4728            }
4729        };
4730
4731        // Build IC generator config from schema config
4732        let ic_currency = self
4733            .config
4734            .companies
4735            .first()
4736            .map(|c| c.currency.clone())
4737            .unwrap_or_else(|| "USD".to_string());
4738        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4739            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4740            transfer_pricing_method: tp_method,
4741            markup_percent: rust_decimal::Decimal::from_f64_retain(
4742                self.config.intercompany.markup_percent,
4743            )
4744            .unwrap_or(rust_decimal::Decimal::from(5)),
4745            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4746            default_currency: ic_currency,
4747            ..Default::default()
4748        };
4749
4750        // Create IC generator
4751        let mut ic_generator = datasynth_generators::ICGenerator::new(
4752            ic_gen_config,
4753            ownership_structure.clone(),
4754            seed + 50,
4755        );
4756
4757        // Generate IC transactions for the period
4758        // Use ~3 transactions per day as a reasonable default
4759        let transactions_per_day = 3;
4760        let matched_pairs = ic_generator.generate_transactions_for_period(
4761            start_date,
4762            end_date,
4763            transactions_per_day,
4764        );
4765
4766        // Generate IC source P2P/O2C documents
4767        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
4768        debug!(
4769            "Generated {} IC seller invoices, {} IC buyer POs",
4770            ic_doc_chains.seller_invoices.len(),
4771            ic_doc_chains.buyer_orders.len()
4772        );
4773
4774        // Generate journal entries from matched pairs
4775        let mut seller_entries = Vec::new();
4776        let mut buyer_entries = Vec::new();
4777        let fiscal_year = start_date.year();
4778
4779        for pair in &matched_pairs {
4780            let fiscal_period = pair.posting_date.month();
4781            let (seller_je, buyer_je) =
4782                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4783            seller_entries.push(seller_je);
4784            buyer_entries.push(buyer_je);
4785        }
4786
4787        // Run matching engine
4788        let matching_config = datasynth_generators::ICMatchingConfig {
4789            base_currency: self
4790                .config
4791                .companies
4792                .first()
4793                .map(|c| c.currency.clone())
4794                .unwrap_or_else(|| "USD".to_string()),
4795            ..Default::default()
4796        };
4797        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4798        matching_engine.load_matched_pairs(&matched_pairs);
4799        let matching_result = matching_engine.run_matching(end_date);
4800
4801        // Generate elimination entries if configured
4802        let mut elimination_entries = Vec::new();
4803        if self.config.intercompany.generate_eliminations {
4804            let elim_config = datasynth_generators::EliminationConfig {
4805                consolidation_entity: "GROUP".to_string(),
4806                base_currency: self
4807                    .config
4808                    .companies
4809                    .first()
4810                    .map(|c| c.currency.clone())
4811                    .unwrap_or_else(|| "USD".to_string()),
4812                ..Default::default()
4813            };
4814
4815            let mut elim_generator =
4816                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4817
4818            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4819            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4820                matching_result
4821                    .matched_balances
4822                    .iter()
4823                    .chain(matching_result.unmatched_balances.iter())
4824                    .cloned()
4825                    .collect();
4826
4827            // Build investment and equity maps from the group structure so that the
4828            // elimination generator can produce equity-investment elimination entries
4829            // (parent's investment in subsidiary vs. subsidiary's equity capital).
4830            //
4831            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
4832            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
4833            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
4834            //
4835            // Net assets are derived from the journal entries using account-range heuristics:
4836            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
4837            // no JE data is available (IC phase runs early in the generation pipeline).
4838            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4839                std::collections::HashMap::new();
4840            let mut equity_amounts: std::collections::HashMap<
4841                String,
4842                std::collections::HashMap<String, rust_decimal::Decimal>,
4843            > = std::collections::HashMap::new();
4844            {
4845                use rust_decimal::Decimal;
4846                let hundred = Decimal::from(100u32);
4847                let ten_pct = Decimal::new(10, 2); // 0.10
4848                let thirty_pct = Decimal::new(30, 2); // 0.30
4849                let sixty_pct = Decimal::new(60, 2); // 0.60
4850                let parent_code = &group_structure.parent_entity;
4851                for sub in &group_structure.subsidiaries {
4852                    let net_assets = {
4853                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4854                        if na > Decimal::ZERO {
4855                            na
4856                        } else {
4857                            Decimal::from(1_000_000u64)
4858                        }
4859                    };
4860                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
4861                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4862                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4863
4864                    // Split subsidiary equity into conventional components:
4865                    // 10 % share capital / 30 % APIC / 60 % retained earnings
4866                    let mut eq_map = std::collections::HashMap::new();
4867                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4868                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4869                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4870                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
4871                }
4872            }
4873
4874            let journal = elim_generator.generate_eliminations(
4875                &fiscal_period,
4876                end_date,
4877                &all_balances,
4878                &matched_pairs,
4879                &investment_amounts,
4880                &equity_amounts,
4881            );
4882
4883            elimination_entries = journal.entries.clone();
4884        }
4885
4886        let matched_pair_count = matched_pairs.len();
4887        let elimination_entry_count = elimination_entries.len();
4888        let match_rate = matching_result.match_rate;
4889
4890        stats.ic_matched_pair_count = matched_pair_count;
4891        stats.ic_elimination_count = elimination_entry_count;
4892        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4893
4894        info!(
4895            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4896            matched_pair_count,
4897            stats.ic_transaction_count,
4898            seller_entries.len(),
4899            buyer_entries.len(),
4900            elimination_entry_count,
4901            match_rate * 100.0
4902        );
4903        self.check_resources_with_log("post-intercompany")?;
4904
4905        // ----------------------------------------------------------------
4906        // NCI measurements: derive from group structure ownership percentages
4907        // ----------------------------------------------------------------
4908        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4909            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4910            use rust_decimal::Decimal;
4911
4912            let eight_pct = Decimal::new(8, 2); // 0.08
4913
4914            group_structure
4915                .subsidiaries
4916                .iter()
4917                .filter(|sub| {
4918                    sub.nci_percentage > Decimal::ZERO
4919                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4920                })
4921                .map(|sub| {
4922                    // Compute net assets from actual journal entries for this subsidiary.
4923                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
4924                    // IC phase runs before the main JE batch has been populated).
4925                    let net_assets_from_jes =
4926                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4927
4928                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
4929                        net_assets_from_jes.round_dp(2)
4930                    } else {
4931                        // Fallback: use a plausible base amount
4932                        Decimal::from(1_000_000u64)
4933                    };
4934
4935                    // Net income approximated as 8% of net assets
4936                    let net_income = (net_assets * eight_pct).round_dp(2);
4937
4938                    NciMeasurement::compute(
4939                        sub.entity_code.clone(),
4940                        sub.nci_percentage,
4941                        net_assets,
4942                        net_income,
4943                    )
4944                })
4945                .collect()
4946        };
4947
4948        if !nci_measurements.is_empty() {
4949            info!(
4950                "NCI measurements: {} subsidiaries with non-controlling interests",
4951                nci_measurements.len()
4952            );
4953        }
4954
4955        Ok(IntercompanySnapshot {
4956            group_structure: Some(group_structure),
4957            matched_pairs,
4958            seller_journal_entries: seller_entries,
4959            buyer_journal_entries: buyer_entries,
4960            elimination_entries,
4961            nci_measurements,
4962            ic_document_chains: Some(ic_doc_chains),
4963            matched_pair_count,
4964            elimination_entry_count,
4965            match_rate,
4966        })
4967    }
4968
4969    /// Phase 15: Generate bank reconciliations and financial statements.
4970    fn phase_financial_reporting(
4971        &mut self,
4972        document_flows: &DocumentFlowSnapshot,
4973        journal_entries: &[JournalEntry],
4974        coa: &Arc<ChartOfAccounts>,
4975        _hr: &HrSnapshot,
4976        _audit: &AuditSnapshot,
4977        stats: &mut EnhancedGenerationStatistics,
4978    ) -> SynthResult<FinancialReportingSnapshot> {
4979        let fs_enabled = self.phase_config.generate_financial_statements
4980            || self.config.financial_reporting.enabled;
4981        let br_enabled = self.phase_config.generate_bank_reconciliation;
4982
4983        if !fs_enabled && !br_enabled {
4984            debug!("Phase 15: Skipped (financial reporting disabled)");
4985            return Ok(FinancialReportingSnapshot::default());
4986        }
4987
4988        info!("Phase 15: Generating Financial Reporting Data");
4989
4990        let seed = self.seed;
4991        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4992            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4993
4994        let mut financial_statements = Vec::new();
4995        let mut bank_reconciliations = Vec::new();
4996        let mut trial_balances = Vec::new();
4997        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4998        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4999            Vec::new();
5000        // Standalone statements keyed by entity code
5001        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5002            std::collections::HashMap::new();
5003        // Consolidated statements (one per period)
5004        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5005        // Consolidation schedules (one per period)
5006        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5007
5008        // Generate financial statements from JE-derived trial balances.
5009        //
5010        // When journal entries are available, we use cumulative trial balances for
5011        // balance sheet accounts and current-period trial balances for income
5012        // statement accounts. We also track prior-period trial balances so the
5013        // generator can produce comparative amounts, and we build a proper
5014        // cash flow statement from working capital changes rather than random data.
5015        if fs_enabled {
5016            let has_journal_entries = !journal_entries.is_empty();
5017
5018            // Use FinancialStatementGenerator for balance sheet and income statement,
5019            // but build cash flow ourselves from TB data when JEs are available.
5020            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
5021            // Separate generator for consolidated statements (different seed offset)
5022            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
5023
5024            // Collect elimination JEs once (reused across periods)
5025            let elimination_entries: Vec<&JournalEntry> = journal_entries
5026                .iter()
5027                .filter(|je| je.header.is_elimination)
5028                .collect();
5029
5030            // Generate one set of statements per period, per entity
5031            for period in 0..self.config.global.period_months {
5032                let period_start = start_date + chrono::Months::new(period);
5033                let period_end =
5034                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5035                let fiscal_year = period_end.year() as u16;
5036                let fiscal_period = period_end.month() as u8;
5037                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5038
5039                // Build per-entity trial balances for this period (non-elimination JEs)
5040                // We accumulate them for the consolidation step.
5041                let mut entity_tb_map: std::collections::HashMap<
5042                    String,
5043                    std::collections::HashMap<String, rust_decimal::Decimal>,
5044                > = std::collections::HashMap::new();
5045
5046                // --- Standalone: one set of statements per company ---
5047                for (company_idx, company) in self.config.companies.iter().enumerate() {
5048                    let company_code = company.code.as_str();
5049                    let currency = company.currency.as_str();
5050                    // Use a unique seed offset per company to keep statements deterministic
5051                    // and distinct across companies
5052                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
5053                    let mut company_fs_gen =
5054                        FinancialStatementGenerator::new(seed + company_seed_offset);
5055
5056                    if has_journal_entries {
5057                        let tb_entries = Self::build_cumulative_trial_balance(
5058                            journal_entries,
5059                            coa,
5060                            company_code,
5061                            start_date,
5062                            period_end,
5063                            fiscal_year,
5064                            fiscal_period,
5065                        );
5066
5067                        // Accumulate per-entity category balances for consolidation
5068                        let entity_cat_map =
5069                            entity_tb_map.entry(company_code.to_string()).or_default();
5070                        for tb_entry in &tb_entries {
5071                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
5072                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
5073                        }
5074
5075                        let stmts = company_fs_gen.generate(
5076                            company_code,
5077                            currency,
5078                            &tb_entries,
5079                            period_start,
5080                            period_end,
5081                            fiscal_year,
5082                            fiscal_period,
5083                            None,
5084                            "SYS-AUTOCLOSE",
5085                        );
5086
5087                        let mut entity_stmts = Vec::new();
5088                        for stmt in stmts {
5089                            if stmt.statement_type == StatementType::CashFlowStatement {
5090                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
5091                                let cf_items = Self::build_cash_flow_from_trial_balances(
5092                                    &tb_entries,
5093                                    None,
5094                                    net_income,
5095                                );
5096                                entity_stmts.push(FinancialStatement {
5097                                    cash_flow_items: cf_items,
5098                                    ..stmt
5099                                });
5100                            } else {
5101                                entity_stmts.push(stmt);
5102                            }
5103                        }
5104
5105                        // Add to the flat financial_statements list (used by KPI/budget)
5106                        financial_statements.extend(entity_stmts.clone());
5107
5108                        // Store standalone per-entity
5109                        standalone_statements
5110                            .entry(company_code.to_string())
5111                            .or_default()
5112                            .extend(entity_stmts);
5113
5114                        // Only store trial balance for the first company in the period
5115                        // to avoid duplicates in the trial_balances list
5116                        if company_idx == 0 {
5117                            trial_balances.push(PeriodTrialBalance {
5118                                fiscal_year,
5119                                fiscal_period,
5120                                period_start,
5121                                period_end,
5122                                entries: tb_entries,
5123                            });
5124                        }
5125                    } else {
5126                        // Fallback: no JEs available
5127                        let tb_entries = Self::build_trial_balance_from_entries(
5128                            journal_entries,
5129                            coa,
5130                            company_code,
5131                            fiscal_year,
5132                            fiscal_period,
5133                        );
5134
5135                        let stmts = company_fs_gen.generate(
5136                            company_code,
5137                            currency,
5138                            &tb_entries,
5139                            period_start,
5140                            period_end,
5141                            fiscal_year,
5142                            fiscal_period,
5143                            None,
5144                            "SYS-AUTOCLOSE",
5145                        );
5146                        financial_statements.extend(stmts.clone());
5147                        standalone_statements
5148                            .entry(company_code.to_string())
5149                            .or_default()
5150                            .extend(stmts);
5151
5152                        if company_idx == 0 && !tb_entries.is_empty() {
5153                            trial_balances.push(PeriodTrialBalance {
5154                                fiscal_year,
5155                                fiscal_period,
5156                                period_start,
5157                                period_end,
5158                                entries: tb_entries,
5159                            });
5160                        }
5161                    }
5162                }
5163
5164                // --- Consolidated: aggregate all entities + apply eliminations ---
5165                // Use the primary (first) company's currency for the consolidated statement
5166                let group_currency = self
5167                    .config
5168                    .companies
5169                    .first()
5170                    .map(|c| c.currency.as_str())
5171                    .unwrap_or("USD");
5172
5173                // Build owned elimination entries for this period
5174                let period_eliminations: Vec<JournalEntry> = elimination_entries
5175                    .iter()
5176                    .filter(|je| {
5177                        je.header.fiscal_year == fiscal_year
5178                            && je.header.fiscal_period == fiscal_period
5179                    })
5180                    .map(|je| (*je).clone())
5181                    .collect();
5182
5183                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5184                    &entity_tb_map,
5185                    &period_eliminations,
5186                    &period_label,
5187                );
5188
5189                // Build a pseudo trial balance from consolidated line items for the
5190                // FinancialStatementGenerator to use (only for cash flow direction).
5191                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5192                    .line_items
5193                    .iter()
5194                    .map(|li| {
5195                        let net = li.post_elimination_total;
5196                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5197                            (net, rust_decimal::Decimal::ZERO)
5198                        } else {
5199                            (rust_decimal::Decimal::ZERO, -net)
5200                        };
5201                        datasynth_generators::TrialBalanceEntry {
5202                            account_code: li.account_category.clone(),
5203                            account_name: li.account_category.clone(),
5204                            category: li.account_category.clone(),
5205                            debit_balance: debit,
5206                            credit_balance: credit,
5207                        }
5208                    })
5209                    .collect();
5210
5211                let mut cons_stmts = cons_gen.generate(
5212                    "GROUP",
5213                    group_currency,
5214                    &cons_tb,
5215                    period_start,
5216                    period_end,
5217                    fiscal_year,
5218                    fiscal_period,
5219                    None,
5220                    "SYS-AUTOCLOSE",
5221                );
5222
5223                // Split consolidated line items by statement type.
5224                // The consolidation generator returns BS items first, then IS items,
5225                // identified by their CONS- prefix and category.
5226                let bs_categories: &[&str] = &[
5227                    "CASH",
5228                    "RECEIVABLES",
5229                    "INVENTORY",
5230                    "FIXEDASSETS",
5231                    "PAYABLES",
5232                    "ACCRUEDLIABILITIES",
5233                    "LONGTERMDEBT",
5234                    "EQUITY",
5235                ];
5236                let (bs_items, is_items): (Vec<_>, Vec<_>) =
5237                    cons_line_items.into_iter().partition(|li| {
5238                        let upper = li.label.to_uppercase();
5239                        bs_categories.iter().any(|c| upper == *c)
5240                    });
5241
5242                for stmt in &mut cons_stmts {
5243                    stmt.is_consolidated = true;
5244                    match stmt.statement_type {
5245                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5246                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5247                        _ => {} // CF and equity change statements keep generator output
5248                    }
5249                }
5250
5251                consolidated_statements.extend(cons_stmts);
5252                consolidation_schedules.push(schedule);
5253            }
5254
5255            // Backward compat: if only 1 company, use existing code path logic
5256            // (prior_cumulative_tb for comparative amounts). Already handled above;
5257            // the prior_ref is omitted to keep this change minimal.
5258            let _ = &mut fs_gen; // suppress unused warning
5259
5260            stats.financial_statement_count = financial_statements.len();
5261            info!(
5262                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5263                stats.financial_statement_count,
5264                consolidated_statements.len(),
5265                has_journal_entries
5266            );
5267
5268            // ----------------------------------------------------------------
5269            // IFRS 8 / ASC 280: Operating Segment Reporting
5270            // ----------------------------------------------------------------
5271            // Build entity seeds from the company configuration.
5272            let entity_seeds: Vec<SegmentSeed> = self
5273                .config
5274                .companies
5275                .iter()
5276                .map(|c| SegmentSeed {
5277                    code: c.code.clone(),
5278                    name: c.name.clone(),
5279                    currency: c.currency.clone(),
5280                })
5281                .collect();
5282
5283            let mut seg_gen = SegmentGenerator::new(seed + 30);
5284
5285            // Generate one set of segment reports per period.
5286            // We extract consolidated revenue / profit / assets from the consolidated
5287            // financial statements produced above, falling back to simple sums when
5288            // no consolidated statements were generated (single-entity path).
5289            for period in 0..self.config.global.period_months {
5290                let period_end =
5291                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5292                let fiscal_year = period_end.year() as u16;
5293                let fiscal_period = period_end.month() as u8;
5294                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5295
5296                use datasynth_core::models::StatementType;
5297
5298                // Try to find consolidated income statement for this period
5299                let cons_is = consolidated_statements.iter().find(|s| {
5300                    s.fiscal_year == fiscal_year
5301                        && s.fiscal_period == fiscal_period
5302                        && s.statement_type == StatementType::IncomeStatement
5303                });
5304                let cons_bs = consolidated_statements.iter().find(|s| {
5305                    s.fiscal_year == fiscal_year
5306                        && s.fiscal_period == fiscal_period
5307                        && s.statement_type == StatementType::BalanceSheet
5308                });
5309
5310                // If consolidated statements not available fall back to the flat list
5311                let is_stmt = cons_is.or_else(|| {
5312                    financial_statements.iter().find(|s| {
5313                        s.fiscal_year == fiscal_year
5314                            && s.fiscal_period == fiscal_period
5315                            && s.statement_type == StatementType::IncomeStatement
5316                    })
5317                });
5318                let bs_stmt = cons_bs.or_else(|| {
5319                    financial_statements.iter().find(|s| {
5320                        s.fiscal_year == fiscal_year
5321                            && s.fiscal_period == fiscal_period
5322                            && s.statement_type == StatementType::BalanceSheet
5323                    })
5324                });
5325
5326                let consolidated_revenue = is_stmt
5327                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5328                    .map(|li| -li.amount) // revenue is stored as negative in IS
5329                    .unwrap_or(rust_decimal::Decimal::ZERO);
5330
5331                let consolidated_profit = is_stmt
5332                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5333                    .map(|li| li.amount)
5334                    .unwrap_or(rust_decimal::Decimal::ZERO);
5335
5336                let consolidated_assets = bs_stmt
5337                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5338                    .map(|li| li.amount)
5339                    .unwrap_or(rust_decimal::Decimal::ZERO);
5340
5341                // Skip periods where we have no financial data
5342                if consolidated_revenue == rust_decimal::Decimal::ZERO
5343                    && consolidated_assets == rust_decimal::Decimal::ZERO
5344                {
5345                    continue;
5346                }
5347
5348                let group_code = self
5349                    .config
5350                    .companies
5351                    .first()
5352                    .map(|c| c.code.as_str())
5353                    .unwrap_or("GROUP");
5354
5355                // Compute period depreciation from JEs with document type "CL" hitting account
5356                // 6000 (depreciation expense).  These are generated by phase_period_close.
5357                let total_depr: rust_decimal::Decimal = journal_entries
5358                    .iter()
5359                    .filter(|je| je.header.document_type == "CL")
5360                    .flat_map(|je| je.lines.iter())
5361                    .filter(|l| l.gl_account.starts_with("6000"))
5362                    .map(|l| l.debit_amount)
5363                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5364                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5365                    Some(total_depr)
5366                } else {
5367                    None
5368                };
5369
5370                let (segs, recon) = seg_gen.generate(
5371                    group_code,
5372                    &period_label,
5373                    consolidated_revenue,
5374                    consolidated_profit,
5375                    consolidated_assets,
5376                    &entity_seeds,
5377                    depr_param,
5378                );
5379                segment_reports.extend(segs);
5380                segment_reconciliations.push(recon);
5381            }
5382
5383            info!(
5384                "Segment reports generated: {} segments, {} reconciliations",
5385                segment_reports.len(),
5386                segment_reconciliations.len()
5387            );
5388        }
5389
5390        // Generate bank reconciliations from payment data
5391        if br_enabled && !document_flows.payments.is_empty() {
5392            let employee_ids: Vec<String> = self
5393                .master_data
5394                .employees
5395                .iter()
5396                .map(|e| e.employee_id.clone())
5397                .collect();
5398            let mut br_gen =
5399                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5400
5401            // Group payments by company code and period
5402            for company in &self.config.companies {
5403                let company_payments: Vec<PaymentReference> = document_flows
5404                    .payments
5405                    .iter()
5406                    .filter(|p| p.header.company_code == company.code)
5407                    .map(|p| PaymentReference {
5408                        id: p.header.document_id.clone(),
5409                        amount: if p.is_vendor { p.amount } else { -p.amount },
5410                        date: p.header.document_date,
5411                        reference: p
5412                            .check_number
5413                            .clone()
5414                            .or_else(|| p.wire_reference.clone())
5415                            .unwrap_or_else(|| p.header.document_id.clone()),
5416                    })
5417                    .collect();
5418
5419                if company_payments.is_empty() {
5420                    continue;
5421                }
5422
5423                let bank_account_id = format!("{}-MAIN", company.code);
5424
5425                // Generate one reconciliation per period
5426                for period in 0..self.config.global.period_months {
5427                    let period_start = start_date + chrono::Months::new(period);
5428                    let period_end =
5429                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5430
5431                    let period_payments: Vec<PaymentReference> = company_payments
5432                        .iter()
5433                        .filter(|p| p.date >= period_start && p.date <= period_end)
5434                        .cloned()
5435                        .collect();
5436
5437                    let recon = br_gen.generate(
5438                        &company.code,
5439                        &bank_account_id,
5440                        period_start,
5441                        period_end,
5442                        &company.currency,
5443                        &period_payments,
5444                    );
5445                    bank_reconciliations.push(recon);
5446                }
5447            }
5448            info!(
5449                "Bank reconciliations generated: {} reconciliations",
5450                bank_reconciliations.len()
5451            );
5452        }
5453
5454        stats.bank_reconciliation_count = bank_reconciliations.len();
5455        self.check_resources_with_log("post-financial-reporting")?;
5456
5457        if !trial_balances.is_empty() {
5458            info!(
5459                "Period-close trial balances captured: {} periods",
5460                trial_balances.len()
5461            );
5462        }
5463
5464        // Notes to financial statements are generated in a separate post-processing step
5465        // (generate_notes_to_financial_statements) called after accounting_standards and tax
5466        // phases have completed, so that deferred tax and provision data can be wired in.
5467        let notes_to_financial_statements = Vec::new();
5468
5469        Ok(FinancialReportingSnapshot {
5470            financial_statements,
5471            standalone_statements,
5472            consolidated_statements,
5473            consolidation_schedules,
5474            bank_reconciliations,
5475            trial_balances,
5476            segment_reports,
5477            segment_reconciliations,
5478            notes_to_financial_statements,
5479        })
5480    }
5481
5482    /// Populate notes to financial statements using fully-resolved snapshots.
5483    ///
5484    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
5485    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
5486    /// can be wired into the notes context.  The method mutates
5487    /// `financial_reporting.notes_to_financial_statements` in-place.
5488    fn generate_notes_to_financial_statements(
5489        &self,
5490        financial_reporting: &mut FinancialReportingSnapshot,
5491        accounting_standards: &AccountingStandardsSnapshot,
5492        tax: &TaxSnapshot,
5493        hr: &HrSnapshot,
5494        audit: &AuditSnapshot,
5495        treasury: &TreasurySnapshot,
5496    ) {
5497        use datasynth_config::schema::AccountingFrameworkConfig;
5498        use datasynth_core::models::StatementType;
5499        use datasynth_generators::period_close::notes_generator::{
5500            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5501        };
5502
5503        let seed = self.seed;
5504        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5505        {
5506            Ok(d) => d,
5507            Err(_) => return,
5508        };
5509
5510        let mut notes_gen = NotesGenerator::new(seed + 4235);
5511
5512        for company in &self.config.companies {
5513            let last_period_end = start_date
5514                + chrono::Months::new(self.config.global.period_months)
5515                - chrono::Days::new(1);
5516            let fiscal_year = last_period_end.year() as u16;
5517
5518            // Extract relevant amounts from the already-generated financial statements
5519            let entity_is = financial_reporting
5520                .standalone_statements
5521                .get(&company.code)
5522                .and_then(|stmts| {
5523                    stmts.iter().find(|s| {
5524                        s.fiscal_year == fiscal_year
5525                            && s.statement_type == StatementType::IncomeStatement
5526                    })
5527                });
5528            let entity_bs = financial_reporting
5529                .standalone_statements
5530                .get(&company.code)
5531                .and_then(|stmts| {
5532                    stmts.iter().find(|s| {
5533                        s.fiscal_year == fiscal_year
5534                            && s.statement_type == StatementType::BalanceSheet
5535                    })
5536                });
5537
5538            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
5539            let revenue_amount = entity_is
5540                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5541                .map(|li| li.amount);
5542            let ppe_gross = entity_bs
5543                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5544                .map(|li| li.amount);
5545
5546            let framework = match self
5547                .config
5548                .accounting_standards
5549                .framework
5550                .unwrap_or_default()
5551            {
5552                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5553                    "IFRS".to_string()
5554                }
5555                _ => "US GAAP".to_string(),
5556            };
5557
5558            // ---- Deferred tax (IAS 12 / ASC 740) ----
5559            // Sum closing DTA and DTL from rollforward entries for this entity.
5560            let (entity_dta, entity_dtl) = {
5561                let mut dta = rust_decimal::Decimal::ZERO;
5562                let mut dtl = rust_decimal::Decimal::ZERO;
5563                for rf in &tax.deferred_tax.rollforwards {
5564                    if rf.entity_code == company.code {
5565                        dta += rf.closing_dta;
5566                        dtl += rf.closing_dtl;
5567                    }
5568                }
5569                (
5570                    if dta > rust_decimal::Decimal::ZERO {
5571                        Some(dta)
5572                    } else {
5573                        None
5574                    },
5575                    if dtl > rust_decimal::Decimal::ZERO {
5576                        Some(dtl)
5577                    } else {
5578                        None
5579                    },
5580                )
5581            };
5582
5583            // ---- Provisions (IAS 37 / ASC 450) ----
5584            // Filter provisions to this entity; sum best_estimate amounts.
5585            let entity_provisions: Vec<_> = accounting_standards
5586                .provisions
5587                .iter()
5588                .filter(|p| p.entity_code == company.code)
5589                .collect();
5590            let provision_count = entity_provisions.len();
5591            let total_provisions = if provision_count > 0 {
5592                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5593            } else {
5594                None
5595            };
5596
5597            // ---- Pension data from HR snapshot ----
5598            let entity_pension_plan_count = hr
5599                .pension_plans
5600                .iter()
5601                .filter(|p| p.entity_code == company.code)
5602                .count();
5603            let entity_total_dbo: Option<rust_decimal::Decimal> = {
5604                let sum: rust_decimal::Decimal = hr
5605                    .pension_disclosures
5606                    .iter()
5607                    .filter(|d| {
5608                        hr.pension_plans
5609                            .iter()
5610                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5611                    })
5612                    .map(|d| d.net_pension_liability)
5613                    .sum();
5614                let plan_assets_sum: rust_decimal::Decimal = hr
5615                    .pension_plan_assets
5616                    .iter()
5617                    .filter(|a| {
5618                        hr.pension_plans
5619                            .iter()
5620                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5621                    })
5622                    .map(|a| a.fair_value_closing)
5623                    .sum();
5624                if entity_pension_plan_count > 0 {
5625                    Some(sum + plan_assets_sum)
5626                } else {
5627                    None
5628                }
5629            };
5630            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5631                let sum: rust_decimal::Decimal = hr
5632                    .pension_plan_assets
5633                    .iter()
5634                    .filter(|a| {
5635                        hr.pension_plans
5636                            .iter()
5637                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5638                    })
5639                    .map(|a| a.fair_value_closing)
5640                    .sum();
5641                if entity_pension_plan_count > 0 {
5642                    Some(sum)
5643                } else {
5644                    None
5645                }
5646            };
5647
5648            // ---- Audit data: related parties + subsequent events ----
5649            // Audit snapshot covers all entities; use total counts (common case = single entity).
5650            let rp_count = audit.related_party_transactions.len();
5651            let se_count = audit.subsequent_events.len();
5652            let adjusting_count = audit
5653                .subsequent_events
5654                .iter()
5655                .filter(|e| {
5656                    matches!(
5657                        e.classification,
5658                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5659                    )
5660                })
5661                .count();
5662
5663            let ctx = NotesGeneratorContext {
5664                entity_code: company.code.clone(),
5665                framework,
5666                period: format!("FY{}", fiscal_year),
5667                period_end: last_period_end,
5668                currency: company.currency.clone(),
5669                revenue_amount,
5670                total_ppe_gross: ppe_gross,
5671                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5672                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
5673                deferred_tax_asset: entity_dta,
5674                deferred_tax_liability: entity_dtl,
5675                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
5676                provision_count,
5677                total_provisions,
5678                // Pension data from HR snapshot
5679                pension_plan_count: entity_pension_plan_count,
5680                total_dbo: entity_total_dbo,
5681                total_plan_assets: entity_total_plan_assets,
5682                // Audit data
5683                related_party_transaction_count: rp_count,
5684                subsequent_event_count: se_count,
5685                adjusting_event_count: adjusting_count,
5686                ..NotesGeneratorContext::default()
5687            };
5688
5689            let entity_notes = notes_gen.generate(&ctx);
5690            let standard_note_count = entity_notes.len() as u32;
5691            info!(
5692                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5693                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5694            );
5695            financial_reporting
5696                .notes_to_financial_statements
5697                .extend(entity_notes);
5698
5699            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
5700            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5701                .debt_instruments
5702                .iter()
5703                .filter(|d| d.entity_id == company.code)
5704                .map(|d| {
5705                    (
5706                        format!("{:?}", d.instrument_type),
5707                        d.principal,
5708                        d.maturity_date.to_string(),
5709                    )
5710                })
5711                .collect();
5712
5713            let hedge_count = treasury.hedge_relationships.len();
5714            let effective_hedges = treasury
5715                .hedge_relationships
5716                .iter()
5717                .filter(|h| h.is_effective)
5718                .count();
5719            let total_notional: rust_decimal::Decimal = treasury
5720                .hedging_instruments
5721                .iter()
5722                .map(|h| h.notional_amount)
5723                .sum();
5724            let total_fair_value: rust_decimal::Decimal = treasury
5725                .hedging_instruments
5726                .iter()
5727                .map(|h| h.fair_value)
5728                .sum();
5729
5730            // Join provision_movements with provisions to get entity/type info
5731            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5732                .provisions
5733                .iter()
5734                .filter(|p| p.entity_code == company.code)
5735                .map(|p| p.id.as_str())
5736                .collect();
5737            let provision_movements: Vec<(
5738                String,
5739                rust_decimal::Decimal,
5740                rust_decimal::Decimal,
5741                rust_decimal::Decimal,
5742            )> = accounting_standards
5743                .provision_movements
5744                .iter()
5745                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5746                .map(|m| {
5747                    let prov_type = accounting_standards
5748                        .provisions
5749                        .iter()
5750                        .find(|p| p.id == m.provision_id)
5751                        .map(|p| format!("{:?}", p.provision_type))
5752                        .unwrap_or_else(|| "Unknown".to_string());
5753                    (prov_type, m.opening, m.additions, m.closing)
5754                })
5755                .collect();
5756
5757            let enhanced_ctx = EnhancedNotesContext {
5758                entity_code: company.code.clone(),
5759                period: format!("FY{}", fiscal_year),
5760                currency: company.currency.clone(),
5761                // Inventory breakdown: best-effort using zero (would need balance tracker)
5762                finished_goods_value: rust_decimal::Decimal::ZERO,
5763                wip_value: rust_decimal::Decimal::ZERO,
5764                raw_materials_value: rust_decimal::Decimal::ZERO,
5765                debt_instruments,
5766                hedge_count,
5767                effective_hedges,
5768                total_notional,
5769                total_fair_value,
5770                provision_movements,
5771            };
5772
5773            let enhanced_notes =
5774                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
5775            if !enhanced_notes.is_empty() {
5776                info!(
5777                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
5778                    company.code,
5779                    enhanced_notes.len(),
5780                    enhanced_ctx.debt_instruments.len(),
5781                    hedge_count,
5782                    enhanced_ctx.provision_movements.len(),
5783                );
5784                financial_reporting
5785                    .notes_to_financial_statements
5786                    .extend(enhanced_notes);
5787            }
5788        }
5789    }
5790
5791    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
5792    ///
5793    /// This ensures the trial balance is coherent with the JEs: every debit and credit
5794    /// posted in the journal entries flows through to the trial balance, using the real
5795    /// GL account numbers from the CoA.
5796    fn build_trial_balance_from_entries(
5797        journal_entries: &[JournalEntry],
5798        coa: &ChartOfAccounts,
5799        company_code: &str,
5800        fiscal_year: u16,
5801        fiscal_period: u8,
5802    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5803        use rust_decimal::Decimal;
5804
5805        // Accumulate total debits and credits per GL account
5806        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5807        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5808
5809        for je in journal_entries {
5810            // Filter to matching company, fiscal year, and period
5811            if je.header.company_code != company_code
5812                || je.header.fiscal_year != fiscal_year
5813                || je.header.fiscal_period != fiscal_period
5814            {
5815                continue;
5816            }
5817
5818            for line in &je.lines {
5819                let acct = &line.gl_account;
5820                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5821                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5822            }
5823        }
5824
5825        // Build a TrialBalanceEntry for each account that had activity
5826        let mut all_accounts: Vec<&String> = account_debits
5827            .keys()
5828            .chain(account_credits.keys())
5829            .collect::<std::collections::HashSet<_>>()
5830            .into_iter()
5831            .collect();
5832        all_accounts.sort();
5833
5834        let mut entries = Vec::new();
5835
5836        for acct_number in all_accounts {
5837            let debit = account_debits
5838                .get(acct_number)
5839                .copied()
5840                .unwrap_or(Decimal::ZERO);
5841            let credit = account_credits
5842                .get(acct_number)
5843                .copied()
5844                .unwrap_or(Decimal::ZERO);
5845
5846            if debit.is_zero() && credit.is_zero() {
5847                continue;
5848            }
5849
5850            // Look up account name from CoA, fall back to "Account {code}"
5851            let account_name = coa
5852                .get_account(acct_number)
5853                .map(|gl| gl.short_description.clone())
5854                .unwrap_or_else(|| format!("Account {acct_number}"));
5855
5856            // Map account code prefix to the category strings expected by
5857            // FinancialStatementGenerator (Cash, Receivables, Inventory,
5858            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
5859            // OperatingExpenses).
5860            let category = Self::category_from_account_code(acct_number);
5861
5862            entries.push(datasynth_generators::TrialBalanceEntry {
5863                account_code: acct_number.clone(),
5864                account_name,
5865                category,
5866                debit_balance: debit,
5867                credit_balance: credit,
5868            });
5869        }
5870
5871        entries
5872    }
5873
5874    /// Build a cumulative trial balance by aggregating all JEs from the start up to
5875    /// (and including) the given period end date.
5876    ///
5877    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
5878    /// while income statement accounts (revenue, expenses) show only the current period.
5879    /// The two are merged into a single Vec for the FinancialStatementGenerator.
5880    fn build_cumulative_trial_balance(
5881        journal_entries: &[JournalEntry],
5882        coa: &ChartOfAccounts,
5883        company_code: &str,
5884        start_date: NaiveDate,
5885        period_end: NaiveDate,
5886        fiscal_year: u16,
5887        fiscal_period: u8,
5888    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5889        use rust_decimal::Decimal;
5890
5891        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
5892        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5893        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5894
5895        // Accumulate debits/credits for income statement accounts (current period only)
5896        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5897        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5898
5899        for je in journal_entries {
5900            if je.header.company_code != company_code {
5901                continue;
5902            }
5903
5904            for line in &je.lines {
5905                let acct = &line.gl_account;
5906                let category = Self::category_from_account_code(acct);
5907                let is_bs_account = matches!(
5908                    category.as_str(),
5909                    "Cash"
5910                        | "Receivables"
5911                        | "Inventory"
5912                        | "FixedAssets"
5913                        | "Payables"
5914                        | "AccruedLiabilities"
5915                        | "LongTermDebt"
5916                        | "Equity"
5917                );
5918
5919                if is_bs_account {
5920                    // Balance sheet: accumulate from start through period_end
5921                    if je.header.document_date <= period_end
5922                        && je.header.document_date >= start_date
5923                    {
5924                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5925                            line.debit_amount;
5926                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5927                            line.credit_amount;
5928                    }
5929                } else {
5930                    // Income statement: current period only
5931                    if je.header.fiscal_year == fiscal_year
5932                        && je.header.fiscal_period == fiscal_period
5933                    {
5934                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5935                            line.debit_amount;
5936                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5937                            line.credit_amount;
5938                    }
5939                }
5940            }
5941        }
5942
5943        // Merge all accounts
5944        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5945        all_accounts.extend(bs_debits.keys().cloned());
5946        all_accounts.extend(bs_credits.keys().cloned());
5947        all_accounts.extend(is_debits.keys().cloned());
5948        all_accounts.extend(is_credits.keys().cloned());
5949
5950        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5951        sorted_accounts.sort();
5952
5953        let mut entries = Vec::new();
5954
5955        for acct_number in &sorted_accounts {
5956            let category = Self::category_from_account_code(acct_number);
5957            let is_bs_account = matches!(
5958                category.as_str(),
5959                "Cash"
5960                    | "Receivables"
5961                    | "Inventory"
5962                    | "FixedAssets"
5963                    | "Payables"
5964                    | "AccruedLiabilities"
5965                    | "LongTermDebt"
5966                    | "Equity"
5967            );
5968
5969            let (debit, credit) = if is_bs_account {
5970                (
5971                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5972                    bs_credits
5973                        .get(acct_number)
5974                        .copied()
5975                        .unwrap_or(Decimal::ZERO),
5976                )
5977            } else {
5978                (
5979                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5980                    is_credits
5981                        .get(acct_number)
5982                        .copied()
5983                        .unwrap_or(Decimal::ZERO),
5984                )
5985            };
5986
5987            if debit.is_zero() && credit.is_zero() {
5988                continue;
5989            }
5990
5991            let account_name = coa
5992                .get_account(acct_number)
5993                .map(|gl| gl.short_description.clone())
5994                .unwrap_or_else(|| format!("Account {acct_number}"));
5995
5996            entries.push(datasynth_generators::TrialBalanceEntry {
5997                account_code: acct_number.clone(),
5998                account_name,
5999                category,
6000                debit_balance: debit,
6001                credit_balance: credit,
6002            });
6003        }
6004
6005        entries
6006    }
6007
6008    /// Build a JE-derived cash flow statement using the indirect method.
6009    ///
6010    /// Compares current and prior cumulative trial balances to derive working capital
6011    /// changes, producing a coherent cash flow statement tied to actual journal entries.
6012    fn build_cash_flow_from_trial_balances(
6013        current_tb: &[datasynth_generators::TrialBalanceEntry],
6014        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
6015        net_income: rust_decimal::Decimal,
6016    ) -> Vec<CashFlowItem> {
6017        use rust_decimal::Decimal;
6018
6019        // Helper: aggregate a TB by category and return net (debit - credit)
6020        let aggregate =
6021            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
6022                let mut map: HashMap<String, Decimal> = HashMap::new();
6023                for entry in tb {
6024                    let net = entry.debit_balance - entry.credit_balance;
6025                    *map.entry(entry.category.clone()).or_default() += net;
6026                }
6027                map
6028            };
6029
6030        let current = aggregate(current_tb);
6031        let prior = prior_tb.map(aggregate);
6032
6033        // Get balance for a category, defaulting to zero
6034        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
6035            *map.get(key).unwrap_or(&Decimal::ZERO)
6036        };
6037
6038        // Compute change: current - prior (or current if no prior)
6039        let change = |key: &str| -> Decimal {
6040            let curr = get(&current, key);
6041            match &prior {
6042                Some(p) => curr - get(p, key),
6043                None => curr,
6044            }
6045        };
6046
6047        // Operating activities (indirect method)
6048        // Depreciation add-back: approximate from FixedAssets decrease
6049        let fixed_asset_change = change("FixedAssets");
6050        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
6051            -fixed_asset_change
6052        } else {
6053            Decimal::ZERO
6054        };
6055
6056        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
6057        let ar_change = change("Receivables");
6058        let inventory_change = change("Inventory");
6059        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
6060        let ap_change = change("Payables");
6061        let accrued_change = change("AccruedLiabilities");
6062
6063        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
6064            + (-ap_change)
6065            + (-accrued_change);
6066
6067        // Investing activities
6068        let capex = if fixed_asset_change > Decimal::ZERO {
6069            -fixed_asset_change
6070        } else {
6071            Decimal::ZERO
6072        };
6073        let investing_cf = capex;
6074
6075        // Financing activities
6076        let debt_change = -change("LongTermDebt");
6077        let equity_change = -change("Equity");
6078        let financing_cf = debt_change + equity_change;
6079
6080        let net_change = operating_cf + investing_cf + financing_cf;
6081
6082        vec![
6083            CashFlowItem {
6084                item_code: "CF-NI".to_string(),
6085                label: "Net Income".to_string(),
6086                category: CashFlowCategory::Operating,
6087                amount: net_income,
6088                amount_prior: None,
6089                sort_order: 1,
6090                is_total: false,
6091            },
6092            CashFlowItem {
6093                item_code: "CF-DEP".to_string(),
6094                label: "Depreciation & Amortization".to_string(),
6095                category: CashFlowCategory::Operating,
6096                amount: depreciation_addback,
6097                amount_prior: None,
6098                sort_order: 2,
6099                is_total: false,
6100            },
6101            CashFlowItem {
6102                item_code: "CF-AR".to_string(),
6103                label: "Change in Accounts Receivable".to_string(),
6104                category: CashFlowCategory::Operating,
6105                amount: -ar_change,
6106                amount_prior: None,
6107                sort_order: 3,
6108                is_total: false,
6109            },
6110            CashFlowItem {
6111                item_code: "CF-AP".to_string(),
6112                label: "Change in Accounts Payable".to_string(),
6113                category: CashFlowCategory::Operating,
6114                amount: -ap_change,
6115                amount_prior: None,
6116                sort_order: 4,
6117                is_total: false,
6118            },
6119            CashFlowItem {
6120                item_code: "CF-INV".to_string(),
6121                label: "Change in Inventory".to_string(),
6122                category: CashFlowCategory::Operating,
6123                amount: -inventory_change,
6124                amount_prior: None,
6125                sort_order: 5,
6126                is_total: false,
6127            },
6128            CashFlowItem {
6129                item_code: "CF-OP".to_string(),
6130                label: "Net Cash from Operating Activities".to_string(),
6131                category: CashFlowCategory::Operating,
6132                amount: operating_cf,
6133                amount_prior: None,
6134                sort_order: 6,
6135                is_total: true,
6136            },
6137            CashFlowItem {
6138                item_code: "CF-CAPEX".to_string(),
6139                label: "Capital Expenditures".to_string(),
6140                category: CashFlowCategory::Investing,
6141                amount: capex,
6142                amount_prior: None,
6143                sort_order: 7,
6144                is_total: false,
6145            },
6146            CashFlowItem {
6147                item_code: "CF-INV-T".to_string(),
6148                label: "Net Cash from Investing Activities".to_string(),
6149                category: CashFlowCategory::Investing,
6150                amount: investing_cf,
6151                amount_prior: None,
6152                sort_order: 8,
6153                is_total: true,
6154            },
6155            CashFlowItem {
6156                item_code: "CF-DEBT".to_string(),
6157                label: "Net Borrowings / (Repayments)".to_string(),
6158                category: CashFlowCategory::Financing,
6159                amount: debt_change,
6160                amount_prior: None,
6161                sort_order: 9,
6162                is_total: false,
6163            },
6164            CashFlowItem {
6165                item_code: "CF-EQ".to_string(),
6166                label: "Equity Changes".to_string(),
6167                category: CashFlowCategory::Financing,
6168                amount: equity_change,
6169                amount_prior: None,
6170                sort_order: 10,
6171                is_total: false,
6172            },
6173            CashFlowItem {
6174                item_code: "CF-FIN-T".to_string(),
6175                label: "Net Cash from Financing Activities".to_string(),
6176                category: CashFlowCategory::Financing,
6177                amount: financing_cf,
6178                amount_prior: None,
6179                sort_order: 11,
6180                is_total: true,
6181            },
6182            CashFlowItem {
6183                item_code: "CF-NET".to_string(),
6184                label: "Net Change in Cash".to_string(),
6185                category: CashFlowCategory::Operating,
6186                amount: net_change,
6187                amount_prior: None,
6188                sort_order: 12,
6189                is_total: true,
6190            },
6191        ]
6192    }
6193
6194    /// Calculate net income from a set of trial balance entries.
6195    ///
6196    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
6197    fn calculate_net_income_from_tb(
6198        tb: &[datasynth_generators::TrialBalanceEntry],
6199    ) -> rust_decimal::Decimal {
6200        use rust_decimal::Decimal;
6201
6202        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6203        for entry in tb {
6204            let net = entry.debit_balance - entry.credit_balance;
6205            *aggregated.entry(entry.category.clone()).or_default() += net;
6206        }
6207
6208        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6209        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6210        let opex = *aggregated
6211            .get("OperatingExpenses")
6212            .unwrap_or(&Decimal::ZERO);
6213        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6214        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6215
6216        // revenue is negative (credit-normal), expenses are positive (debit-normal)
6217        // other_income is typically negative (credit), other_expenses is typically positive
6218        let operating_income = revenue - cogs - opex - other_expenses - other_income;
6219        let tax_rate = Decimal::new(25, 2); // 0.25
6220        let tax = operating_income * tax_rate;
6221        operating_income - tax
6222    }
6223
6224    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
6225    ///
6226    /// Uses the first two digits of the account code to classify into the categories
6227    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
6228    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
6229    /// OperatingExpenses, OtherIncome, OtherExpenses.
6230    fn category_from_account_code(code: &str) -> String {
6231        let prefix: String = code.chars().take(2).collect();
6232        match prefix.as_str() {
6233            "10" => "Cash",
6234            "11" => "Receivables",
6235            "12" | "13" | "14" => "Inventory",
6236            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
6237            "20" => "Payables",
6238            "21" | "22" | "23" | "24" => "AccruedLiabilities",
6239            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
6240            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
6241            "40" | "41" | "42" | "43" | "44" => "Revenue",
6242            "50" | "51" | "52" => "CostOfSales",
6243            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
6244                "OperatingExpenses"
6245            }
6246            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
6247            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
6248            _ => "OperatingExpenses",
6249        }
6250        .to_string()
6251    }
6252
6253    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
6254    fn phase_hr_data(
6255        &mut self,
6256        stats: &mut EnhancedGenerationStatistics,
6257    ) -> SynthResult<HrSnapshot> {
6258        if !self.phase_config.generate_hr {
6259            debug!("Phase 16: Skipped (HR generation disabled)");
6260            return Ok(HrSnapshot::default());
6261        }
6262
6263        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6264
6265        let seed = self.seed;
6266        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6267            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6268        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6269        let company_code = self
6270            .config
6271            .companies
6272            .first()
6273            .map(|c| c.code.as_str())
6274            .unwrap_or("1000");
6275        let currency = self
6276            .config
6277            .companies
6278            .first()
6279            .map(|c| c.currency.as_str())
6280            .unwrap_or("USD");
6281
6282        let employee_ids: Vec<String> = self
6283            .master_data
6284            .employees
6285            .iter()
6286            .map(|e| e.employee_id.clone())
6287            .collect();
6288
6289        if employee_ids.is_empty() {
6290            debug!("Phase 16: Skipped (no employees available)");
6291            return Ok(HrSnapshot::default());
6292        }
6293
6294        // Extract cost-center pool from master data employees for cross-reference
6295        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
6296        let cost_center_ids: Vec<String> = self
6297            .master_data
6298            .employees
6299            .iter()
6300            .filter_map(|e| e.cost_center.clone())
6301            .collect::<std::collections::HashSet<_>>()
6302            .into_iter()
6303            .collect();
6304
6305        let mut snapshot = HrSnapshot::default();
6306
6307        // Generate payroll runs (one per month)
6308        if self.config.hr.payroll.enabled {
6309            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6310                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6311
6312            // Look up country pack for payroll deductions and labels
6313            let payroll_pack = self.primary_pack();
6314
6315            // Store the pack on the generator so generate() resolves
6316            // localized deduction rates and labels from it.
6317            payroll_gen.set_country_pack(payroll_pack.clone());
6318
6319            let employees_with_salary: Vec<(
6320                String,
6321                rust_decimal::Decimal,
6322                Option<String>,
6323                Option<String>,
6324            )> = self
6325                .master_data
6326                .employees
6327                .iter()
6328                .map(|e| {
6329                    // Use the employee's actual annual base salary.
6330                    // Fall back to $60,000 / yr if somehow zero.
6331                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6332                        e.base_salary
6333                    } else {
6334                        rust_decimal::Decimal::from(60_000)
6335                    };
6336                    (
6337                        e.employee_id.clone(),
6338                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
6339                        e.cost_center.clone(),
6340                        e.department_id.clone(),
6341                    )
6342                })
6343                .collect();
6344
6345            // Use generate_with_changes when employee change history is available
6346            // so that salary adjustments, transfers, etc. are reflected in payroll.
6347            let change_history = &self.master_data.employee_change_history;
6348            let has_changes = !change_history.is_empty();
6349            if has_changes {
6350                debug!(
6351                    "Payroll will incorporate {} employee change events",
6352                    change_history.len()
6353                );
6354            }
6355
6356            for month in 0..self.config.global.period_months {
6357                let period_start = start_date + chrono::Months::new(month);
6358                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6359                let (run, items) = if has_changes {
6360                    payroll_gen.generate_with_changes(
6361                        company_code,
6362                        &employees_with_salary,
6363                        period_start,
6364                        period_end,
6365                        currency,
6366                        change_history,
6367                    )
6368                } else {
6369                    payroll_gen.generate(
6370                        company_code,
6371                        &employees_with_salary,
6372                        period_start,
6373                        period_end,
6374                        currency,
6375                    )
6376                };
6377                snapshot.payroll_runs.push(run);
6378                snapshot.payroll_run_count += 1;
6379                snapshot.payroll_line_item_count += items.len();
6380                snapshot.payroll_line_items.extend(items);
6381            }
6382        }
6383
6384        // Generate time entries
6385        if self.config.hr.time_attendance.enabled {
6386            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6387                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6388            let entries = time_gen.generate(
6389                &employee_ids,
6390                start_date,
6391                end_date,
6392                &self.config.hr.time_attendance,
6393            );
6394            snapshot.time_entry_count = entries.len();
6395            snapshot.time_entries = entries;
6396        }
6397
6398        // Generate expense reports
6399        if self.config.hr.expenses.enabled {
6400            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6401                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6402            expense_gen.set_country_pack(self.primary_pack().clone());
6403            let company_currency = self
6404                .config
6405                .companies
6406                .first()
6407                .map(|c| c.currency.as_str())
6408                .unwrap_or("USD");
6409            let reports = expense_gen.generate_with_currency(
6410                &employee_ids,
6411                start_date,
6412                end_date,
6413                &self.config.hr.expenses,
6414                company_currency,
6415            );
6416            snapshot.expense_report_count = reports.len();
6417            snapshot.expense_reports = reports;
6418        }
6419
6420        // Generate benefit enrollments (gated on payroll, since benefits require employees)
6421        if self.config.hr.payroll.enabled {
6422            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6423            let employee_pairs: Vec<(String, String)> = self
6424                .master_data
6425                .employees
6426                .iter()
6427                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6428                .collect();
6429            let enrollments =
6430                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6431            snapshot.benefit_enrollment_count = enrollments.len();
6432            snapshot.benefit_enrollments = enrollments;
6433        }
6434
6435        // Generate defined benefit pension plans (IAS 19 / ASC 715)
6436        if self.phase_config.generate_hr {
6437            let entity_name = self
6438                .config
6439                .companies
6440                .first()
6441                .map(|c| c.name.as_str())
6442                .unwrap_or("Entity");
6443            let period_months = self.config.global.period_months;
6444            let period_label = {
6445                let y = start_date.year();
6446                let m = start_date.month();
6447                if period_months >= 12 {
6448                    format!("FY{y}")
6449                } else {
6450                    format!("{y}-{m:02}")
6451                }
6452            };
6453            let reporting_date =
6454                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6455
6456            // Compute average annual salary from actual payroll data when available.
6457            // PayrollRun.total_gross covers all employees for one pay period; we sum
6458            // across all runs and divide by employee_count to get per-employee total,
6459            // then annualise for sub-annual periods.
6460            let avg_salary: Option<rust_decimal::Decimal> = {
6461                let employee_count = employee_ids.len();
6462                if self.config.hr.payroll.enabled
6463                    && employee_count > 0
6464                    && !snapshot.payroll_runs.is_empty()
6465                {
6466                    // Sum total gross pay across all payroll runs for this company
6467                    let total_gross: rust_decimal::Decimal = snapshot
6468                        .payroll_runs
6469                        .iter()
6470                        .filter(|r| r.company_code == company_code)
6471                        .map(|r| r.total_gross)
6472                        .sum();
6473                    if total_gross > rust_decimal::Decimal::ZERO {
6474                        // Annualise: total_gross covers `period_months` months of pay
6475                        let annual_total = if period_months > 0 && period_months < 12 {
6476                            total_gross * rust_decimal::Decimal::from(12u32)
6477                                / rust_decimal::Decimal::from(period_months)
6478                        } else {
6479                            total_gross
6480                        };
6481                        Some(
6482                            (annual_total / rust_decimal::Decimal::from(employee_count))
6483                                .round_dp(2),
6484                        )
6485                    } else {
6486                        None
6487                    }
6488                } else {
6489                    None
6490                }
6491            };
6492
6493            let mut pension_gen =
6494                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6495            let pension_snap = pension_gen.generate(
6496                company_code,
6497                entity_name,
6498                &period_label,
6499                reporting_date,
6500                employee_ids.len(),
6501                currency,
6502                avg_salary,
6503                period_months,
6504            );
6505            snapshot.pension_plan_count = pension_snap.plans.len();
6506            snapshot.pension_plans = pension_snap.plans;
6507            snapshot.pension_obligations = pension_snap.obligations;
6508            snapshot.pension_plan_assets = pension_snap.plan_assets;
6509            snapshot.pension_disclosures = pension_snap.disclosures;
6510            // Pension JEs are returned here so they can be added to entries
6511            // in the caller (stored temporarily on snapshot for transfer).
6512            // We embed them in the hr snapshot for simplicity; the orchestrator
6513            // will extract and extend `entries`.
6514            snapshot.pension_journal_entries = pension_snap.journal_entries;
6515        }
6516
6517        // Generate stock-based compensation (ASC 718 / IFRS 2)
6518        if self.phase_config.generate_hr && !employee_ids.is_empty() {
6519            let period_months = self.config.global.period_months;
6520            let period_label = {
6521                let y = start_date.year();
6522                let m = start_date.month();
6523                if period_months >= 12 {
6524                    format!("FY{y}")
6525                } else {
6526                    format!("{y}-{m:02}")
6527                }
6528            };
6529            let reporting_date =
6530                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6531
6532            let mut stock_comp_gen =
6533                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6534            let stock_snap = stock_comp_gen.generate(
6535                company_code,
6536                &employee_ids,
6537                start_date,
6538                &period_label,
6539                reporting_date,
6540                currency,
6541            );
6542            snapshot.stock_grant_count = stock_snap.grants.len();
6543            snapshot.stock_grants = stock_snap.grants;
6544            snapshot.stock_comp_expenses = stock_snap.expenses;
6545            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6546        }
6547
6548        stats.payroll_run_count = snapshot.payroll_run_count;
6549        stats.time_entry_count = snapshot.time_entry_count;
6550        stats.expense_report_count = snapshot.expense_report_count;
6551        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6552        stats.pension_plan_count = snapshot.pension_plan_count;
6553        stats.stock_grant_count = snapshot.stock_grant_count;
6554
6555        info!(
6556            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6557            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6558            snapshot.time_entry_count, snapshot.expense_report_count,
6559            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6560            snapshot.stock_grant_count
6561        );
6562        self.check_resources_with_log("post-hr")?;
6563
6564        Ok(snapshot)
6565    }
6566
6567    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
6568    fn phase_accounting_standards(
6569        &mut self,
6570        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6571        journal_entries: &[JournalEntry],
6572        stats: &mut EnhancedGenerationStatistics,
6573    ) -> SynthResult<AccountingStandardsSnapshot> {
6574        if !self.phase_config.generate_accounting_standards {
6575            debug!("Phase 17: Skipped (accounting standards generation disabled)");
6576            return Ok(AccountingStandardsSnapshot::default());
6577        }
6578        info!("Phase 17: Generating Accounting Standards Data");
6579
6580        let seed = self.seed;
6581        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6582            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6583        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6584        let company_code = self
6585            .config
6586            .companies
6587            .first()
6588            .map(|c| c.code.as_str())
6589            .unwrap_or("1000");
6590        let currency = self
6591            .config
6592            .companies
6593            .first()
6594            .map(|c| c.currency.as_str())
6595            .unwrap_or("USD");
6596
6597        // Convert config framework to standards framework.
6598        // If the user explicitly set a framework in the YAML config, use that.
6599        // Otherwise, fall back to the country pack's accounting.framework field,
6600        // and if that is also absent or unrecognised, default to US GAAP.
6601        let framework = match self.config.accounting_standards.framework {
6602            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6603                datasynth_standards::framework::AccountingFramework::UsGaap
6604            }
6605            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6606                datasynth_standards::framework::AccountingFramework::Ifrs
6607            }
6608            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6609                datasynth_standards::framework::AccountingFramework::DualReporting
6610            }
6611            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6612                datasynth_standards::framework::AccountingFramework::FrenchGaap
6613            }
6614            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6615                datasynth_standards::framework::AccountingFramework::GermanGaap
6616            }
6617            None => {
6618                // Derive framework from the primary company's country pack
6619                let pack = self.primary_pack();
6620                let pack_fw = pack.accounting.framework.as_str();
6621                match pack_fw {
6622                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6623                    "dual_reporting" => {
6624                        datasynth_standards::framework::AccountingFramework::DualReporting
6625                    }
6626                    "french_gaap" => {
6627                        datasynth_standards::framework::AccountingFramework::FrenchGaap
6628                    }
6629                    "german_gaap" | "hgb" => {
6630                        datasynth_standards::framework::AccountingFramework::GermanGaap
6631                    }
6632                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
6633                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6634                }
6635            }
6636        };
6637
6638        let mut snapshot = AccountingStandardsSnapshot::default();
6639
6640        // Revenue recognition
6641        if self.config.accounting_standards.revenue_recognition.enabled {
6642            let customer_ids: Vec<String> = self
6643                .master_data
6644                .customers
6645                .iter()
6646                .map(|c| c.customer_id.clone())
6647                .collect();
6648
6649            if !customer_ids.is_empty() {
6650                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6651                let contracts = rev_gen.generate(
6652                    company_code,
6653                    &customer_ids,
6654                    start_date,
6655                    end_date,
6656                    currency,
6657                    &self.config.accounting_standards.revenue_recognition,
6658                    framework,
6659                );
6660                snapshot.revenue_contract_count = contracts.len();
6661                snapshot.contracts = contracts;
6662            }
6663        }
6664
6665        // Impairment testing
6666        if self.config.accounting_standards.impairment.enabled {
6667            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6668                .master_data
6669                .assets
6670                .iter()
6671                .map(|a| {
6672                    (
6673                        a.asset_id.clone(),
6674                        a.description.clone(),
6675                        a.acquisition_cost,
6676                    )
6677                })
6678                .collect();
6679
6680            if !asset_data.is_empty() {
6681                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6682                let tests = imp_gen.generate(
6683                    company_code,
6684                    &asset_data,
6685                    end_date,
6686                    &self.config.accounting_standards.impairment,
6687                    framework,
6688                );
6689                snapshot.impairment_test_count = tests.len();
6690                snapshot.impairment_tests = tests;
6691            }
6692        }
6693
6694        // Business combinations (IFRS 3 / ASC 805)
6695        if self
6696            .config
6697            .accounting_standards
6698            .business_combinations
6699            .enabled
6700        {
6701            let bc_config = &self.config.accounting_standards.business_combinations;
6702            let framework_str = match framework {
6703                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6704                _ => "US_GAAP",
6705            };
6706            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6707            let bc_snap = bc_gen.generate(
6708                company_code,
6709                currency,
6710                start_date,
6711                end_date,
6712                bc_config.acquisition_count,
6713                framework_str,
6714            );
6715            snapshot.business_combination_count = bc_snap.combinations.len();
6716            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6717            snapshot.business_combinations = bc_snap.combinations;
6718        }
6719
6720        // Expected Credit Loss (IFRS 9 / ASC 326)
6721        if self
6722            .config
6723            .accounting_standards
6724            .expected_credit_loss
6725            .enabled
6726        {
6727            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6728            let framework_str = match framework {
6729                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6730                _ => "ASC_326",
6731            };
6732
6733            // Use AR aging data from the subledger snapshot if available;
6734            // otherwise generate synthetic bucket exposures.
6735            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6736
6737            let mut ecl_gen = EclGenerator::new(seed + 43);
6738
6739            // Collect combined bucket totals across all company AR aging reports.
6740            let bucket_exposures: Vec<(
6741                datasynth_core::models::subledger::ar::AgingBucket,
6742                rust_decimal::Decimal,
6743            )> = if ar_aging_reports.is_empty() {
6744                // No AR aging data — synthesise plausible bucket exposures.
6745                use datasynth_core::models::subledger::ar::AgingBucket;
6746                vec![
6747                    (
6748                        AgingBucket::Current,
6749                        rust_decimal::Decimal::from(500_000_u32),
6750                    ),
6751                    (
6752                        AgingBucket::Days1To30,
6753                        rust_decimal::Decimal::from(120_000_u32),
6754                    ),
6755                    (
6756                        AgingBucket::Days31To60,
6757                        rust_decimal::Decimal::from(45_000_u32),
6758                    ),
6759                    (
6760                        AgingBucket::Days61To90,
6761                        rust_decimal::Decimal::from(15_000_u32),
6762                    ),
6763                    (
6764                        AgingBucket::Over90Days,
6765                        rust_decimal::Decimal::from(8_000_u32),
6766                    ),
6767                ]
6768            } else {
6769                use datasynth_core::models::subledger::ar::AgingBucket;
6770                // Sum bucket totals from all reports.
6771                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6772                    std::collections::HashMap::new();
6773                for report in ar_aging_reports {
6774                    for (bucket, amount) in &report.bucket_totals {
6775                        *totals.entry(*bucket).or_default() += amount;
6776                    }
6777                }
6778                AgingBucket::all()
6779                    .into_iter()
6780                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6781                    .collect()
6782            };
6783
6784            let ecl_snap = ecl_gen.generate(
6785                company_code,
6786                end_date,
6787                &bucket_exposures,
6788                ecl_config,
6789                &period_label,
6790                framework_str,
6791            );
6792
6793            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6794            snapshot.ecl_models = ecl_snap.ecl_models;
6795            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6796            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6797        }
6798
6799        // Provisions and contingencies (IAS 37 / ASC 450)
6800        {
6801            let framework_str = match framework {
6802                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6803                _ => "US_GAAP",
6804            };
6805
6806            // Compute actual revenue from the journal entries generated so far.
6807            // The `journal_entries` slice passed to this phase contains all GL entries
6808            // up to and including Period Close. Fall back to a minimum of 100_000 to
6809            // avoid degenerate zero-based provision amounts on first-period datasets.
6810            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6811                .max(rust_decimal::Decimal::from(100_000_u32));
6812
6813            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6814
6815            let mut prov_gen = ProvisionGenerator::new(seed + 44);
6816            let prov_snap = prov_gen.generate(
6817                company_code,
6818                currency,
6819                revenue_proxy,
6820                end_date,
6821                &period_label,
6822                framework_str,
6823                None, // prior_opening: no carry-forward data in single-period runs
6824            );
6825
6826            snapshot.provision_count = prov_snap.provisions.len();
6827            snapshot.provisions = prov_snap.provisions;
6828            snapshot.provision_movements = prov_snap.movements;
6829            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6830            snapshot.provision_journal_entries = prov_snap.journal_entries;
6831        }
6832
6833        // IAS 21 Functional Currency Translation
6834        // For each company whose functional currency differs from the presentation
6835        // currency, generate a CurrencyTranslationResult with CTA (OCI).
6836        {
6837            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6838
6839            let presentation_currency = self
6840                .config
6841                .global
6842                .presentation_currency
6843                .clone()
6844                .unwrap_or_else(|| self.config.global.group_currency.clone());
6845
6846            // Build a minimal rate table populated with approximate rates from
6847            // the FX model base rates (USD-based) so we can do the translation.
6848            let mut rate_table = FxRateTable::new(&presentation_currency);
6849
6850            // Populate with base rates against USD; if presentation_currency is
6851            // not USD we do a best-effort two-step conversion using the table's
6852            // triangulation support.
6853            let base_rates = base_rates_usd();
6854            for (ccy, rate) in &base_rates {
6855                rate_table.add_rate(FxRate::new(
6856                    ccy,
6857                    "USD",
6858                    RateType::Closing,
6859                    end_date,
6860                    *rate,
6861                    "SYNTHETIC",
6862                ));
6863                // Average rate = 98% of closing (approximation).
6864                // 0.98 = 98/100 = Decimal::new(98, 2)
6865                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6866                rate_table.add_rate(FxRate::new(
6867                    ccy,
6868                    "USD",
6869                    RateType::Average,
6870                    end_date,
6871                    avg,
6872                    "SYNTHETIC",
6873                ));
6874            }
6875
6876            let mut translation_results = Vec::new();
6877            for company in &self.config.companies {
6878                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
6879                // to ensure the translation produces non-trivial CTA amounts.
6880                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6881                    .max(rust_decimal::Decimal::from(100_000_u32));
6882
6883                let func_ccy = company
6884                    .functional_currency
6885                    .clone()
6886                    .unwrap_or_else(|| company.currency.clone());
6887
6888                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6889                    &company.code,
6890                    &func_ccy,
6891                    &presentation_currency,
6892                    &ias21_period_label,
6893                    end_date,
6894                    company_revenue,
6895                    &rate_table,
6896                );
6897                translation_results.push(result);
6898            }
6899
6900            snapshot.currency_translation_count = translation_results.len();
6901            snapshot.currency_translation_results = translation_results;
6902        }
6903
6904        stats.revenue_contract_count = snapshot.revenue_contract_count;
6905        stats.impairment_test_count = snapshot.impairment_test_count;
6906        stats.business_combination_count = snapshot.business_combination_count;
6907        stats.ecl_model_count = snapshot.ecl_model_count;
6908        stats.provision_count = snapshot.provision_count;
6909
6910        info!(
6911            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6912            snapshot.revenue_contract_count,
6913            snapshot.impairment_test_count,
6914            snapshot.business_combination_count,
6915            snapshot.ecl_model_count,
6916            snapshot.provision_count,
6917            snapshot.currency_translation_count
6918        );
6919        self.check_resources_with_log("post-accounting-standards")?;
6920
6921        Ok(snapshot)
6922    }
6923
6924    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
6925    fn phase_manufacturing(
6926        &mut self,
6927        stats: &mut EnhancedGenerationStatistics,
6928    ) -> SynthResult<ManufacturingSnapshot> {
6929        if !self.phase_config.generate_manufacturing {
6930            debug!("Phase 18: Skipped (manufacturing generation disabled)");
6931            return Ok(ManufacturingSnapshot::default());
6932        }
6933        info!("Phase 18: Generating Manufacturing Data");
6934
6935        let seed = self.seed;
6936        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6937            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6938        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6939        let company_code = self
6940            .config
6941            .companies
6942            .first()
6943            .map(|c| c.code.as_str())
6944            .unwrap_or("1000");
6945
6946        let material_data: Vec<(String, String)> = self
6947            .master_data
6948            .materials
6949            .iter()
6950            .map(|m| (m.material_id.clone(), m.description.clone()))
6951            .collect();
6952
6953        if material_data.is_empty() {
6954            debug!("Phase 18: Skipped (no materials available)");
6955            return Ok(ManufacturingSnapshot::default());
6956        }
6957
6958        let mut snapshot = ManufacturingSnapshot::default();
6959
6960        // Generate production orders
6961        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
6962        let production_orders = prod_gen.generate(
6963            company_code,
6964            &material_data,
6965            start_date,
6966            end_date,
6967            &self.config.manufacturing.production_orders,
6968            &self.config.manufacturing.costing,
6969            &self.config.manufacturing.routing,
6970        );
6971        snapshot.production_order_count = production_orders.len();
6972
6973        // Generate quality inspections from production orders
6974        let inspection_data: Vec<(String, String, String)> = production_orders
6975            .iter()
6976            .map(|po| {
6977                (
6978                    po.order_id.clone(),
6979                    po.material_id.clone(),
6980                    po.material_description.clone(),
6981                )
6982            })
6983            .collect();
6984
6985        snapshot.production_orders = production_orders;
6986
6987        if !inspection_data.is_empty() {
6988            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
6989            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
6990            snapshot.quality_inspection_count = inspections.len();
6991            snapshot.quality_inspections = inspections;
6992        }
6993
6994        // Generate cycle counts (one per month)
6995        let storage_locations: Vec<(String, String)> = material_data
6996            .iter()
6997            .enumerate()
6998            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
6999            .collect();
7000
7001        let employee_ids: Vec<String> = self
7002            .master_data
7003            .employees
7004            .iter()
7005            .map(|e| e.employee_id.clone())
7006            .collect();
7007        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
7008            .with_employee_pool(employee_ids);
7009        let mut cycle_count_total = 0usize;
7010        for month in 0..self.config.global.period_months {
7011            let count_date = start_date + chrono::Months::new(month);
7012            let items_per_count = storage_locations.len().clamp(10, 50);
7013            let cc = cc_gen.generate(
7014                company_code,
7015                &storage_locations,
7016                count_date,
7017                items_per_count,
7018            );
7019            snapshot.cycle_counts.push(cc);
7020            cycle_count_total += 1;
7021        }
7022        snapshot.cycle_count_count = cycle_count_total;
7023
7024        // Generate BOM components
7025        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
7026        let bom_components = bom_gen.generate(company_code, &material_data);
7027        snapshot.bom_component_count = bom_components.len();
7028        snapshot.bom_components = bom_components;
7029
7030        // Generate inventory movements — link GoodsIssue movements to real production order IDs
7031        let currency = self
7032            .config
7033            .companies
7034            .first()
7035            .map(|c| c.currency.as_str())
7036            .unwrap_or("USD");
7037        let production_order_ids: Vec<String> = snapshot
7038            .production_orders
7039            .iter()
7040            .map(|po| po.order_id.clone())
7041            .collect();
7042        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
7043        let inventory_movements = inv_mov_gen.generate_with_production_orders(
7044            company_code,
7045            &material_data,
7046            start_date,
7047            end_date,
7048            2,
7049            currency,
7050            &production_order_ids,
7051        );
7052        snapshot.inventory_movement_count = inventory_movements.len();
7053        snapshot.inventory_movements = inventory_movements;
7054
7055        stats.production_order_count = snapshot.production_order_count;
7056        stats.quality_inspection_count = snapshot.quality_inspection_count;
7057        stats.cycle_count_count = snapshot.cycle_count_count;
7058        stats.bom_component_count = snapshot.bom_component_count;
7059        stats.inventory_movement_count = snapshot.inventory_movement_count;
7060
7061        info!(
7062            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
7063            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
7064            snapshot.bom_component_count, snapshot.inventory_movement_count
7065        );
7066        self.check_resources_with_log("post-manufacturing")?;
7067
7068        Ok(snapshot)
7069    }
7070
7071    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
7072    fn phase_sales_kpi_budgets(
7073        &mut self,
7074        coa: &Arc<ChartOfAccounts>,
7075        financial_reporting: &FinancialReportingSnapshot,
7076        stats: &mut EnhancedGenerationStatistics,
7077    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
7078        if !self.phase_config.generate_sales_kpi_budgets {
7079            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
7080            return Ok(SalesKpiBudgetsSnapshot::default());
7081        }
7082        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
7083
7084        let seed = self.seed;
7085        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7086            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7087        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7088        let company_code = self
7089            .config
7090            .companies
7091            .first()
7092            .map(|c| c.code.as_str())
7093            .unwrap_or("1000");
7094
7095        let mut snapshot = SalesKpiBudgetsSnapshot::default();
7096
7097        // Sales Quotes
7098        if self.config.sales_quotes.enabled {
7099            let customer_data: Vec<(String, String)> = self
7100                .master_data
7101                .customers
7102                .iter()
7103                .map(|c| (c.customer_id.clone(), c.name.clone()))
7104                .collect();
7105            let material_data: Vec<(String, String)> = self
7106                .master_data
7107                .materials
7108                .iter()
7109                .map(|m| (m.material_id.clone(), m.description.clone()))
7110                .collect();
7111
7112            if !customer_data.is_empty() && !material_data.is_empty() {
7113                let employee_ids: Vec<String> = self
7114                    .master_data
7115                    .employees
7116                    .iter()
7117                    .map(|e| e.employee_id.clone())
7118                    .collect();
7119                let customer_ids: Vec<String> = self
7120                    .master_data
7121                    .customers
7122                    .iter()
7123                    .map(|c| c.customer_id.clone())
7124                    .collect();
7125                let company_currency = self
7126                    .config
7127                    .companies
7128                    .first()
7129                    .map(|c| c.currency.as_str())
7130                    .unwrap_or("USD");
7131
7132                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
7133                    .with_pools(employee_ids, customer_ids);
7134                let quotes = quote_gen.generate_with_currency(
7135                    company_code,
7136                    &customer_data,
7137                    &material_data,
7138                    start_date,
7139                    end_date,
7140                    &self.config.sales_quotes,
7141                    company_currency,
7142                );
7143                snapshot.sales_quote_count = quotes.len();
7144                snapshot.sales_quotes = quotes;
7145            }
7146        }
7147
7148        // Management KPIs
7149        if self.config.financial_reporting.management_kpis.enabled {
7150            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
7151            let mut kpis = kpi_gen.generate(
7152                company_code,
7153                start_date,
7154                end_date,
7155                &self.config.financial_reporting.management_kpis,
7156            );
7157
7158            // Override financial KPIs with actual data from financial statements
7159            {
7160                use rust_decimal::Decimal;
7161
7162                if let Some(income_stmt) =
7163                    financial_reporting.financial_statements.iter().find(|fs| {
7164                        fs.statement_type == StatementType::IncomeStatement
7165                            && fs.company_code == company_code
7166                    })
7167                {
7168                    // Extract revenue and COGS from income statement line items
7169                    let total_revenue: Decimal = income_stmt
7170                        .line_items
7171                        .iter()
7172                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
7173                        .map(|li| li.amount)
7174                        .sum();
7175                    let total_cogs: Decimal = income_stmt
7176                        .line_items
7177                        .iter()
7178                        .filter(|li| {
7179                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7180                                && !li.is_total
7181                        })
7182                        .map(|li| li.amount.abs())
7183                        .sum();
7184                    let total_opex: Decimal = income_stmt
7185                        .line_items
7186                        .iter()
7187                        .filter(|li| {
7188                            li.section.contains("Expense")
7189                                && !li.is_total
7190                                && !li.section.contains("Cost")
7191                        })
7192                        .map(|li| li.amount.abs())
7193                        .sum();
7194
7195                    if total_revenue > Decimal::ZERO {
7196                        let hundred = Decimal::from(100);
7197                        let gross_margin_pct =
7198                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7199                        let operating_income = total_revenue - total_cogs - total_opex;
7200                        let op_margin_pct =
7201                            (operating_income * hundred / total_revenue).round_dp(2);
7202
7203                        // Override gross margin and operating margin KPIs
7204                        for kpi in &mut kpis {
7205                            if kpi.name == "Gross Margin" {
7206                                kpi.value = gross_margin_pct;
7207                            } else if kpi.name == "Operating Margin" {
7208                                kpi.value = op_margin_pct;
7209                            }
7210                        }
7211                    }
7212                }
7213
7214                // Override Current Ratio from balance sheet
7215                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7216                    fs.statement_type == StatementType::BalanceSheet
7217                        && fs.company_code == company_code
7218                }) {
7219                    let current_assets: Decimal = bs
7220                        .line_items
7221                        .iter()
7222                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7223                        .map(|li| li.amount)
7224                        .sum();
7225                    let current_liabilities: Decimal = bs
7226                        .line_items
7227                        .iter()
7228                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7229                        .map(|li| li.amount.abs())
7230                        .sum();
7231
7232                    if current_liabilities > Decimal::ZERO {
7233                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
7234                        for kpi in &mut kpis {
7235                            if kpi.name == "Current Ratio" {
7236                                kpi.value = current_ratio;
7237                            }
7238                        }
7239                    }
7240                }
7241            }
7242
7243            snapshot.kpi_count = kpis.len();
7244            snapshot.kpis = kpis;
7245        }
7246
7247        // Budgets
7248        if self.config.financial_reporting.budgets.enabled {
7249            let account_data: Vec<(String, String)> = coa
7250                .accounts
7251                .iter()
7252                .map(|a| (a.account_number.clone(), a.short_description.clone()))
7253                .collect();
7254
7255            if !account_data.is_empty() {
7256                let fiscal_year = start_date.year() as u32;
7257                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7258                let budget = budget_gen.generate(
7259                    company_code,
7260                    fiscal_year,
7261                    &account_data,
7262                    &self.config.financial_reporting.budgets,
7263                );
7264                snapshot.budget_line_count = budget.line_items.len();
7265                snapshot.budgets.push(budget);
7266            }
7267        }
7268
7269        stats.sales_quote_count = snapshot.sales_quote_count;
7270        stats.kpi_count = snapshot.kpi_count;
7271        stats.budget_line_count = snapshot.budget_line_count;
7272
7273        info!(
7274            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7275            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7276        );
7277        self.check_resources_with_log("post-sales-kpi-budgets")?;
7278
7279        Ok(snapshot)
7280    }
7281
7282    /// Compute pre-tax income for a single company from actual journal entries.
7283    ///
7284    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
7285    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
7286    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
7287    /// and the period-close engine so that all three use a consistent definition.
7288    fn compute_pre_tax_income(
7289        company_code: &str,
7290        journal_entries: &[JournalEntry],
7291    ) -> rust_decimal::Decimal {
7292        use datasynth_core::accounts::AccountCategory;
7293        use rust_decimal::Decimal;
7294
7295        let mut total_revenue = Decimal::ZERO;
7296        let mut total_expenses = Decimal::ZERO;
7297
7298        for je in journal_entries {
7299            if je.header.company_code != company_code {
7300                continue;
7301            }
7302            for line in &je.lines {
7303                let cat = AccountCategory::from_account(&line.gl_account);
7304                match cat {
7305                    AccountCategory::Revenue => {
7306                        total_revenue += line.credit_amount - line.debit_amount;
7307                    }
7308                    AccountCategory::Cogs
7309                    | AccountCategory::OperatingExpense
7310                    | AccountCategory::OtherIncomeExpense => {
7311                        total_expenses += line.debit_amount - line.credit_amount;
7312                    }
7313                    _ => {}
7314                }
7315            }
7316        }
7317
7318        let pti = (total_revenue - total_expenses).round_dp(2);
7319        if pti == rust_decimal::Decimal::ZERO {
7320            // No income statement activity yet — fall back to a synthetic value so the
7321            // tax provision generator can still produce meaningful output.
7322            rust_decimal::Decimal::from(1_000_000u32)
7323        } else {
7324            pti
7325        }
7326    }
7327
7328    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
7329    fn phase_tax_generation(
7330        &mut self,
7331        document_flows: &DocumentFlowSnapshot,
7332        journal_entries: &[JournalEntry],
7333        stats: &mut EnhancedGenerationStatistics,
7334    ) -> SynthResult<TaxSnapshot> {
7335        if !self.phase_config.generate_tax {
7336            debug!("Phase 20: Skipped (tax generation disabled)");
7337            return Ok(TaxSnapshot::default());
7338        }
7339        info!("Phase 20: Generating Tax Data");
7340
7341        let seed = self.seed;
7342        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7343            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7344        let fiscal_year = start_date.year();
7345        let company_code = self
7346            .config
7347            .companies
7348            .first()
7349            .map(|c| c.code.as_str())
7350            .unwrap_or("1000");
7351
7352        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7353            seed + 370,
7354            self.config.tax.clone(),
7355        );
7356
7357        let pack = self.primary_pack().clone();
7358        let (jurisdictions, codes) =
7359            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7360
7361        // Generate tax provisions for each company
7362        let mut provisions = Vec::new();
7363        if self.config.tax.provisions.enabled {
7364            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7365            for company in &self.config.companies {
7366                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7367                let statutory_rate = rust_decimal::Decimal::new(
7368                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7369                    2,
7370                );
7371                let provision = provision_gen.generate(
7372                    &company.code,
7373                    start_date,
7374                    pre_tax_income,
7375                    statutory_rate,
7376                );
7377                provisions.push(provision);
7378            }
7379        }
7380
7381        // Generate tax lines from document invoices
7382        let mut tax_lines = Vec::new();
7383        if !codes.is_empty() {
7384            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7385                datasynth_generators::TaxLineGeneratorConfig::default(),
7386                codes.clone(),
7387                seed + 372,
7388            );
7389
7390            // Tax lines from vendor invoices (input tax)
7391            // Use the first company's country as buyer country
7392            let buyer_country = self
7393                .config
7394                .companies
7395                .first()
7396                .map(|c| c.country.as_str())
7397                .unwrap_or("US");
7398            for vi in &document_flows.vendor_invoices {
7399                let lines = tax_line_gen.generate_for_document(
7400                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
7401                    &vi.header.document_id,
7402                    buyer_country, // seller approx same country
7403                    buyer_country,
7404                    vi.payable_amount,
7405                    vi.header.document_date,
7406                    None,
7407                );
7408                tax_lines.extend(lines);
7409            }
7410
7411            // Tax lines from customer invoices (output tax)
7412            for ci in &document_flows.customer_invoices {
7413                let lines = tax_line_gen.generate_for_document(
7414                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7415                    &ci.header.document_id,
7416                    buyer_country, // seller is the company
7417                    buyer_country,
7418                    ci.total_gross_amount,
7419                    ci.header.document_date,
7420                    None,
7421                );
7422                tax_lines.extend(lines);
7423            }
7424        }
7425
7426        // Generate deferred tax data (IAS 12 / ASC 740) for each company
7427        let deferred_tax = {
7428            let companies: Vec<(&str, &str)> = self
7429                .config
7430                .companies
7431                .iter()
7432                .map(|c| (c.code.as_str(), c.country.as_str()))
7433                .collect();
7434            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7435            deferred_gen.generate(&companies, start_date, journal_entries)
7436        };
7437
7438        // Build a document_id → posting_date map so each tax JE uses its
7439        // source document's date rather than a blanket period-end date.
7440        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7441            std::collections::HashMap::new();
7442        for vi in &document_flows.vendor_invoices {
7443            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7444        }
7445        for ci in &document_flows.customer_invoices {
7446            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7447        }
7448
7449        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
7450        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7451        let tax_posting_journal_entries = if !tax_lines.is_empty() {
7452            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7453                &tax_lines,
7454                company_code,
7455                &doc_dates,
7456                end_date,
7457            );
7458            debug!("Generated {} tax posting JEs", jes.len());
7459            jes
7460        } else {
7461            Vec::new()
7462        };
7463
7464        let snapshot = TaxSnapshot {
7465            jurisdiction_count: jurisdictions.len(),
7466            code_count: codes.len(),
7467            jurisdictions,
7468            codes,
7469            tax_provisions: provisions,
7470            tax_lines,
7471            tax_returns: Vec::new(),
7472            withholding_records: Vec::new(),
7473            tax_anomaly_labels: Vec::new(),
7474            deferred_tax,
7475            tax_posting_journal_entries,
7476        };
7477
7478        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7479        stats.tax_code_count = snapshot.code_count;
7480        stats.tax_provision_count = snapshot.tax_provisions.len();
7481        stats.tax_line_count = snapshot.tax_lines.len();
7482
7483        info!(
7484            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7485            snapshot.jurisdiction_count,
7486            snapshot.code_count,
7487            snapshot.tax_provisions.len(),
7488            snapshot.deferred_tax.temporary_differences.len(),
7489            snapshot.deferred_tax.journal_entries.len(),
7490            snapshot.tax_posting_journal_entries.len(),
7491        );
7492        self.check_resources_with_log("post-tax")?;
7493
7494        Ok(snapshot)
7495    }
7496
7497    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
7498    fn phase_esg_generation(
7499        &mut self,
7500        document_flows: &DocumentFlowSnapshot,
7501        manufacturing: &ManufacturingSnapshot,
7502        stats: &mut EnhancedGenerationStatistics,
7503    ) -> SynthResult<EsgSnapshot> {
7504        if !self.phase_config.generate_esg {
7505            debug!("Phase 21: Skipped (ESG generation disabled)");
7506            return Ok(EsgSnapshot::default());
7507        }
7508        let degradation = self.check_resources()?;
7509        if degradation >= DegradationLevel::Reduced {
7510            debug!(
7511                "Phase skipped due to resource pressure (degradation: {:?})",
7512                degradation
7513            );
7514            return Ok(EsgSnapshot::default());
7515        }
7516        info!("Phase 21: Generating ESG Data");
7517
7518        let seed = self.seed;
7519        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7520            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7521        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7522        let entity_id = self
7523            .config
7524            .companies
7525            .first()
7526            .map(|c| c.code.as_str())
7527            .unwrap_or("1000");
7528
7529        let esg_cfg = &self.config.esg;
7530        let mut snapshot = EsgSnapshot::default();
7531
7532        // Energy consumption (feeds into scope 1 & 2 emissions)
7533        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7534            esg_cfg.environmental.energy.clone(),
7535            seed + 80,
7536        );
7537        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7538
7539        // Water usage
7540        let facility_count = esg_cfg.environmental.energy.facility_count;
7541        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7542        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7543
7544        // Waste
7545        let mut waste_gen = datasynth_generators::WasteGenerator::new(
7546            seed + 82,
7547            esg_cfg.environmental.waste.diversion_target,
7548            facility_count,
7549        );
7550        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7551
7552        // Emissions (scope 1, 2, 3)
7553        let mut emission_gen =
7554            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7555
7556        // Build EnergyInput from energy_records
7557        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7558            .iter()
7559            .map(|e| datasynth_generators::EnergyInput {
7560                facility_id: e.facility_id.clone(),
7561                energy_type: match e.energy_source {
7562                    EnergySourceType::NaturalGas => {
7563                        datasynth_generators::EnergyInputType::NaturalGas
7564                    }
7565                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7566                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7567                    _ => datasynth_generators::EnergyInputType::Electricity,
7568                },
7569                consumption_kwh: e.consumption_kwh,
7570                period: e.period,
7571            })
7572            .collect();
7573
7574        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
7575        if !manufacturing.production_orders.is_empty() {
7576            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7577                &manufacturing.production_orders,
7578                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
7579                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
7580            );
7581            if !mfg_energy.is_empty() {
7582                info!(
7583                    "ESG: {} energy inputs derived from {} production orders",
7584                    mfg_energy.len(),
7585                    manufacturing.production_orders.len(),
7586                );
7587                energy_inputs.extend(mfg_energy);
7588            }
7589        }
7590
7591        let mut emissions = Vec::new();
7592        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7593        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7594
7595        // Scope 3: use vendor spend data from actual payments
7596        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7597            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7598            for payment in &document_flows.payments {
7599                if payment.is_vendor {
7600                    *totals
7601                        .entry(payment.business_partner_id.clone())
7602                        .or_default() += payment.amount;
7603                }
7604            }
7605            totals
7606        };
7607        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7608            .master_data
7609            .vendors
7610            .iter()
7611            .map(|v| {
7612                let spend = vendor_payment_totals
7613                    .get(&v.vendor_id)
7614                    .copied()
7615                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7616                datasynth_generators::VendorSpendInput {
7617                    vendor_id: v.vendor_id.clone(),
7618                    category: format!("{:?}", v.vendor_type).to_lowercase(),
7619                    spend,
7620                    country: v.country.clone(),
7621                }
7622            })
7623            .collect();
7624        if !vendor_spend.is_empty() {
7625            emissions.extend(emission_gen.generate_scope3_purchased_goods(
7626                entity_id,
7627                &vendor_spend,
7628                start_date,
7629                end_date,
7630            ));
7631        }
7632
7633        // Business travel & commuting (scope 3)
7634        let headcount = self.master_data.employees.len() as u32;
7635        if headcount > 0 {
7636            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7637            emissions.extend(emission_gen.generate_scope3_business_travel(
7638                entity_id,
7639                travel_spend,
7640                start_date,
7641            ));
7642            emissions
7643                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7644        }
7645
7646        snapshot.emission_count = emissions.len();
7647        snapshot.emissions = emissions;
7648        snapshot.energy = energy_records;
7649
7650        // Social: Workforce diversity, pay equity, safety
7651        let mut workforce_gen =
7652            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7653        let total_headcount = headcount.max(100);
7654        snapshot.diversity =
7655            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7656        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7657
7658        // v2.4: Derive additional workforce diversity metrics from actual employee data
7659        if !self.master_data.employees.is_empty() {
7660            let hr_diversity = workforce_gen.generate_diversity_from_employees(
7661                entity_id,
7662                &self.master_data.employees,
7663                end_date,
7664            );
7665            if !hr_diversity.is_empty() {
7666                info!(
7667                    "ESG: {} diversity metrics derived from {} actual employees",
7668                    hr_diversity.len(),
7669                    self.master_data.employees.len(),
7670                );
7671                snapshot.diversity.extend(hr_diversity);
7672            }
7673        }
7674
7675        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7676            entity_id,
7677            facility_count,
7678            start_date,
7679            end_date,
7680        );
7681
7682        // Compute safety metrics
7683        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
7684        let safety_metric = workforce_gen.compute_safety_metrics(
7685            entity_id,
7686            &snapshot.safety_incidents,
7687            total_hours,
7688            start_date,
7689        );
7690        snapshot.safety_metrics = vec![safety_metric];
7691
7692        // Governance
7693        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7694            seed + 85,
7695            esg_cfg.governance.board_size,
7696            esg_cfg.governance.independence_target,
7697        );
7698        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7699
7700        // Supplier ESG assessments
7701        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7702            esg_cfg.supply_chain_esg.clone(),
7703            seed + 86,
7704        );
7705        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7706            .master_data
7707            .vendors
7708            .iter()
7709            .map(|v| datasynth_generators::VendorInput {
7710                vendor_id: v.vendor_id.clone(),
7711                country: v.country.clone(),
7712                industry: format!("{:?}", v.vendor_type).to_lowercase(),
7713                quality_score: None,
7714            })
7715            .collect();
7716        snapshot.supplier_assessments =
7717            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7718
7719        // Disclosures
7720        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7721            seed + 87,
7722            esg_cfg.reporting.clone(),
7723            esg_cfg.climate_scenarios.clone(),
7724        );
7725        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7726        snapshot.disclosures = disclosure_gen.generate_disclosures(
7727            entity_id,
7728            &snapshot.materiality,
7729            start_date,
7730            end_date,
7731        );
7732        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7733        snapshot.disclosure_count = snapshot.disclosures.len();
7734
7735        // Anomaly injection
7736        if esg_cfg.anomaly_rate > 0.0 {
7737            let mut anomaly_injector =
7738                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7739            let mut labels = Vec::new();
7740            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7741            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7742            labels.extend(
7743                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7744            );
7745            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7746            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7747            snapshot.anomaly_labels = labels;
7748        }
7749
7750        stats.esg_emission_count = snapshot.emission_count;
7751        stats.esg_disclosure_count = snapshot.disclosure_count;
7752
7753        info!(
7754            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
7755            snapshot.emission_count,
7756            snapshot.disclosure_count,
7757            snapshot.supplier_assessments.len()
7758        );
7759        self.check_resources_with_log("post-esg")?;
7760
7761        Ok(snapshot)
7762    }
7763
7764    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
7765    fn phase_treasury_data(
7766        &mut self,
7767        document_flows: &DocumentFlowSnapshot,
7768        subledger: &SubledgerSnapshot,
7769        intercompany: &IntercompanySnapshot,
7770        stats: &mut EnhancedGenerationStatistics,
7771    ) -> SynthResult<TreasurySnapshot> {
7772        if !self.phase_config.generate_treasury {
7773            debug!("Phase 22: Skipped (treasury generation disabled)");
7774            return Ok(TreasurySnapshot::default());
7775        }
7776        let degradation = self.check_resources()?;
7777        if degradation >= DegradationLevel::Reduced {
7778            debug!(
7779                "Phase skipped due to resource pressure (degradation: {:?})",
7780                degradation
7781            );
7782            return Ok(TreasurySnapshot::default());
7783        }
7784        info!("Phase 22: Generating Treasury Data");
7785
7786        let seed = self.seed;
7787        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7788            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7789        let currency = self
7790            .config
7791            .companies
7792            .first()
7793            .map(|c| c.currency.as_str())
7794            .unwrap_or("USD");
7795        let entity_id = self
7796            .config
7797            .companies
7798            .first()
7799            .map(|c| c.code.as_str())
7800            .unwrap_or("1000");
7801
7802        let mut snapshot = TreasurySnapshot::default();
7803
7804        // Generate debt instruments
7805        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
7806            self.config.treasury.debt.clone(),
7807            seed + 90,
7808        );
7809        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
7810
7811        // Generate hedging instruments (IR swaps for floating-rate debt)
7812        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
7813            self.config.treasury.hedging.clone(),
7814            seed + 91,
7815        );
7816        for debt in &snapshot.debt_instruments {
7817            if debt.rate_type == InterestRateType::Variable {
7818                let swap = hedge_gen.generate_ir_swap(
7819                    currency,
7820                    debt.principal,
7821                    debt.origination_date,
7822                    debt.maturity_date,
7823                );
7824                snapshot.hedging_instruments.push(swap);
7825            }
7826        }
7827
7828        // Build FX exposures from foreign-currency payments and generate
7829        // FX forwards + hedge relationship designations via generate() API.
7830        {
7831            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7832            for payment in &document_flows.payments {
7833                if payment.currency != currency {
7834                    let entry = fx_map
7835                        .entry(payment.currency.clone())
7836                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7837                    entry.0 += payment.amount;
7838                    // Use the latest settlement date among grouped payments
7839                    if payment.header.document_date > entry.1 {
7840                        entry.1 = payment.header.document_date;
7841                    }
7842                }
7843            }
7844            if !fx_map.is_empty() {
7845                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7846                    .into_iter()
7847                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
7848                        datasynth_generators::treasury::FxExposure {
7849                            currency_pair: format!("{foreign_ccy}/{currency}"),
7850                            foreign_currency: foreign_ccy,
7851                            net_amount,
7852                            settlement_date,
7853                            description: "AP payment FX exposure".to_string(),
7854                        }
7855                    })
7856                    .collect();
7857                let (fx_instruments, fx_relationships) =
7858                    hedge_gen.generate(start_date, &fx_exposures);
7859                snapshot.hedging_instruments.extend(fx_instruments);
7860                snapshot.hedge_relationships.extend(fx_relationships);
7861            }
7862        }
7863
7864        // Inject anomalies if configured
7865        if self.config.treasury.anomaly_rate > 0.0 {
7866            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7867                seed + 92,
7868                self.config.treasury.anomaly_rate,
7869            );
7870            let mut labels = Vec::new();
7871            labels.extend(
7872                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7873            );
7874            snapshot.treasury_anomaly_labels = labels;
7875        }
7876
7877        // Generate cash positions from payment flows
7878        if self.config.treasury.cash_positioning.enabled {
7879            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7880
7881            // AP payments as outflows
7882            for payment in &document_flows.payments {
7883                cash_flows.push(datasynth_generators::treasury::CashFlow {
7884                    date: payment.header.document_date,
7885                    account_id: format!("{entity_id}-MAIN"),
7886                    amount: payment.amount,
7887                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7888                });
7889            }
7890
7891            // Customer receipts (from O2C chains) as inflows
7892            for chain in &document_flows.o2c_chains {
7893                if let Some(ref receipt) = chain.customer_receipt {
7894                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7895                        date: receipt.header.document_date,
7896                        account_id: format!("{entity_id}-MAIN"),
7897                        amount: receipt.amount,
7898                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7899                    });
7900                }
7901                // Remainder receipts (follow-up to partial payments)
7902                for receipt in &chain.remainder_receipts {
7903                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7904                        date: receipt.header.document_date,
7905                        account_id: format!("{entity_id}-MAIN"),
7906                        amount: receipt.amount,
7907                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7908                    });
7909                }
7910            }
7911
7912            if !cash_flows.is_empty() {
7913                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7914                    self.config.treasury.cash_positioning.clone(),
7915                    seed + 93,
7916                );
7917                let account_id = format!("{entity_id}-MAIN");
7918                snapshot.cash_positions = cash_gen.generate(
7919                    entity_id,
7920                    &account_id,
7921                    currency,
7922                    &cash_flows,
7923                    start_date,
7924                    start_date + chrono::Months::new(self.config.global.period_months),
7925                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
7926                );
7927            }
7928        }
7929
7930        // Generate cash forecasts from AR/AP aging
7931        if self.config.treasury.cash_forecasting.enabled {
7932            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7933
7934            // Build AR aging items from subledger AR invoices
7935            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7936                .ar_invoices
7937                .iter()
7938                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7939                .map(|inv| {
7940                    let days_past_due = if inv.due_date < end_date {
7941                        (end_date - inv.due_date).num_days().max(0) as u32
7942                    } else {
7943                        0
7944                    };
7945                    datasynth_generators::treasury::ArAgingItem {
7946                        expected_date: inv.due_date,
7947                        amount: inv.amount_remaining,
7948                        days_past_due,
7949                        document_id: inv.invoice_number.clone(),
7950                    }
7951                })
7952                .collect();
7953
7954            // Build AP aging items from subledger AP invoices
7955            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7956                .ap_invoices
7957                .iter()
7958                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7959                .map(|inv| datasynth_generators::treasury::ApAgingItem {
7960                    payment_date: inv.due_date,
7961                    amount: inv.amount_remaining,
7962                    document_id: inv.invoice_number.clone(),
7963                })
7964                .collect();
7965
7966            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7967                self.config.treasury.cash_forecasting.clone(),
7968                seed + 94,
7969            );
7970            let forecast = forecast_gen.generate(
7971                entity_id,
7972                currency,
7973                end_date,
7974                &ar_items,
7975                &ap_items,
7976                &[], // scheduled disbursements - empty for now
7977            );
7978            snapshot.cash_forecasts.push(forecast);
7979        }
7980
7981        // Generate cash pools and sweeps
7982        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
7983            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7984            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
7985                self.config.treasury.cash_pooling.clone(),
7986                seed + 95,
7987            );
7988
7989            // Create a pool from available accounts
7990            let account_ids: Vec<String> = snapshot
7991                .cash_positions
7992                .iter()
7993                .map(|cp| cp.bank_account_id.clone())
7994                .collect::<std::collections::HashSet<_>>()
7995                .into_iter()
7996                .collect();
7997
7998            if let Some(pool) =
7999                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
8000            {
8001                // Generate sweeps - build participant balances from last cash position per account
8002                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8003                for cp in &snapshot.cash_positions {
8004                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
8005                }
8006
8007                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
8008                    latest_balances
8009                        .into_iter()
8010                        .filter(|(id, _)| pool.participant_accounts.contains(id))
8011                        .map(
8012                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
8013                                account_id: id,
8014                                balance,
8015                            },
8016                        )
8017                        .collect();
8018
8019                let sweeps =
8020                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
8021                snapshot.cash_pool_sweeps = sweeps;
8022                snapshot.cash_pools.push(pool);
8023            }
8024        }
8025
8026        // Generate bank guarantees
8027        if self.config.treasury.bank_guarantees.enabled {
8028            let vendor_names: Vec<String> = self
8029                .master_data
8030                .vendors
8031                .iter()
8032                .map(|v| v.name.clone())
8033                .collect();
8034            if !vendor_names.is_empty() {
8035                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
8036                    self.config.treasury.bank_guarantees.clone(),
8037                    seed + 96,
8038                );
8039                snapshot.bank_guarantees =
8040                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
8041            }
8042        }
8043
8044        // Generate netting runs from intercompany matched pairs
8045        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
8046            let entity_ids: Vec<String> = self
8047                .config
8048                .companies
8049                .iter()
8050                .map(|c| c.code.clone())
8051                .collect();
8052            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
8053                .matched_pairs
8054                .iter()
8055                .map(|mp| {
8056                    (
8057                        mp.seller_company.clone(),
8058                        mp.buyer_company.clone(),
8059                        mp.amount,
8060                    )
8061                })
8062                .collect();
8063            if entity_ids.len() >= 2 {
8064                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
8065                    self.config.treasury.netting.clone(),
8066                    seed + 97,
8067                );
8068                snapshot.netting_runs = netting_gen.generate(
8069                    &entity_ids,
8070                    currency,
8071                    start_date,
8072                    self.config.global.period_months,
8073                    &ic_amounts,
8074                );
8075            }
8076        }
8077
8078        // Generate treasury journal entries from the instruments we just created.
8079        {
8080            use datasynth_generators::treasury::TreasuryAccounting;
8081
8082            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8083            let mut treasury_jes = Vec::new();
8084
8085            // Debt interest accrual JEs
8086            if !snapshot.debt_instruments.is_empty() {
8087                let debt_jes =
8088                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
8089                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
8090                treasury_jes.extend(debt_jes);
8091            }
8092
8093            // Hedge mark-to-market JEs
8094            if !snapshot.hedging_instruments.is_empty() {
8095                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
8096                    &snapshot.hedging_instruments,
8097                    &snapshot.hedge_relationships,
8098                    end_date,
8099                    entity_id,
8100                );
8101                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
8102                treasury_jes.extend(hedge_jes);
8103            }
8104
8105            // Cash pool sweep JEs
8106            if !snapshot.cash_pool_sweeps.is_empty() {
8107                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
8108                    &snapshot.cash_pool_sweeps,
8109                    entity_id,
8110                );
8111                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
8112                treasury_jes.extend(sweep_jes);
8113            }
8114
8115            if !treasury_jes.is_empty() {
8116                debug!("Total treasury journal entries: {}", treasury_jes.len());
8117            }
8118            snapshot.journal_entries = treasury_jes;
8119        }
8120
8121        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
8122        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
8123        stats.cash_position_count = snapshot.cash_positions.len();
8124        stats.cash_forecast_count = snapshot.cash_forecasts.len();
8125        stats.cash_pool_count = snapshot.cash_pools.len();
8126
8127        info!(
8128            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
8129            snapshot.debt_instruments.len(),
8130            snapshot.hedging_instruments.len(),
8131            snapshot.cash_positions.len(),
8132            snapshot.cash_forecasts.len(),
8133            snapshot.cash_pools.len(),
8134            snapshot.bank_guarantees.len(),
8135            snapshot.netting_runs.len(),
8136            snapshot.journal_entries.len(),
8137        );
8138        self.check_resources_with_log("post-treasury")?;
8139
8140        Ok(snapshot)
8141    }
8142
8143    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
8144    fn phase_project_accounting(
8145        &mut self,
8146        document_flows: &DocumentFlowSnapshot,
8147        hr: &HrSnapshot,
8148        stats: &mut EnhancedGenerationStatistics,
8149    ) -> SynthResult<ProjectAccountingSnapshot> {
8150        if !self.phase_config.generate_project_accounting {
8151            debug!("Phase 23: Skipped (project accounting disabled)");
8152            return Ok(ProjectAccountingSnapshot::default());
8153        }
8154        let degradation = self.check_resources()?;
8155        if degradation >= DegradationLevel::Reduced {
8156            debug!(
8157                "Phase skipped due to resource pressure (degradation: {:?})",
8158                degradation
8159            );
8160            return Ok(ProjectAccountingSnapshot::default());
8161        }
8162        info!("Phase 23: Generating Project Accounting Data");
8163
8164        let seed = self.seed;
8165        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8166            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8167        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8168        let company_code = self
8169            .config
8170            .companies
8171            .first()
8172            .map(|c| c.code.as_str())
8173            .unwrap_or("1000");
8174
8175        let mut snapshot = ProjectAccountingSnapshot::default();
8176
8177        // Generate projects with WBS hierarchies
8178        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8179            self.config.project_accounting.clone(),
8180            seed + 95,
8181        );
8182        let pool = project_gen.generate(company_code, start_date, end_date);
8183        snapshot.projects = pool.projects.clone();
8184
8185        // Link source documents to projects for cost allocation
8186        {
8187            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8188                Vec::new();
8189
8190            // Time entries
8191            for te in &hr.time_entries {
8192                let total_hours = te.hours_regular + te.hours_overtime;
8193                if total_hours > 0.0 {
8194                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8195                        id: te.entry_id.clone(),
8196                        entity_id: company_code.to_string(),
8197                        date: te.date,
8198                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8199                            .unwrap_or(rust_decimal::Decimal::ZERO),
8200                        source_type: CostSourceType::TimeEntry,
8201                        hours: Some(
8202                            rust_decimal::Decimal::from_f64_retain(total_hours)
8203                                .unwrap_or(rust_decimal::Decimal::ZERO),
8204                        ),
8205                    });
8206                }
8207            }
8208
8209            // Expense reports
8210            for er in &hr.expense_reports {
8211                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8212                    id: er.report_id.clone(),
8213                    entity_id: company_code.to_string(),
8214                    date: er.submission_date,
8215                    amount: er.total_amount,
8216                    source_type: CostSourceType::ExpenseReport,
8217                    hours: None,
8218                });
8219            }
8220
8221            // Purchase orders
8222            for po in &document_flows.purchase_orders {
8223                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8224                    id: po.header.document_id.clone(),
8225                    entity_id: company_code.to_string(),
8226                    date: po.header.document_date,
8227                    amount: po.total_net_amount,
8228                    source_type: CostSourceType::PurchaseOrder,
8229                    hours: None,
8230                });
8231            }
8232
8233            // Vendor invoices
8234            for vi in &document_flows.vendor_invoices {
8235                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8236                    id: vi.header.document_id.clone(),
8237                    entity_id: company_code.to_string(),
8238                    date: vi.header.document_date,
8239                    amount: vi.payable_amount,
8240                    source_type: CostSourceType::VendorInvoice,
8241                    hours: None,
8242                });
8243            }
8244
8245            if !source_docs.is_empty() && !pool.projects.is_empty() {
8246                let mut cost_gen =
8247                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
8248                        self.config.project_accounting.cost_allocation.clone(),
8249                        seed + 99,
8250                    );
8251                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8252            }
8253        }
8254
8255        // Generate change orders
8256        if self.config.project_accounting.change_orders.enabled {
8257            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8258                self.config.project_accounting.change_orders.clone(),
8259                seed + 96,
8260            );
8261            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8262        }
8263
8264        // Generate milestones
8265        if self.config.project_accounting.milestones.enabled {
8266            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8267                self.config.project_accounting.milestones.clone(),
8268                seed + 97,
8269            );
8270            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8271        }
8272
8273        // Generate earned value metrics (needs cost lines, so only if we have projects)
8274        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8275            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8276                self.config.project_accounting.earned_value.clone(),
8277                seed + 98,
8278            );
8279            snapshot.earned_value_metrics =
8280                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8281        }
8282
8283        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
8284        if self.config.project_accounting.revenue_recognition.enabled
8285            && !snapshot.projects.is_empty()
8286            && !snapshot.cost_lines.is_empty()
8287        {
8288            use datasynth_generators::project_accounting::RevenueGenerator;
8289            let rev_config = self.config.project_accounting.revenue_recognition.clone();
8290            let avg_contract_value =
8291                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8292                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8293
8294            // Build contract value tuples: only customer-type projects get revenue recognition.
8295            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
8296            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8297                snapshot
8298                    .projects
8299                    .iter()
8300                    .filter(|p| {
8301                        matches!(
8302                            p.project_type,
8303                            datasynth_core::models::ProjectType::Customer
8304                        )
8305                    })
8306                    .map(|p| {
8307                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
8308                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8309                        // budget × 1.25 → contract value
8310                        } else {
8311                            avg_contract_value
8312                        };
8313                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
8314                        (p.project_id.clone(), cv, etc)
8315                    })
8316                    .collect();
8317
8318            if !contract_values.is_empty() {
8319                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8320                snapshot.revenue_records = rev_gen.generate(
8321                    &snapshot.projects,
8322                    &snapshot.cost_lines,
8323                    &contract_values,
8324                    start_date,
8325                    end_date,
8326                );
8327                debug!(
8328                    "Generated {} revenue recognition records for {} customer projects",
8329                    snapshot.revenue_records.len(),
8330                    contract_values.len()
8331                );
8332            }
8333        }
8334
8335        stats.project_count = snapshot.projects.len();
8336        stats.project_change_order_count = snapshot.change_orders.len();
8337        stats.project_cost_line_count = snapshot.cost_lines.len();
8338
8339        info!(
8340            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8341            snapshot.projects.len(),
8342            snapshot.change_orders.len(),
8343            snapshot.milestones.len(),
8344            snapshot.earned_value_metrics.len()
8345        );
8346        self.check_resources_with_log("post-project-accounting")?;
8347
8348        Ok(snapshot)
8349    }
8350
8351    /// Phase 24: Generate process evolution and organizational events.
8352    fn phase_evolution_events(
8353        &mut self,
8354        stats: &mut EnhancedGenerationStatistics,
8355    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8356        if !self.phase_config.generate_evolution_events {
8357            debug!("Phase 24: Skipped (evolution events disabled)");
8358            return Ok((Vec::new(), Vec::new()));
8359        }
8360        info!("Phase 24: Generating Process Evolution + Organizational Events");
8361
8362        let seed = self.seed;
8363        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8364            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8365        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8366
8367        // Process evolution events
8368        let mut proc_gen =
8369            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8370                seed + 100,
8371            );
8372        let process_events = proc_gen.generate_events(start_date, end_date);
8373
8374        // Organizational events
8375        let company_codes: Vec<String> = self
8376            .config
8377            .companies
8378            .iter()
8379            .map(|c| c.code.clone())
8380            .collect();
8381        let mut org_gen =
8382            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8383                seed + 101,
8384            );
8385        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8386
8387        stats.process_evolution_event_count = process_events.len();
8388        stats.organizational_event_count = org_events.len();
8389
8390        info!(
8391            "Evolution events generated: {} process evolution, {} organizational",
8392            process_events.len(),
8393            org_events.len()
8394        );
8395        self.check_resources_with_log("post-evolution-events")?;
8396
8397        Ok((process_events, org_events))
8398    }
8399
8400    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
8401    /// data recovery, and regulatory changes).
8402    fn phase_disruption_events(
8403        &self,
8404        stats: &mut EnhancedGenerationStatistics,
8405    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8406        if !self.config.organizational_events.enabled {
8407            debug!("Phase 24b: Skipped (organizational events disabled)");
8408            return Ok(Vec::new());
8409        }
8410        info!("Phase 24b: Generating Disruption Events");
8411
8412        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8413            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8414        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8415
8416        let company_codes: Vec<String> = self
8417            .config
8418            .companies
8419            .iter()
8420            .map(|c| c.code.clone())
8421            .collect();
8422
8423        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8424        let events = gen.generate(start_date, end_date, &company_codes);
8425
8426        stats.disruption_event_count = events.len();
8427        info!("Disruption events generated: {} events", events.len());
8428        self.check_resources_with_log("post-disruption-events")?;
8429
8430        Ok(events)
8431    }
8432
8433    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
8434    ///
8435    /// Produces paired examples where each pair contains the original clean JE
8436    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
8437    /// split transaction). Useful for training anomaly detection models with
8438    /// known ground truth.
8439    fn phase_counterfactuals(
8440        &self,
8441        journal_entries: &[JournalEntry],
8442        stats: &mut EnhancedGenerationStatistics,
8443    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8444        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8445            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8446            return Ok(Vec::new());
8447        }
8448        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8449
8450        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8451
8452        let mut gen = CounterfactualGenerator::new(self.seed + 110);
8453
8454        // Rotating set of specs to produce diverse mutation types
8455        let specs = [
8456            CounterfactualSpec::ScaleAmount { factor: 2.5 },
8457            CounterfactualSpec::ShiftDate { days: -14 },
8458            CounterfactualSpec::SelfApprove,
8459            CounterfactualSpec::SplitTransaction { split_count: 3 },
8460        ];
8461
8462        let pairs: Vec<_> = journal_entries
8463            .iter()
8464            .enumerate()
8465            .map(|(i, je)| {
8466                let spec = &specs[i % specs.len()];
8467                gen.generate(je, spec)
8468            })
8469            .collect();
8470
8471        stats.counterfactual_pair_count = pairs.len();
8472        info!(
8473            "Counterfactual pairs generated: {} pairs from {} journal entries",
8474            pairs.len(),
8475            journal_entries.len()
8476        );
8477        self.check_resources_with_log("post-counterfactuals")?;
8478
8479        Ok(pairs)
8480    }
8481
8482    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
8483    ///
8484    /// Uses the anomaly labels (from Phase 8) to determine which documents are
8485    /// fraudulent, then generates probabilistic red flags on all chain documents.
8486    /// Non-fraud documents also receive red flags at a lower rate (false positives)
8487    /// to produce realistic ML training data.
8488    fn phase_red_flags(
8489        &self,
8490        anomaly_labels: &AnomalyLabels,
8491        document_flows: &DocumentFlowSnapshot,
8492        stats: &mut EnhancedGenerationStatistics,
8493    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8494        if !self.config.fraud.enabled {
8495            debug!("Phase 26: Skipped (fraud generation disabled)");
8496            return Ok(Vec::new());
8497        }
8498        info!("Phase 26: Generating Fraud Red-Flag Indicators");
8499
8500        use datasynth_generators::fraud::RedFlagGenerator;
8501
8502        let generator = RedFlagGenerator::new();
8503        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8504
8505        // Build a set of document IDs that are known-fraudulent from anomaly labels.
8506        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8507            .labels
8508            .iter()
8509            .filter(|label| label.anomaly_type.is_intentional())
8510            .map(|label| label.document_id.as_str())
8511            .collect();
8512
8513        let mut flags = Vec::new();
8514
8515        // Iterate P2P chains: use the purchase order document ID as the chain key.
8516        for chain in &document_flows.p2p_chains {
8517            let doc_id = &chain.purchase_order.header.document_id;
8518            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8519            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8520        }
8521
8522        // Iterate O2C chains: use the sales order document ID as the chain key.
8523        for chain in &document_flows.o2c_chains {
8524            let doc_id = &chain.sales_order.header.document_id;
8525            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8526            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8527        }
8528
8529        stats.red_flag_count = flags.len();
8530        info!(
8531            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8532            flags.len(),
8533            document_flows.p2p_chains.len(),
8534            document_flows.o2c_chains.len(),
8535            fraud_doc_ids.len()
8536        );
8537        self.check_resources_with_log("post-red-flags")?;
8538
8539        Ok(flags)
8540    }
8541
8542    /// Phase 26b: Generate collusion rings from employee/vendor pools.
8543    ///
8544    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
8545    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
8546    /// advance them over the simulation period.
8547    fn phase_collusion_rings(
8548        &mut self,
8549        stats: &mut EnhancedGenerationStatistics,
8550    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8551        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8552            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8553            return Ok(Vec::new());
8554        }
8555        info!("Phase 26b: Generating Collusion Rings");
8556
8557        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8558            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8559        let months = self.config.global.period_months;
8560
8561        let employee_ids: Vec<String> = self
8562            .master_data
8563            .employees
8564            .iter()
8565            .map(|e| e.employee_id.clone())
8566            .collect();
8567        let vendor_ids: Vec<String> = self
8568            .master_data
8569            .vendors
8570            .iter()
8571            .map(|v| v.vendor_id.clone())
8572            .collect();
8573
8574        let mut generator =
8575            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8576        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8577
8578        stats.collusion_ring_count = rings.len();
8579        info!(
8580            "Collusion rings generated: {} rings, total members: {}",
8581            rings.len(),
8582            rings
8583                .iter()
8584                .map(datasynth_generators::fraud::CollusionRing::size)
8585                .sum::<usize>()
8586        );
8587        self.check_resources_with_log("post-collusion-rings")?;
8588
8589        Ok(rings)
8590    }
8591
8592    /// Phase 27: Generate bi-temporal version chains for vendor entities.
8593    ///
8594    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
8595    /// master data changes over time, supporting bi-temporal audit queries.
8596    fn phase_temporal_attributes(
8597        &mut self,
8598        stats: &mut EnhancedGenerationStatistics,
8599    ) -> SynthResult<
8600        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8601    > {
8602        if !self.config.temporal_attributes.enabled {
8603            debug!("Phase 27: Skipped (temporal attributes disabled)");
8604            return Ok(Vec::new());
8605        }
8606        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8607
8608        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8609            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8610
8611        // Build a TemporalAttributeConfig from the user's config.
8612        // Since Phase 27 is already gated on temporal_attributes.enabled,
8613        // default to enabling version chains so users get actual mutations.
8614        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8615            || self.config.temporal_attributes.enabled;
8616        let temporal_config = {
8617            let ta = &self.config.temporal_attributes;
8618            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8619                .enabled(ta.enabled)
8620                .closed_probability(ta.valid_time.closed_probability)
8621                .avg_validity_days(ta.valid_time.avg_validity_days)
8622                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8623                .with_version_chains(if generate_version_chains {
8624                    ta.avg_versions_per_entity
8625                } else {
8626                    1.0
8627                })
8628                .build()
8629        };
8630        // Apply backdating settings if configured
8631        let temporal_config = if self
8632            .config
8633            .temporal_attributes
8634            .transaction_time
8635            .allow_backdating
8636        {
8637            let mut c = temporal_config;
8638            c.transaction_time.allow_backdating = true;
8639            c.transaction_time.backdating_probability = self
8640                .config
8641                .temporal_attributes
8642                .transaction_time
8643                .backdating_probability;
8644            c.transaction_time.max_backdate_days = self
8645                .config
8646                .temporal_attributes
8647                .transaction_time
8648                .max_backdate_days;
8649            c
8650        } else {
8651            temporal_config
8652        };
8653        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8654            temporal_config,
8655            self.seed + 130,
8656            start_date,
8657        );
8658
8659        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8660            self.seed + 130,
8661            datasynth_core::GeneratorType::Vendor,
8662        );
8663
8664        let chains: Vec<_> = self
8665            .master_data
8666            .vendors
8667            .iter()
8668            .map(|vendor| {
8669                let id = uuid_factory.next();
8670                gen.generate_version_chain(vendor.clone(), id)
8671            })
8672            .collect();
8673
8674        stats.temporal_version_chain_count = chains.len();
8675        info!("Temporal version chains generated: {} chains", chains.len());
8676        self.check_resources_with_log("post-temporal-attributes")?;
8677
8678        Ok(chains)
8679    }
8680
8681    /// Phase 28: Build entity relationship graph and cross-process links.
8682    ///
8683    /// Part 1 (gated on `relationship_strength.enabled`): builds an
8684    /// `EntityGraph` from master-data vendor/customer entities and
8685    /// journal-entry-derived transaction summaries.
8686    ///
8687    /// Part 2 (gated on `cross_process_links.enabled`): extracts
8688    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
8689    /// generates inventory-movement cross-process links.
8690    fn phase_entity_relationships(
8691        &self,
8692        journal_entries: &[JournalEntry],
8693        document_flows: &DocumentFlowSnapshot,
8694        stats: &mut EnhancedGenerationStatistics,
8695    ) -> SynthResult<(
8696        Option<datasynth_core::models::EntityGraph>,
8697        Vec<datasynth_core::models::CrossProcessLink>,
8698    )> {
8699        use datasynth_generators::relationships::{
8700            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8701            TransactionSummary,
8702        };
8703
8704        let rs_enabled = self.config.relationship_strength.enabled;
8705        let cpl_enabled = self.config.cross_process_links.enabled
8706            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8707
8708        if !rs_enabled && !cpl_enabled {
8709            debug!(
8710                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8711            );
8712            return Ok((None, Vec::new()));
8713        }
8714
8715        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8716
8717        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8718            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8719
8720        let company_code = self
8721            .config
8722            .companies
8723            .first()
8724            .map(|c| c.code.as_str())
8725            .unwrap_or("1000");
8726
8727        // Build the generator with matching config flags
8728        let gen_config = EntityGraphConfig {
8729            enabled: rs_enabled,
8730            cross_process: datasynth_generators::relationships::CrossProcessConfig {
8731                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8732                enable_return_flows: false,
8733                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8734                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8735                // Use higher link rate for small datasets to avoid probabilistic empty results
8736                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8737                    1.0
8738                } else {
8739                    0.30
8740                },
8741                ..Default::default()
8742            },
8743            strength_config: datasynth_generators::relationships::StrengthConfig {
8744                transaction_volume_weight: self
8745                    .config
8746                    .relationship_strength
8747                    .calculation
8748                    .transaction_volume_weight,
8749                transaction_count_weight: self
8750                    .config
8751                    .relationship_strength
8752                    .calculation
8753                    .transaction_count_weight,
8754                duration_weight: self
8755                    .config
8756                    .relationship_strength
8757                    .calculation
8758                    .relationship_duration_weight,
8759                recency_weight: self.config.relationship_strength.calculation.recency_weight,
8760                mutual_connections_weight: self
8761                    .config
8762                    .relationship_strength
8763                    .calculation
8764                    .mutual_connections_weight,
8765                recency_half_life_days: self
8766                    .config
8767                    .relationship_strength
8768                    .calculation
8769                    .recency_half_life_days,
8770            },
8771            ..Default::default()
8772        };
8773
8774        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
8775
8776        // --- Part 1: Entity Relationship Graph ---
8777        let entity_graph = if rs_enabled {
8778            // Build EntitySummary lists from master data
8779            let vendor_summaries: Vec<EntitySummary> = self
8780                .master_data
8781                .vendors
8782                .iter()
8783                .map(|v| {
8784                    EntitySummary::new(
8785                        &v.vendor_id,
8786                        &v.name,
8787                        datasynth_core::models::GraphEntityType::Vendor,
8788                        start_date,
8789                    )
8790                })
8791                .collect();
8792
8793            let customer_summaries: Vec<EntitySummary> = self
8794                .master_data
8795                .customers
8796                .iter()
8797                .map(|c| {
8798                    EntitySummary::new(
8799                        &c.customer_id,
8800                        &c.name,
8801                        datasynth_core::models::GraphEntityType::Customer,
8802                        start_date,
8803                    )
8804                })
8805                .collect();
8806
8807            // Build transaction summaries from journal entries.
8808            // Key = (company_code, trading_partner) for entries that have a
8809            // trading partner.  This captures intercompany flows and any JE
8810            // whose line items carry a trading_partner reference.
8811            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
8812                std::collections::HashMap::new();
8813
8814            for je in journal_entries {
8815                let cc = je.header.company_code.clone();
8816                let posting_date = je.header.posting_date;
8817                for line in &je.lines {
8818                    if let Some(ref tp) = line.trading_partner {
8819                        let amount = if line.debit_amount > line.credit_amount {
8820                            line.debit_amount
8821                        } else {
8822                            line.credit_amount
8823                        };
8824                        let entry = txn_summaries
8825                            .entry((cc.clone(), tp.clone()))
8826                            .or_insert_with(|| TransactionSummary {
8827                                total_volume: rust_decimal::Decimal::ZERO,
8828                                transaction_count: 0,
8829                                first_transaction_date: posting_date,
8830                                last_transaction_date: posting_date,
8831                                related_entities: std::collections::HashSet::new(),
8832                            });
8833                        entry.total_volume += amount;
8834                        entry.transaction_count += 1;
8835                        if posting_date < entry.first_transaction_date {
8836                            entry.first_transaction_date = posting_date;
8837                        }
8838                        if posting_date > entry.last_transaction_date {
8839                            entry.last_transaction_date = posting_date;
8840                        }
8841                        entry.related_entities.insert(cc.clone());
8842                    }
8843                }
8844            }
8845
8846            // Also extract transaction relationships from document flow chains.
8847            // P2P chains: Company → Vendor relationships
8848            for chain in &document_flows.p2p_chains {
8849                let cc = chain.purchase_order.header.company_code.clone();
8850                let vendor_id = chain.purchase_order.vendor_id.clone();
8851                let po_date = chain.purchase_order.header.document_date;
8852                let amount = chain.purchase_order.total_net_amount;
8853
8854                let entry = txn_summaries
8855                    .entry((cc.clone(), vendor_id))
8856                    .or_insert_with(|| TransactionSummary {
8857                        total_volume: rust_decimal::Decimal::ZERO,
8858                        transaction_count: 0,
8859                        first_transaction_date: po_date,
8860                        last_transaction_date: po_date,
8861                        related_entities: std::collections::HashSet::new(),
8862                    });
8863                entry.total_volume += amount;
8864                entry.transaction_count += 1;
8865                if po_date < entry.first_transaction_date {
8866                    entry.first_transaction_date = po_date;
8867                }
8868                if po_date > entry.last_transaction_date {
8869                    entry.last_transaction_date = po_date;
8870                }
8871                entry.related_entities.insert(cc);
8872            }
8873
8874            // O2C chains: Company → Customer relationships
8875            for chain in &document_flows.o2c_chains {
8876                let cc = chain.sales_order.header.company_code.clone();
8877                let customer_id = chain.sales_order.customer_id.clone();
8878                let so_date = chain.sales_order.header.document_date;
8879                let amount = chain.sales_order.total_net_amount;
8880
8881                let entry = txn_summaries
8882                    .entry((cc.clone(), customer_id))
8883                    .or_insert_with(|| TransactionSummary {
8884                        total_volume: rust_decimal::Decimal::ZERO,
8885                        transaction_count: 0,
8886                        first_transaction_date: so_date,
8887                        last_transaction_date: so_date,
8888                        related_entities: std::collections::HashSet::new(),
8889                    });
8890                entry.total_volume += amount;
8891                entry.transaction_count += 1;
8892                if so_date < entry.first_transaction_date {
8893                    entry.first_transaction_date = so_date;
8894                }
8895                if so_date > entry.last_transaction_date {
8896                    entry.last_transaction_date = so_date;
8897                }
8898                entry.related_entities.insert(cc);
8899            }
8900
8901            let as_of_date = journal_entries
8902                .last()
8903                .map(|je| je.header.posting_date)
8904                .unwrap_or(start_date);
8905
8906            let graph = gen.generate_entity_graph(
8907                company_code,
8908                as_of_date,
8909                &vendor_summaries,
8910                &customer_summaries,
8911                &txn_summaries,
8912            );
8913
8914            info!(
8915                "Entity relationship graph: {} nodes, {} edges",
8916                graph.nodes.len(),
8917                graph.edges.len()
8918            );
8919            stats.entity_relationship_node_count = graph.nodes.len();
8920            stats.entity_relationship_edge_count = graph.edges.len();
8921            Some(graph)
8922        } else {
8923            None
8924        };
8925
8926        // --- Part 2: Cross-Process Links ---
8927        let cross_process_links = if cpl_enabled {
8928            // Build GoodsReceiptRef from P2P chains
8929            let gr_refs: Vec<GoodsReceiptRef> = document_flows
8930                .p2p_chains
8931                .iter()
8932                .flat_map(|chain| {
8933                    let vendor_id = chain.purchase_order.vendor_id.clone();
8934                    let cc = chain.purchase_order.header.company_code.clone();
8935                    chain.goods_receipts.iter().flat_map(move |gr| {
8936                        gr.items.iter().filter_map({
8937                            let doc_id = gr.header.document_id.clone();
8938                            let v_id = vendor_id.clone();
8939                            let company = cc.clone();
8940                            let receipt_date = gr.header.document_date;
8941                            move |item| {
8942                                item.base
8943                                    .material_id
8944                                    .as_ref()
8945                                    .map(|mat_id| GoodsReceiptRef {
8946                                        document_id: doc_id.clone(),
8947                                        material_id: mat_id.clone(),
8948                                        quantity: item.base.quantity,
8949                                        receipt_date,
8950                                        vendor_id: v_id.clone(),
8951                                        company_code: company.clone(),
8952                                    })
8953                            }
8954                        })
8955                    })
8956                })
8957                .collect();
8958
8959            // Build DeliveryRef from O2C chains
8960            let del_refs: Vec<DeliveryRef> = document_flows
8961                .o2c_chains
8962                .iter()
8963                .flat_map(|chain| {
8964                    let customer_id = chain.sales_order.customer_id.clone();
8965                    let cc = chain.sales_order.header.company_code.clone();
8966                    chain.deliveries.iter().flat_map(move |del| {
8967                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8968                        del.items.iter().filter_map({
8969                            let doc_id = del.header.document_id.clone();
8970                            let c_id = customer_id.clone();
8971                            let company = cc.clone();
8972                            move |item| {
8973                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8974                                    document_id: doc_id.clone(),
8975                                    material_id: mat_id.clone(),
8976                                    quantity: item.base.quantity,
8977                                    delivery_date,
8978                                    customer_id: c_id.clone(),
8979                                    company_code: company.clone(),
8980                                })
8981                            }
8982                        })
8983                    })
8984                })
8985                .collect();
8986
8987            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
8988            info!("Cross-process links generated: {} links", links.len());
8989            stats.cross_process_link_count = links.len();
8990            links
8991        } else {
8992            Vec::new()
8993        };
8994
8995        self.check_resources_with_log("post-entity-relationships")?;
8996        Ok((entity_graph, cross_process_links))
8997    }
8998
8999    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
9000    fn phase_industry_data(
9001        &self,
9002        stats: &mut EnhancedGenerationStatistics,
9003    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
9004        if !self.config.industry_specific.enabled {
9005            return None;
9006        }
9007        info!("Phase 29: Generating industry-specific data");
9008        let output = datasynth_generators::industry::factory::generate_industry_output(
9009            self.config.global.industry,
9010        );
9011        stats.industry_gl_account_count = output.gl_accounts.len();
9012        info!(
9013            "Industry data generated: {} GL accounts for {:?}",
9014            output.gl_accounts.len(),
9015            self.config.global.industry
9016        );
9017        Some(output)
9018    }
9019
9020    /// Phase 3b: Generate opening balances for each company.
9021    fn phase_opening_balances(
9022        &mut self,
9023        coa: &Arc<ChartOfAccounts>,
9024        stats: &mut EnhancedGenerationStatistics,
9025    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
9026        if !self.config.balance.generate_opening_balances {
9027            debug!("Phase 3b: Skipped (opening balance generation disabled)");
9028            return Ok(Vec::new());
9029        }
9030        info!("Phase 3b: Generating Opening Balances");
9031
9032        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9033            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9034        let fiscal_year = start_date.year();
9035
9036        let industry = match self.config.global.industry {
9037            IndustrySector::Manufacturing => IndustryType::Manufacturing,
9038            IndustrySector::Retail => IndustryType::Retail,
9039            IndustrySector::FinancialServices => IndustryType::Financial,
9040            IndustrySector::Healthcare => IndustryType::Healthcare,
9041            IndustrySector::Technology => IndustryType::Technology,
9042            _ => IndustryType::Manufacturing,
9043        };
9044
9045        let config = datasynth_generators::OpeningBalanceConfig {
9046            industry,
9047            ..Default::default()
9048        };
9049        let mut gen =
9050            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
9051
9052        let mut results = Vec::new();
9053        for company in &self.config.companies {
9054            let spec = OpeningBalanceSpec::new(
9055                company.code.clone(),
9056                start_date,
9057                fiscal_year,
9058                company.currency.clone(),
9059                rust_decimal::Decimal::new(10_000_000, 0),
9060                industry,
9061            );
9062            let ob = gen.generate(&spec, coa, start_date, &company.code);
9063            results.push(ob);
9064        }
9065
9066        stats.opening_balance_count = results.len();
9067        info!("Opening balances generated: {} companies", results.len());
9068        self.check_resources_with_log("post-opening-balances")?;
9069
9070        Ok(results)
9071    }
9072
9073    /// Phase 9b: Reconcile GL control accounts to subledger balances.
9074    fn phase_subledger_reconciliation(
9075        &mut self,
9076        subledger: &SubledgerSnapshot,
9077        entries: &[JournalEntry],
9078        stats: &mut EnhancedGenerationStatistics,
9079    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
9080        if !self.config.balance.reconcile_subledgers {
9081            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
9082            return Ok(Vec::new());
9083        }
9084        info!("Phase 9b: Reconciling GL to subledger balances");
9085
9086        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9087            .map(|d| d + chrono::Months::new(self.config.global.period_months))
9088            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9089
9090        // Build GL balance map from journal entries using a balance tracker
9091        let tracker_config = BalanceTrackerConfig {
9092            validate_on_each_entry: false,
9093            track_history: false,
9094            fail_on_validation_error: false,
9095            ..Default::default()
9096        };
9097        let recon_currency = self
9098            .config
9099            .companies
9100            .first()
9101            .map(|c| c.currency.clone())
9102            .unwrap_or_else(|| "USD".to_string());
9103        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
9104        let validation_errors = tracker.apply_entries(entries);
9105        if !validation_errors.is_empty() {
9106            warn!(
9107                error_count = validation_errors.len(),
9108                "Balance tracker encountered validation errors during subledger reconciliation"
9109            );
9110            for err in &validation_errors {
9111                debug!("Balance validation error: {:?}", err);
9112            }
9113        }
9114
9115        let mut engine = datasynth_generators::ReconciliationEngine::new(
9116            datasynth_generators::ReconciliationConfig::default(),
9117        );
9118
9119        let mut results = Vec::new();
9120        let company_code = self
9121            .config
9122            .companies
9123            .first()
9124            .map(|c| c.code.as_str())
9125            .unwrap_or("1000");
9126
9127        // Reconcile AR
9128        if !subledger.ar_invoices.is_empty() {
9129            let gl_balance = tracker
9130                .get_account_balance(
9131                    company_code,
9132                    datasynth_core::accounts::control_accounts::AR_CONTROL,
9133                )
9134                .map(|b| b.closing_balance)
9135                .unwrap_or_default();
9136            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
9137            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
9138        }
9139
9140        // Reconcile AP
9141        if !subledger.ap_invoices.is_empty() {
9142            let gl_balance = tracker
9143                .get_account_balance(
9144                    company_code,
9145                    datasynth_core::accounts::control_accounts::AP_CONTROL,
9146                )
9147                .map(|b| b.closing_balance)
9148                .unwrap_or_default();
9149            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
9150            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
9151        }
9152
9153        // Reconcile FA
9154        if !subledger.fa_records.is_empty() {
9155            let gl_asset_balance = tracker
9156                .get_account_balance(
9157                    company_code,
9158                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9159                )
9160                .map(|b| b.closing_balance)
9161                .unwrap_or_default();
9162            let gl_accum_depr_balance = tracker
9163                .get_account_balance(
9164                    company_code,
9165                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
9166                )
9167                .map(|b| b.closing_balance)
9168                .unwrap_or_default();
9169            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
9170                subledger.fa_records.iter().collect();
9171            let (asset_recon, depr_recon) = engine.reconcile_fa(
9172                company_code,
9173                end_date,
9174                gl_asset_balance,
9175                gl_accum_depr_balance,
9176                &fa_refs,
9177            );
9178            results.push(asset_recon);
9179            results.push(depr_recon);
9180        }
9181
9182        // Reconcile Inventory
9183        if !subledger.inventory_positions.is_empty() {
9184            let gl_balance = tracker
9185                .get_account_balance(
9186                    company_code,
9187                    datasynth_core::accounts::control_accounts::INVENTORY,
9188                )
9189                .map(|b| b.closing_balance)
9190                .unwrap_or_default();
9191            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9192                subledger.inventory_positions.iter().collect();
9193            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9194        }
9195
9196        stats.subledger_reconciliation_count = results.len();
9197        let passed = results.iter().filter(|r| r.is_balanced()).count();
9198        let failed = results.len() - passed;
9199        info!(
9200            "Subledger reconciliation: {} checks, {} passed, {} failed",
9201            results.len(),
9202            passed,
9203            failed
9204        );
9205        self.check_resources_with_log("post-subledger-reconciliation")?;
9206
9207        Ok(results)
9208    }
9209
9210    /// Generate the chart of accounts.
9211    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9212        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9213
9214        let coa_framework = self.resolve_coa_framework();
9215
9216        let mut gen = ChartOfAccountsGenerator::new(
9217            self.config.chart_of_accounts.complexity,
9218            self.config.global.industry,
9219            self.seed,
9220        )
9221        .with_coa_framework(coa_framework);
9222
9223        let coa = Arc::new(gen.generate());
9224        self.coa = Some(Arc::clone(&coa));
9225
9226        if let Some(pb) = pb {
9227            pb.finish_with_message("Chart of Accounts complete");
9228        }
9229
9230        Ok(coa)
9231    }
9232
9233    /// Generate master data entities.
9234    fn generate_master_data(&mut self) -> SynthResult<()> {
9235        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9236            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9237        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9238
9239        let total = self.config.companies.len() as u64 * 5; // 5 entity types
9240        let pb = self.create_progress_bar(total, "Generating Master Data");
9241
9242        // Resolve country pack once for all companies (uses primary company's country)
9243        let pack = self.primary_pack().clone();
9244
9245        // Capture config values needed inside the parallel closure
9246        let vendors_per_company = self.phase_config.vendors_per_company;
9247        let customers_per_company = self.phase_config.customers_per_company;
9248        let materials_per_company = self.phase_config.materials_per_company;
9249        let assets_per_company = self.phase_config.assets_per_company;
9250        let coa_framework = self.resolve_coa_framework();
9251
9252        // Generate all master data in parallel across companies.
9253        // Each company's data is independent, making this embarrassingly parallel.
9254        let per_company_results: Vec<_> = self
9255            .config
9256            .companies
9257            .par_iter()
9258            .enumerate()
9259            .map(|(i, company)| {
9260                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9261                let pack = pack.clone();
9262
9263                // Generate vendors (offset counter so IDs are globally unique across companies)
9264                let mut vendor_gen = VendorGenerator::new(company_seed);
9265                vendor_gen.set_country_pack(pack.clone());
9266                vendor_gen.set_coa_framework(coa_framework);
9267                vendor_gen.set_counter_offset(i * vendors_per_company);
9268                // Wire vendor network config when enabled
9269                if self.config.vendor_network.enabled {
9270                    let vn = &self.config.vendor_network;
9271                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9272                        enabled: true,
9273                        depth: vn.depth,
9274                        tier1_count: datasynth_generators::TierCountConfig::new(
9275                            vn.tier1.min,
9276                            vn.tier1.max,
9277                        ),
9278                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
9279                            vn.tier2_per_parent.min,
9280                            vn.tier2_per_parent.max,
9281                        ),
9282                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
9283                            vn.tier3_per_parent.min,
9284                            vn.tier3_per_parent.max,
9285                        ),
9286                        cluster_distribution: datasynth_generators::ClusterDistribution {
9287                            reliable_strategic: vn.clusters.reliable_strategic,
9288                            standard_operational: vn.clusters.standard_operational,
9289                            transactional: vn.clusters.transactional,
9290                            problematic: vn.clusters.problematic,
9291                        },
9292                        concentration_limits: datasynth_generators::ConcentrationLimits {
9293                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9294                            max_top5: vn.dependencies.top_5_concentration,
9295                        },
9296                        ..datasynth_generators::VendorNetworkConfig::default()
9297                    });
9298                }
9299                let vendor_pool =
9300                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9301
9302                // Generate customers (offset counter so IDs are globally unique across companies)
9303                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9304                customer_gen.set_country_pack(pack.clone());
9305                customer_gen.set_coa_framework(coa_framework);
9306                customer_gen.set_counter_offset(i * customers_per_company);
9307                // Wire customer segmentation config when enabled
9308                if self.config.customer_segmentation.enabled {
9309                    let cs = &self.config.customer_segmentation;
9310                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9311                        enabled: true,
9312                        segment_distribution: datasynth_generators::SegmentDistribution {
9313                            enterprise: cs.value_segments.enterprise.customer_share,
9314                            mid_market: cs.value_segments.mid_market.customer_share,
9315                            smb: cs.value_segments.smb.customer_share,
9316                            consumer: cs.value_segments.consumer.customer_share,
9317                        },
9318                        referral_config: datasynth_generators::ReferralConfig {
9319                            enabled: cs.networks.referrals.enabled,
9320                            referral_rate: cs.networks.referrals.referral_rate,
9321                            ..Default::default()
9322                        },
9323                        hierarchy_config: datasynth_generators::HierarchyConfig {
9324                            enabled: cs.networks.corporate_hierarchies.enabled,
9325                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9326                            ..Default::default()
9327                        },
9328                        ..Default::default()
9329                    };
9330                    customer_gen.set_segmentation_config(seg_cfg);
9331                }
9332                let customer_pool = customer_gen.generate_customer_pool(
9333                    customers_per_company,
9334                    &company.code,
9335                    start_date,
9336                );
9337
9338                // Generate materials (offset counter so IDs are globally unique across companies)
9339                let mut material_gen = MaterialGenerator::new(company_seed + 200);
9340                material_gen.set_country_pack(pack.clone());
9341                material_gen.set_counter_offset(i * materials_per_company);
9342                let material_pool = material_gen.generate_material_pool(
9343                    materials_per_company,
9344                    &company.code,
9345                    start_date,
9346                );
9347
9348                // Generate fixed assets
9349                let mut asset_gen = AssetGenerator::new(company_seed + 300);
9350                let asset_pool = asset_gen.generate_asset_pool(
9351                    assets_per_company,
9352                    &company.code,
9353                    (start_date, end_date),
9354                );
9355
9356                // Generate employees
9357                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9358                employee_gen.set_country_pack(pack);
9359                let employee_pool =
9360                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9361
9362                // Generate employee change history (2-5 events per employee)
9363                let employee_change_history =
9364                    employee_gen.generate_all_change_history(&employee_pool, end_date);
9365
9366                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
9367                let employee_ids: Vec<String> = employee_pool
9368                    .employees
9369                    .iter()
9370                    .map(|e| e.employee_id.clone())
9371                    .collect();
9372                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9373                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9374
9375                (
9376                    vendor_pool.vendors,
9377                    customer_pool.customers,
9378                    material_pool.materials,
9379                    asset_pool.assets,
9380                    employee_pool.employees,
9381                    employee_change_history,
9382                    cost_centers,
9383                )
9384            })
9385            .collect();
9386
9387        // Aggregate results from all companies
9388        for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9389            per_company_results
9390        {
9391            self.master_data.vendors.extend(vendors);
9392            self.master_data.customers.extend(customers);
9393            self.master_data.materials.extend(materials);
9394            self.master_data.assets.extend(assets);
9395            self.master_data.employees.extend(employees);
9396            self.master_data.cost_centers.extend(cost_centers);
9397            self.master_data
9398                .employee_change_history
9399                .extend(change_history);
9400        }
9401
9402        if let Some(pb) = &pb {
9403            pb.inc(total);
9404        }
9405        if let Some(pb) = pb {
9406            pb.finish_with_message("Master data generation complete");
9407        }
9408
9409        Ok(())
9410    }
9411
9412    /// Generate document flows (P2P and O2C).
9413    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9414        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9415            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9416
9417        // Generate P2P chains
9418        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
9419        let months = (self.config.global.period_months as usize).max(1);
9420        let p2p_count = self
9421            .phase_config
9422            .p2p_chains
9423            .min(self.master_data.vendors.len() * 2 * months);
9424        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9425
9426        // Convert P2P config from schema to generator config
9427        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9428        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9429        p2p_gen.set_country_pack(self.primary_pack().clone());
9430
9431        for i in 0..p2p_count {
9432            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9433            let materials: Vec<&Material> = self
9434                .master_data
9435                .materials
9436                .iter()
9437                .skip(i % self.master_data.materials.len().max(1))
9438                .take(2.min(self.master_data.materials.len()))
9439                .collect();
9440
9441            if materials.is_empty() {
9442                continue;
9443            }
9444
9445            let company = &self.config.companies[i % self.config.companies.len()];
9446            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9447            let fiscal_period = po_date.month() as u8;
9448            let created_by = if self.master_data.employees.is_empty() {
9449                "SYSTEM"
9450            } else {
9451                self.master_data.employees[i % self.master_data.employees.len()]
9452                    .user_id
9453                    .as_str()
9454            };
9455
9456            let chain = p2p_gen.generate_chain(
9457                &company.code,
9458                vendor,
9459                &materials,
9460                po_date,
9461                start_date.year() as u16,
9462                fiscal_period,
9463                created_by,
9464            );
9465
9466            // Flatten documents
9467            flows.purchase_orders.push(chain.purchase_order.clone());
9468            flows.goods_receipts.extend(chain.goods_receipts.clone());
9469            if let Some(vi) = &chain.vendor_invoice {
9470                flows.vendor_invoices.push(vi.clone());
9471            }
9472            if let Some(payment) = &chain.payment {
9473                flows.payments.push(payment.clone());
9474            }
9475            for remainder in &chain.remainder_payments {
9476                flows.payments.push(remainder.clone());
9477            }
9478            flows.p2p_chains.push(chain);
9479
9480            if let Some(pb) = &pb {
9481                pb.inc(1);
9482            }
9483        }
9484
9485        if let Some(pb) = pb {
9486            pb.finish_with_message("P2P document flows complete");
9487        }
9488
9489        // Generate O2C chains
9490        // Cap at ~2 SOs per customer per month to keep order volume realistic
9491        let o2c_count = self
9492            .phase_config
9493            .o2c_chains
9494            .min(self.master_data.customers.len() * 2 * months);
9495        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9496
9497        // Convert O2C config from schema to generator config
9498        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9499        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9500        o2c_gen.set_country_pack(self.primary_pack().clone());
9501
9502        for i in 0..o2c_count {
9503            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9504            let materials: Vec<&Material> = self
9505                .master_data
9506                .materials
9507                .iter()
9508                .skip(i % self.master_data.materials.len().max(1))
9509                .take(2.min(self.master_data.materials.len()))
9510                .collect();
9511
9512            if materials.is_empty() {
9513                continue;
9514            }
9515
9516            let company = &self.config.companies[i % self.config.companies.len()];
9517            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9518            let fiscal_period = so_date.month() as u8;
9519            let created_by = if self.master_data.employees.is_empty() {
9520                "SYSTEM"
9521            } else {
9522                self.master_data.employees[i % self.master_data.employees.len()]
9523                    .user_id
9524                    .as_str()
9525            };
9526
9527            let chain = o2c_gen.generate_chain(
9528                &company.code,
9529                customer,
9530                &materials,
9531                so_date,
9532                start_date.year() as u16,
9533                fiscal_period,
9534                created_by,
9535            );
9536
9537            // Flatten documents
9538            flows.sales_orders.push(chain.sales_order.clone());
9539            flows.deliveries.extend(chain.deliveries.clone());
9540            if let Some(ci) = &chain.customer_invoice {
9541                flows.customer_invoices.push(ci.clone());
9542            }
9543            if let Some(receipt) = &chain.customer_receipt {
9544                flows.payments.push(receipt.clone());
9545            }
9546            // Extract remainder receipts (follow-up to partial payments)
9547            for receipt in &chain.remainder_receipts {
9548                flows.payments.push(receipt.clone());
9549            }
9550            flows.o2c_chains.push(chain);
9551
9552            if let Some(pb) = &pb {
9553                pb.inc(1);
9554            }
9555        }
9556
9557        if let Some(pb) = pb {
9558            pb.finish_with_message("O2C document flows complete");
9559        }
9560
9561        // Collect all document cross-references from document headers.
9562        // Each document embeds references to its predecessor(s) via add_reference(); here we
9563        // denormalise them into a flat list for the document_references.json output file.
9564        {
9565            let mut refs = Vec::new();
9566            for doc in &flows.purchase_orders {
9567                refs.extend(doc.header.document_references.iter().cloned());
9568            }
9569            for doc in &flows.goods_receipts {
9570                refs.extend(doc.header.document_references.iter().cloned());
9571            }
9572            for doc in &flows.vendor_invoices {
9573                refs.extend(doc.header.document_references.iter().cloned());
9574            }
9575            for doc in &flows.sales_orders {
9576                refs.extend(doc.header.document_references.iter().cloned());
9577            }
9578            for doc in &flows.deliveries {
9579                refs.extend(doc.header.document_references.iter().cloned());
9580            }
9581            for doc in &flows.customer_invoices {
9582                refs.extend(doc.header.document_references.iter().cloned());
9583            }
9584            for doc in &flows.payments {
9585                refs.extend(doc.header.document_references.iter().cloned());
9586            }
9587            debug!(
9588                "Collected {} document cross-references from document headers",
9589                refs.len()
9590            );
9591            flows.document_references = refs;
9592        }
9593
9594        Ok(())
9595    }
9596
9597    /// Generate journal entries using parallel generation across multiple cores.
9598    fn generate_journal_entries(
9599        &mut self,
9600        coa: &Arc<ChartOfAccounts>,
9601    ) -> SynthResult<Vec<JournalEntry>> {
9602        use datasynth_core::traits::ParallelGenerator;
9603
9604        let total = self.calculate_total_transactions();
9605        let pb = self.create_progress_bar(total, "Generating Journal Entries");
9606
9607        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9608            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9609        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9610
9611        let company_codes: Vec<String> = self
9612            .config
9613            .companies
9614            .iter()
9615            .map(|c| c.code.clone())
9616            .collect();
9617
9618        let generator = JournalEntryGenerator::new_with_params(
9619            self.config.transactions.clone(),
9620            Arc::clone(coa),
9621            company_codes,
9622            start_date,
9623            end_date,
9624            self.seed,
9625        );
9626
9627        // Connect generated master data to ensure JEs reference real entities
9628        // Enable persona-based error injection for realistic human behavior
9629        // Pass fraud configuration for fraud injection
9630        let je_pack = self.primary_pack();
9631
9632        let mut generator = generator
9633            .with_master_data(
9634                &self.master_data.vendors,
9635                &self.master_data.customers,
9636                &self.master_data.materials,
9637            )
9638            .with_country_pack_names(je_pack)
9639            .with_country_pack_temporal(
9640                self.config.temporal_patterns.clone(),
9641                self.seed + 200,
9642                je_pack,
9643            )
9644            .with_persona_errors(true)
9645            .with_fraud_config(self.config.fraud.clone());
9646
9647        // Apply temporal drift if configured
9648        if self.config.temporal.enabled {
9649            let drift_config = self.config.temporal.to_core_config();
9650            generator = generator.with_drift_config(drift_config, self.seed + 100);
9651        }
9652
9653        // Check memory limit at start
9654        self.check_memory_limit()?;
9655
9656        // Determine parallelism: use available cores, but cap at total entries
9657        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9658
9659        // Use parallel generation for datasets with 10K+ entries.
9660        // Below this threshold, the statistical properties of a single-seeded
9661        // generator (e.g. Benford compliance) are better preserved.
9662        let entries = if total >= 10_000 && num_threads > 1 {
9663            // Parallel path: split the generator across cores and generate in parallel.
9664            // Each sub-generator gets a unique seed for deterministic, independent generation.
9665            let sub_generators = generator.split(num_threads);
9666            let entries_per_thread = total as usize / num_threads;
9667            let remainder = total as usize % num_threads;
9668
9669            let batches: Vec<Vec<JournalEntry>> = sub_generators
9670                .into_par_iter()
9671                .enumerate()
9672                .map(|(i, mut gen)| {
9673                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9674                    gen.generate_batch(count)
9675                })
9676                .collect();
9677
9678            // Merge all batches into a single Vec
9679            let entries = JournalEntryGenerator::merge_results(batches);
9680
9681            if let Some(pb) = &pb {
9682                pb.inc(total);
9683            }
9684            entries
9685        } else {
9686            // Sequential path for small datasets (< 1000 entries)
9687            let mut entries = Vec::with_capacity(total as usize);
9688            for _ in 0..total {
9689                let entry = generator.generate();
9690                entries.push(entry);
9691                if let Some(pb) = &pb {
9692                    pb.inc(1);
9693                }
9694            }
9695            entries
9696        };
9697
9698        if let Some(pb) = pb {
9699            pb.finish_with_message("Journal entries complete");
9700        }
9701
9702        Ok(entries)
9703    }
9704
9705    /// Generate journal entries from document flows.
9706    ///
9707    /// This creates proper GL entries for each document in the P2P and O2C flows,
9708    /// ensuring that document activity is reflected in the general ledger.
9709    fn generate_jes_from_document_flows(
9710        &mut self,
9711        flows: &DocumentFlowSnapshot,
9712    ) -> SynthResult<Vec<JournalEntry>> {
9713        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9714        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9715
9716        let je_config = match self.resolve_coa_framework() {
9717            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9718            CoAFramework::GermanSkr04 => {
9719                let fa = datasynth_core::FrameworkAccounts::german_gaap();
9720                DocumentFlowJeConfig::from(&fa)
9721            }
9722            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9723        };
9724
9725        let populate_fec = je_config.populate_fec_fields;
9726        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9727
9728        // Build auxiliary account lookup from vendor/customer master data so that
9729        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
9730        // PCG "4010001") instead of raw partner IDs.
9731        if populate_fec {
9732            let mut aux_lookup = std::collections::HashMap::new();
9733            for vendor in &self.master_data.vendors {
9734                if let Some(ref aux) = vendor.auxiliary_gl_account {
9735                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9736                }
9737            }
9738            for customer in &self.master_data.customers {
9739                if let Some(ref aux) = customer.auxiliary_gl_account {
9740                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
9741                }
9742            }
9743            if !aux_lookup.is_empty() {
9744                generator.set_auxiliary_account_lookup(aux_lookup);
9745            }
9746        }
9747
9748        let mut entries = Vec::new();
9749
9750        // Generate JEs from P2P chains
9751        for chain in &flows.p2p_chains {
9752            let chain_entries = generator.generate_from_p2p_chain(chain);
9753            entries.extend(chain_entries);
9754            if let Some(pb) = &pb {
9755                pb.inc(1);
9756            }
9757        }
9758
9759        // Generate JEs from O2C chains
9760        for chain in &flows.o2c_chains {
9761            let chain_entries = generator.generate_from_o2c_chain(chain);
9762            entries.extend(chain_entries);
9763            if let Some(pb) = &pb {
9764                pb.inc(1);
9765            }
9766        }
9767
9768        if let Some(pb) = pb {
9769            pb.finish_with_message(format!(
9770                "Generated {} JEs from document flows",
9771                entries.len()
9772            ));
9773        }
9774
9775        Ok(entries)
9776    }
9777
9778    /// Generate journal entries from payroll runs.
9779    ///
9780    /// Creates one JE per payroll run:
9781    /// - DR Salaries & Wages (6100) for gross pay
9782    /// - CR Payroll Clearing (9100) for gross pay
9783    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
9784        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
9785
9786        let mut jes = Vec::with_capacity(payroll_runs.len());
9787
9788        for run in payroll_runs {
9789            let mut je = JournalEntry::new_simple(
9790                format!("JE-PAYROLL-{}", run.payroll_id),
9791                run.company_code.clone(),
9792                run.run_date,
9793                format!("Payroll {}", run.payroll_id),
9794            );
9795
9796            // Debit Salaries & Wages for gross pay
9797            je.add_line(JournalEntryLine {
9798                line_number: 1,
9799                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
9800                debit_amount: run.total_gross,
9801                reference: Some(run.payroll_id.clone()),
9802                text: Some(format!(
9803                    "Payroll {} ({} employees)",
9804                    run.payroll_id, run.employee_count
9805                )),
9806                ..Default::default()
9807            });
9808
9809            // Credit Payroll Clearing for gross pay
9810            je.add_line(JournalEntryLine {
9811                line_number: 2,
9812                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
9813                credit_amount: run.total_gross,
9814                reference: Some(run.payroll_id.clone()),
9815                ..Default::default()
9816            });
9817
9818            jes.push(je);
9819        }
9820
9821        jes
9822    }
9823
9824    /// Link document flows to subledger records.
9825    ///
9826    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
9827    /// ensuring subledger data is coherent with document flow data.
9828    fn link_document_flows_to_subledgers(
9829        &mut self,
9830        flows: &DocumentFlowSnapshot,
9831    ) -> SynthResult<SubledgerSnapshot> {
9832        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9833        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9834
9835        // Build vendor/customer name maps from master data for realistic subledger names
9836        let vendor_names: std::collections::HashMap<String, String> = self
9837            .master_data
9838            .vendors
9839            .iter()
9840            .map(|v| (v.vendor_id.clone(), v.name.clone()))
9841            .collect();
9842        let customer_names: std::collections::HashMap<String, String> = self
9843            .master_data
9844            .customers
9845            .iter()
9846            .map(|c| (c.customer_id.clone(), c.name.clone()))
9847            .collect();
9848
9849        let mut linker = DocumentFlowLinker::new()
9850            .with_vendor_names(vendor_names)
9851            .with_customer_names(customer_names);
9852
9853        // Convert vendor invoices to AP invoices
9854        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9855        if let Some(pb) = &pb {
9856            pb.inc(flows.vendor_invoices.len() as u64);
9857        }
9858
9859        // Convert customer invoices to AR invoices
9860        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9861        if let Some(pb) = &pb {
9862            pb.inc(flows.customer_invoices.len() as u64);
9863        }
9864
9865        if let Some(pb) = pb {
9866            pb.finish_with_message(format!(
9867                "Linked {} AP and {} AR invoices",
9868                ap_invoices.len(),
9869                ar_invoices.len()
9870            ));
9871        }
9872
9873        Ok(SubledgerSnapshot {
9874            ap_invoices,
9875            ar_invoices,
9876            fa_records: Vec::new(),
9877            inventory_positions: Vec::new(),
9878            inventory_movements: Vec::new(),
9879            // Aging reports are computed after payment settlement in phase_document_flows.
9880            ar_aging_reports: Vec::new(),
9881            ap_aging_reports: Vec::new(),
9882            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
9883            depreciation_runs: Vec::new(),
9884            inventory_valuations: Vec::new(),
9885            // Dunning runs and letters are populated in phase_document_flows after AR aging.
9886            dunning_runs: Vec::new(),
9887            dunning_letters: Vec::new(),
9888        })
9889    }
9890
9891    /// Generate OCPM events from document flows.
9892    ///
9893    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
9894    /// capturing the object-centric process perspective.
9895    #[allow(clippy::too_many_arguments)]
9896    fn generate_ocpm_events(
9897        &mut self,
9898        flows: &DocumentFlowSnapshot,
9899        sourcing: &SourcingSnapshot,
9900        hr: &HrSnapshot,
9901        manufacturing: &ManufacturingSnapshot,
9902        banking: &BankingSnapshot,
9903        audit: &AuditSnapshot,
9904        financial_reporting: &FinancialReportingSnapshot,
9905    ) -> SynthResult<OcpmSnapshot> {
9906        let total_chains = flows.p2p_chains.len()
9907            + flows.o2c_chains.len()
9908            + sourcing.sourcing_projects.len()
9909            + hr.payroll_runs.len()
9910            + manufacturing.production_orders.len()
9911            + banking.customers.len()
9912            + audit.engagements.len()
9913            + financial_reporting.bank_reconciliations.len();
9914        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9915
9916        // Create OCPM event log with standard types
9917        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9918        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9919
9920        // Configure the OCPM generator
9921        let ocpm_config = OcpmGeneratorConfig {
9922            generate_p2p: true,
9923            generate_o2c: true,
9924            generate_s2c: !sourcing.sourcing_projects.is_empty(),
9925            generate_h2r: !hr.payroll_runs.is_empty(),
9926            generate_mfg: !manufacturing.production_orders.is_empty(),
9927            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9928            generate_bank: !banking.customers.is_empty(),
9929            generate_audit: !audit.engagements.is_empty(),
9930            happy_path_rate: 0.75,
9931            exception_path_rate: 0.20,
9932            error_path_rate: 0.05,
9933            add_duration_variability: true,
9934            duration_std_dev_factor: 0.3,
9935        };
9936        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9937        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9938
9939        // Get available users for resource assignment
9940        let available_users: Vec<String> = self
9941            .master_data
9942            .employees
9943            .iter()
9944            .take(20)
9945            .map(|e| e.user_id.clone())
9946            .collect();
9947
9948        // Deterministic base date from config (avoids Utc::now() non-determinism)
9949        let fallback_date =
9950            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9951        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9952            .unwrap_or(fallback_date);
9953        let base_midnight = base_date
9954            .and_hms_opt(0, 0, 0)
9955            .expect("midnight is always valid");
9956        let base_datetime =
9957            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9958
9959        // Helper closure to add case results to event log
9960        let add_result = |event_log: &mut OcpmEventLog,
9961                          result: datasynth_ocpm::CaseGenerationResult| {
9962            for event in result.events {
9963                event_log.add_event(event);
9964            }
9965            for object in result.objects {
9966                event_log.add_object(object);
9967            }
9968            for relationship in result.relationships {
9969                event_log.add_relationship(relationship);
9970            }
9971            for corr in result.correlation_events {
9972                event_log.add_correlation_event(corr);
9973            }
9974            event_log.add_case(result.case_trace);
9975        };
9976
9977        // Generate events from P2P chains
9978        for chain in &flows.p2p_chains {
9979            let po = &chain.purchase_order;
9980            let documents = P2pDocuments::new(
9981                &po.header.document_id,
9982                &po.vendor_id,
9983                &po.header.company_code,
9984                po.total_net_amount,
9985                &po.header.currency,
9986                &ocpm_uuid_factory,
9987            )
9988            .with_goods_receipt(
9989                chain
9990                    .goods_receipts
9991                    .first()
9992                    .map(|gr| gr.header.document_id.as_str())
9993                    .unwrap_or(""),
9994                &ocpm_uuid_factory,
9995            )
9996            .with_invoice(
9997                chain
9998                    .vendor_invoice
9999                    .as_ref()
10000                    .map(|vi| vi.header.document_id.as_str())
10001                    .unwrap_or(""),
10002                &ocpm_uuid_factory,
10003            )
10004            .with_payment(
10005                chain
10006                    .payment
10007                    .as_ref()
10008                    .map(|p| p.header.document_id.as_str())
10009                    .unwrap_or(""),
10010                &ocpm_uuid_factory,
10011            );
10012
10013            let start_time =
10014                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
10015            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
10016            add_result(&mut event_log, result);
10017
10018            if let Some(pb) = &pb {
10019                pb.inc(1);
10020            }
10021        }
10022
10023        // Generate events from O2C chains
10024        for chain in &flows.o2c_chains {
10025            let so = &chain.sales_order;
10026            let documents = O2cDocuments::new(
10027                &so.header.document_id,
10028                &so.customer_id,
10029                &so.header.company_code,
10030                so.total_net_amount,
10031                &so.header.currency,
10032                &ocpm_uuid_factory,
10033            )
10034            .with_delivery(
10035                chain
10036                    .deliveries
10037                    .first()
10038                    .map(|d| d.header.document_id.as_str())
10039                    .unwrap_or(""),
10040                &ocpm_uuid_factory,
10041            )
10042            .with_invoice(
10043                chain
10044                    .customer_invoice
10045                    .as_ref()
10046                    .map(|ci| ci.header.document_id.as_str())
10047                    .unwrap_or(""),
10048                &ocpm_uuid_factory,
10049            )
10050            .with_receipt(
10051                chain
10052                    .customer_receipt
10053                    .as_ref()
10054                    .map(|r| r.header.document_id.as_str())
10055                    .unwrap_or(""),
10056                &ocpm_uuid_factory,
10057            );
10058
10059            let start_time =
10060                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
10061            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
10062            add_result(&mut event_log, result);
10063
10064            if let Some(pb) = &pb {
10065                pb.inc(1);
10066            }
10067        }
10068
10069        // Generate events from S2C sourcing projects
10070        for project in &sourcing.sourcing_projects {
10071            // Find vendor from contracts or qualifications
10072            let vendor_id = sourcing
10073                .contracts
10074                .iter()
10075                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10076                .map(|c| c.vendor_id.clone())
10077                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
10078                .or_else(|| {
10079                    self.master_data
10080                        .vendors
10081                        .first()
10082                        .map(|v| v.vendor_id.clone())
10083                })
10084                .unwrap_or_else(|| "V000".to_string());
10085            let mut docs = S2cDocuments::new(
10086                &project.project_id,
10087                &vendor_id,
10088                &project.company_code,
10089                project.estimated_annual_spend,
10090                &ocpm_uuid_factory,
10091            );
10092            // Link RFx if available
10093            if let Some(rfx) = sourcing
10094                .rfx_events
10095                .iter()
10096                .find(|r| r.sourcing_project_id == project.project_id)
10097            {
10098                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
10099                // Link winning bid (status == Accepted)
10100                if let Some(bid) = sourcing.bids.iter().find(|b| {
10101                    b.rfx_id == rfx.rfx_id
10102                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
10103                }) {
10104                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
10105                }
10106            }
10107            // Link contract
10108            if let Some(contract) = sourcing
10109                .contracts
10110                .iter()
10111                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10112            {
10113                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
10114            }
10115            let start_time = base_datetime - chrono::Duration::days(90);
10116            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
10117            add_result(&mut event_log, result);
10118
10119            if let Some(pb) = &pb {
10120                pb.inc(1);
10121            }
10122        }
10123
10124        // Generate events from H2R payroll runs
10125        for run in &hr.payroll_runs {
10126            // Use first matching payroll line item's employee, or fallback
10127            let employee_id = hr
10128                .payroll_line_items
10129                .iter()
10130                .find(|li| li.payroll_id == run.payroll_id)
10131                .map(|li| li.employee_id.as_str())
10132                .unwrap_or("EMP000");
10133            let docs = H2rDocuments::new(
10134                &run.payroll_id,
10135                employee_id,
10136                &run.company_code,
10137                run.total_gross,
10138                &ocpm_uuid_factory,
10139            )
10140            .with_time_entries(
10141                hr.time_entries
10142                    .iter()
10143                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
10144                    .take(5)
10145                    .map(|t| t.entry_id.as_str())
10146                    .collect(),
10147            );
10148            let start_time = base_datetime - chrono::Duration::days(30);
10149            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
10150            add_result(&mut event_log, result);
10151
10152            if let Some(pb) = &pb {
10153                pb.inc(1);
10154            }
10155        }
10156
10157        // Generate events from MFG production orders
10158        for order in &manufacturing.production_orders {
10159            let mut docs = MfgDocuments::new(
10160                &order.order_id,
10161                &order.material_id,
10162                &order.company_code,
10163                order.planned_quantity,
10164                &ocpm_uuid_factory,
10165            )
10166            .with_operations(
10167                order
10168                    .operations
10169                    .iter()
10170                    .map(|o| format!("OP-{:04}", o.operation_number))
10171                    .collect::<Vec<_>>()
10172                    .iter()
10173                    .map(std::string::String::as_str)
10174                    .collect(),
10175            );
10176            // Link quality inspection if available (via reference_id matching order_id)
10177            if let Some(insp) = manufacturing
10178                .quality_inspections
10179                .iter()
10180                .find(|i| i.reference_id == order.order_id)
10181            {
10182                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10183            }
10184            // Link cycle count if available (match by material_id in items)
10185            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10186                cc.items
10187                    .iter()
10188                    .any(|item| item.material_id == order.material_id)
10189            }) {
10190                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10191            }
10192            let start_time = base_datetime - chrono::Duration::days(60);
10193            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10194            add_result(&mut event_log, result);
10195
10196            if let Some(pb) = &pb {
10197                pb.inc(1);
10198            }
10199        }
10200
10201        // Generate events from Banking customers
10202        for customer in &banking.customers {
10203            let customer_id_str = customer.customer_id.to_string();
10204            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10205            // Link accounts (primary_owner_id matches customer_id)
10206            if let Some(account) = banking
10207                .accounts
10208                .iter()
10209                .find(|a| a.primary_owner_id == customer.customer_id)
10210            {
10211                let account_id_str = account.account_id.to_string();
10212                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10213                // Link transactions for this account
10214                let txn_strs: Vec<String> = banking
10215                    .transactions
10216                    .iter()
10217                    .filter(|t| t.account_id == account.account_id)
10218                    .take(10)
10219                    .map(|t| t.transaction_id.to_string())
10220                    .collect();
10221                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10222                let txn_amounts: Vec<rust_decimal::Decimal> = banking
10223                    .transactions
10224                    .iter()
10225                    .filter(|t| t.account_id == account.account_id)
10226                    .take(10)
10227                    .map(|t| t.amount)
10228                    .collect();
10229                if !txn_ids.is_empty() {
10230                    docs = docs.with_transactions(txn_ids, txn_amounts);
10231                }
10232            }
10233            let start_time = base_datetime - chrono::Duration::days(180);
10234            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10235            add_result(&mut event_log, result);
10236
10237            if let Some(pb) = &pb {
10238                pb.inc(1);
10239            }
10240        }
10241
10242        // Generate events from Audit engagements
10243        for engagement in &audit.engagements {
10244            let engagement_id_str = engagement.engagement_id.to_string();
10245            let docs = AuditDocuments::new(
10246                &engagement_id_str,
10247                &engagement.client_entity_id,
10248                &ocpm_uuid_factory,
10249            )
10250            .with_workpapers(
10251                audit
10252                    .workpapers
10253                    .iter()
10254                    .filter(|w| w.engagement_id == engagement.engagement_id)
10255                    .take(10)
10256                    .map(|w| w.workpaper_id.to_string())
10257                    .collect::<Vec<_>>()
10258                    .iter()
10259                    .map(std::string::String::as_str)
10260                    .collect(),
10261            )
10262            .with_evidence(
10263                audit
10264                    .evidence
10265                    .iter()
10266                    .filter(|e| e.engagement_id == engagement.engagement_id)
10267                    .take(10)
10268                    .map(|e| e.evidence_id.to_string())
10269                    .collect::<Vec<_>>()
10270                    .iter()
10271                    .map(std::string::String::as_str)
10272                    .collect(),
10273            )
10274            .with_risks(
10275                audit
10276                    .risk_assessments
10277                    .iter()
10278                    .filter(|r| r.engagement_id == engagement.engagement_id)
10279                    .take(5)
10280                    .map(|r| r.risk_id.to_string())
10281                    .collect::<Vec<_>>()
10282                    .iter()
10283                    .map(std::string::String::as_str)
10284                    .collect(),
10285            )
10286            .with_findings(
10287                audit
10288                    .findings
10289                    .iter()
10290                    .filter(|f| f.engagement_id == engagement.engagement_id)
10291                    .take(5)
10292                    .map(|f| f.finding_id.to_string())
10293                    .collect::<Vec<_>>()
10294                    .iter()
10295                    .map(std::string::String::as_str)
10296                    .collect(),
10297            )
10298            .with_judgments(
10299                audit
10300                    .judgments
10301                    .iter()
10302                    .filter(|j| j.engagement_id == engagement.engagement_id)
10303                    .take(5)
10304                    .map(|j| j.judgment_id.to_string())
10305                    .collect::<Vec<_>>()
10306                    .iter()
10307                    .map(std::string::String::as_str)
10308                    .collect(),
10309            );
10310            let start_time = base_datetime - chrono::Duration::days(120);
10311            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10312            add_result(&mut event_log, result);
10313
10314            if let Some(pb) = &pb {
10315                pb.inc(1);
10316            }
10317        }
10318
10319        // Generate events from Bank Reconciliations
10320        for recon in &financial_reporting.bank_reconciliations {
10321            let docs = BankReconDocuments::new(
10322                &recon.reconciliation_id,
10323                &recon.bank_account_id,
10324                &recon.company_code,
10325                recon.bank_ending_balance,
10326                &ocpm_uuid_factory,
10327            )
10328            .with_statement_lines(
10329                recon
10330                    .statement_lines
10331                    .iter()
10332                    .take(20)
10333                    .map(|l| l.line_id.as_str())
10334                    .collect(),
10335            )
10336            .with_reconciling_items(
10337                recon
10338                    .reconciling_items
10339                    .iter()
10340                    .take(10)
10341                    .map(|i| i.item_id.as_str())
10342                    .collect(),
10343            );
10344            let start_time = base_datetime - chrono::Duration::days(30);
10345            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10346            add_result(&mut event_log, result);
10347
10348            if let Some(pb) = &pb {
10349                pb.inc(1);
10350            }
10351        }
10352
10353        // Compute process variants
10354        event_log.compute_variants();
10355
10356        let summary = event_log.summary();
10357
10358        if let Some(pb) = pb {
10359            pb.finish_with_message(format!(
10360                "Generated {} OCPM events, {} objects",
10361                summary.event_count, summary.object_count
10362            ));
10363        }
10364
10365        Ok(OcpmSnapshot {
10366            event_count: summary.event_count,
10367            object_count: summary.object_count,
10368            case_count: summary.case_count,
10369            event_log: Some(event_log),
10370        })
10371    }
10372
10373    /// Inject anomalies into journal entries.
10374    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10375        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10376
10377        // Read anomaly rates from config instead of using hardcoded values.
10378        // Priority: anomaly_injection config > fraud config > default 0.02
10379        let total_rate = if self.config.anomaly_injection.enabled {
10380            self.config.anomaly_injection.rates.total_rate
10381        } else if self.config.fraud.enabled {
10382            self.config.fraud.fraud_rate
10383        } else {
10384            0.02
10385        };
10386
10387        let fraud_rate = if self.config.anomaly_injection.enabled {
10388            self.config.anomaly_injection.rates.fraud_rate
10389        } else {
10390            AnomalyRateConfig::default().fraud_rate
10391        };
10392
10393        let error_rate = if self.config.anomaly_injection.enabled {
10394            self.config.anomaly_injection.rates.error_rate
10395        } else {
10396            AnomalyRateConfig::default().error_rate
10397        };
10398
10399        let process_issue_rate = if self.config.anomaly_injection.enabled {
10400            self.config.anomaly_injection.rates.process_rate
10401        } else {
10402            AnomalyRateConfig::default().process_issue_rate
10403        };
10404
10405        let anomaly_config = AnomalyInjectorConfig {
10406            rates: AnomalyRateConfig {
10407                total_rate,
10408                fraud_rate,
10409                error_rate,
10410                process_issue_rate,
10411                ..Default::default()
10412            },
10413            seed: self.seed + 5000,
10414            ..Default::default()
10415        };
10416
10417        let mut injector = AnomalyInjector::new(anomaly_config);
10418        let result = injector.process_entries(entries);
10419
10420        if let Some(pb) = &pb {
10421            pb.inc(entries.len() as u64);
10422            pb.finish_with_message("Anomaly injection complete");
10423        }
10424
10425        let mut by_type = HashMap::new();
10426        for label in &result.labels {
10427            *by_type
10428                .entry(format!("{:?}", label.anomaly_type))
10429                .or_insert(0) += 1;
10430        }
10431
10432        Ok(AnomalyLabels {
10433            labels: result.labels,
10434            summary: Some(result.summary),
10435            by_type,
10436        })
10437    }
10438
10439    /// Validate journal entries using running balance tracker.
10440    ///
10441    /// Applies all entries to the balance tracker and validates:
10442    /// - Each entry is internally balanced (debits = credits)
10443    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
10444    ///
10445    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
10446    /// excluded from balance validation as they may be intentionally unbalanced.
10447    fn validate_journal_entries(
10448        &mut self,
10449        entries: &[JournalEntry],
10450    ) -> SynthResult<BalanceValidationResult> {
10451        // Filter out entries with human errors as they may be intentionally unbalanced
10452        let clean_entries: Vec<&JournalEntry> = entries
10453            .iter()
10454            .filter(|e| {
10455                e.header
10456                    .header_text
10457                    .as_ref()
10458                    .map(|t| !t.contains("[HUMAN_ERROR:"))
10459                    .unwrap_or(true)
10460            })
10461            .collect();
10462
10463        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10464
10465        // Configure tracker to not fail on errors (collect them instead)
10466        let config = BalanceTrackerConfig {
10467            validate_on_each_entry: false,   // We'll validate at the end
10468            track_history: false,            // Skip history for performance
10469            fail_on_validation_error: false, // Collect errors, don't fail
10470            ..Default::default()
10471        };
10472        let validation_currency = self
10473            .config
10474            .companies
10475            .first()
10476            .map(|c| c.currency.clone())
10477            .unwrap_or_else(|| "USD".to_string());
10478
10479        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10480
10481        // Apply clean entries (without human errors)
10482        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10483        let errors = tracker.apply_entries(&clean_refs);
10484
10485        if let Some(pb) = &pb {
10486            pb.inc(entries.len() as u64);
10487        }
10488
10489        // Check if any entries were unbalanced
10490        // Note: When fail_on_validation_error is false, errors are stored in tracker
10491        let has_unbalanced = tracker
10492            .get_validation_errors()
10493            .iter()
10494            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10495
10496        // Validate balance sheet for each company
10497        // Include both returned errors and collected validation errors
10498        let mut all_errors = errors;
10499        all_errors.extend(tracker.get_validation_errors().iter().cloned());
10500        let company_codes: Vec<String> = self
10501            .config
10502            .companies
10503            .iter()
10504            .map(|c| c.code.clone())
10505            .collect();
10506
10507        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10508            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10509            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10510
10511        for company_code in &company_codes {
10512            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10513                all_errors.push(e);
10514            }
10515        }
10516
10517        // Get statistics after all mutable operations are done
10518        let stats = tracker.get_statistics();
10519
10520        // Determine if balanced overall
10521        let is_balanced = all_errors.is_empty();
10522
10523        if let Some(pb) = pb {
10524            let msg = if is_balanced {
10525                "Balance validation passed"
10526            } else {
10527                "Balance validation completed with errors"
10528            };
10529            pb.finish_with_message(msg);
10530        }
10531
10532        Ok(BalanceValidationResult {
10533            validated: true,
10534            is_balanced,
10535            entries_processed: stats.entries_processed,
10536            total_debits: stats.total_debits,
10537            total_credits: stats.total_credits,
10538            accounts_tracked: stats.accounts_tracked,
10539            companies_tracked: stats.companies_tracked,
10540            validation_errors: all_errors,
10541            has_unbalanced_entries: has_unbalanced,
10542        })
10543    }
10544
10545    /// Inject data quality variations into journal entries.
10546    ///
10547    /// Applies typos, missing values, and format variations to make
10548    /// the synthetic data more realistic for testing data cleaning pipelines.
10549    fn inject_data_quality(
10550        &mut self,
10551        entries: &mut [JournalEntry],
10552    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10553        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10554
10555        // Build config from user-specified schema settings when data_quality is enabled;
10556        // otherwise fall back to the low-rate minimal() preset.
10557        let config = if self.config.data_quality.enabled {
10558            let dq = &self.config.data_quality;
10559            DataQualityConfig {
10560                enable_missing_values: dq.missing_values.enabled,
10561                missing_values: datasynth_generators::MissingValueConfig {
10562                    global_rate: dq.effective_missing_rate(),
10563                    ..Default::default()
10564                },
10565                enable_format_variations: dq.format_variations.enabled,
10566                format_variations: datasynth_generators::FormatVariationConfig {
10567                    date_variation_rate: dq.format_variations.dates.rate,
10568                    amount_variation_rate: dq.format_variations.amounts.rate,
10569                    identifier_variation_rate: dq.format_variations.identifiers.rate,
10570                    ..Default::default()
10571                },
10572                enable_duplicates: dq.duplicates.enabled,
10573                duplicates: datasynth_generators::DuplicateConfig {
10574                    duplicate_rate: dq.effective_duplicate_rate(),
10575                    ..Default::default()
10576                },
10577                enable_typos: dq.typos.enabled,
10578                typos: datasynth_generators::TypoConfig {
10579                    char_error_rate: dq.effective_typo_rate(),
10580                    ..Default::default()
10581                },
10582                enable_encoding_issues: dq.encoding_issues.enabled,
10583                encoding_issue_rate: dq.encoding_issues.rate,
10584                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
10585                track_statistics: true,
10586            }
10587        } else {
10588            DataQualityConfig::minimal()
10589        };
10590        let mut injector = DataQualityInjector::new(config);
10591
10592        // Wire country pack for locale-aware format baselines
10593        injector.set_country_pack(self.primary_pack().clone());
10594
10595        // Build context for missing value decisions
10596        let context = HashMap::new();
10597
10598        for entry in entries.iter_mut() {
10599            // Process header_text field (common target for typos)
10600            if let Some(text) = &entry.header.header_text {
10601                let processed = injector.process_text_field(
10602                    "header_text",
10603                    text,
10604                    &entry.header.document_id.to_string(),
10605                    &context,
10606                );
10607                match processed {
10608                    Some(new_text) if new_text != *text => {
10609                        entry.header.header_text = Some(new_text);
10610                    }
10611                    None => {
10612                        entry.header.header_text = None; // Missing value
10613                    }
10614                    _ => {}
10615                }
10616            }
10617
10618            // Process reference field
10619            if let Some(ref_text) = &entry.header.reference {
10620                let processed = injector.process_text_field(
10621                    "reference",
10622                    ref_text,
10623                    &entry.header.document_id.to_string(),
10624                    &context,
10625                );
10626                match processed {
10627                    Some(new_text) if new_text != *ref_text => {
10628                        entry.header.reference = Some(new_text);
10629                    }
10630                    None => {
10631                        entry.header.reference = None;
10632                    }
10633                    _ => {}
10634                }
10635            }
10636
10637            // Process user_persona field (potential for typos in user IDs)
10638            let user_persona = entry.header.user_persona.clone();
10639            if let Some(processed) = injector.process_text_field(
10640                "user_persona",
10641                &user_persona,
10642                &entry.header.document_id.to_string(),
10643                &context,
10644            ) {
10645                if processed != user_persona {
10646                    entry.header.user_persona = processed;
10647                }
10648            }
10649
10650            // Process line items
10651            for line in &mut entry.lines {
10652                // Process line description if present
10653                if let Some(ref text) = line.line_text {
10654                    let processed = injector.process_text_field(
10655                        "line_text",
10656                        text,
10657                        &entry.header.document_id.to_string(),
10658                        &context,
10659                    );
10660                    match processed {
10661                        Some(new_text) if new_text != *text => {
10662                            line.line_text = Some(new_text);
10663                        }
10664                        None => {
10665                            line.line_text = None;
10666                        }
10667                        _ => {}
10668                    }
10669                }
10670
10671                // Process cost_center if present
10672                if let Some(cc) = &line.cost_center {
10673                    let processed = injector.process_text_field(
10674                        "cost_center",
10675                        cc,
10676                        &entry.header.document_id.to_string(),
10677                        &context,
10678                    );
10679                    match processed {
10680                        Some(new_cc) if new_cc != *cc => {
10681                            line.cost_center = Some(new_cc);
10682                        }
10683                        None => {
10684                            line.cost_center = None;
10685                        }
10686                        _ => {}
10687                    }
10688                }
10689            }
10690
10691            if let Some(pb) = &pb {
10692                pb.inc(1);
10693            }
10694        }
10695
10696        if let Some(pb) = pb {
10697            pb.finish_with_message("Data quality injection complete");
10698        }
10699
10700        let quality_issues = injector.issues().to_vec();
10701        Ok((injector.stats().clone(), quality_issues))
10702    }
10703
10704    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
10705    ///
10706    /// Creates complete audit documentation for each company in the configuration,
10707    /// following ISA standards:
10708    /// - ISA 210/220: Engagement acceptance and terms
10709    /// - ISA 230: Audit documentation (workpapers)
10710    /// - ISA 265: Control deficiencies (findings)
10711    /// - ISA 315/330: Risk assessment and response
10712    /// - ISA 500: Audit evidence
10713    /// - ISA 200: Professional judgment
10714    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10715        // Check if FSM-driven audit generation is enabled
10716        let use_fsm = self
10717            .config
10718            .audit
10719            .fsm
10720            .as_ref()
10721            .map(|f| f.enabled)
10722            .unwrap_or(false);
10723
10724        if use_fsm {
10725            return self.generate_audit_data_with_fsm(entries);
10726        }
10727
10728        // --- Legacy (non-FSM) audit generation follows ---
10729        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10730            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10731        let fiscal_year = start_date.year() as u16;
10732        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10733
10734        // Calculate rough total revenue from entries for materiality
10735        let total_revenue: rust_decimal::Decimal = entries
10736            .iter()
10737            .flat_map(|e| e.lines.iter())
10738            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
10739            .map(|l| l.credit_amount)
10740            .sum();
10741
10742        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
10743        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
10744
10745        let mut snapshot = AuditSnapshot::default();
10746
10747        // Initialize generators
10748        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
10749        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
10750        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
10751        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
10752        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
10753        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
10754        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
10755        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
10756        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
10757        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
10758        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
10759        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
10760
10761        // Get list of accounts from CoA for risk assessment
10762        let accounts: Vec<String> = self
10763            .coa
10764            .as_ref()
10765            .map(|coa| {
10766                coa.get_postable_accounts()
10767                    .iter()
10768                    .map(|acc| acc.account_code().to_string())
10769                    .collect()
10770            })
10771            .unwrap_or_default();
10772
10773        // Generate engagements for each company
10774        for (i, company) in self.config.companies.iter().enumerate() {
10775            // Calculate company-specific revenue (proportional to volume weight)
10776            let company_revenue = total_revenue
10777                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
10778
10779            // Generate engagements for this company
10780            let engagements_for_company =
10781                self.phase_config.audit_engagements / self.config.companies.len().max(1);
10782            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
10783                1
10784            } else {
10785                0
10786            };
10787
10788            for _eng_idx in 0..(engagements_for_company + extra) {
10789                // Generate the engagement
10790                let mut engagement = engagement_gen.generate_engagement(
10791                    &company.code,
10792                    &company.name,
10793                    fiscal_year,
10794                    period_end,
10795                    company_revenue,
10796                    None, // Use default engagement type
10797                );
10798
10799                // Replace synthetic team IDs with real employee IDs from master data
10800                if !self.master_data.employees.is_empty() {
10801                    let emp_count = self.master_data.employees.len();
10802                    // Use employee IDs deterministically based on engagement index
10803                    let base = (i * 10 + _eng_idx) % emp_count;
10804                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
10805                        .employee_id
10806                        .clone();
10807                    engagement.engagement_manager_id = self.master_data.employees
10808                        [(base + 1) % emp_count]
10809                        .employee_id
10810                        .clone();
10811                    let real_team: Vec<String> = engagement
10812                        .team_member_ids
10813                        .iter()
10814                        .enumerate()
10815                        .map(|(j, _)| {
10816                            self.master_data.employees[(base + 2 + j) % emp_count]
10817                                .employee_id
10818                                .clone()
10819                        })
10820                        .collect();
10821                    engagement.team_member_ids = real_team;
10822                }
10823
10824                if let Some(pb) = &pb {
10825                    pb.inc(1);
10826                }
10827
10828                // Get team members from the engagement
10829                let team_members: Vec<String> = engagement.team_member_ids.clone();
10830
10831                // Generate workpapers for the engagement
10832                let workpapers =
10833                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10834
10835                for wp in &workpapers {
10836                    if let Some(pb) = &pb {
10837                        pb.inc(1);
10838                    }
10839
10840                    // Generate evidence for each workpaper
10841                    let evidence = evidence_gen.generate_evidence_for_workpaper(
10842                        wp,
10843                        &team_members,
10844                        wp.preparer_date,
10845                    );
10846
10847                    for _ in &evidence {
10848                        if let Some(pb) = &pb {
10849                            pb.inc(1);
10850                        }
10851                    }
10852
10853                    snapshot.evidence.extend(evidence);
10854                }
10855
10856                // Generate risk assessments for the engagement
10857                let risks =
10858                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10859
10860                for _ in &risks {
10861                    if let Some(pb) = &pb {
10862                        pb.inc(1);
10863                    }
10864                }
10865                snapshot.risk_assessments.extend(risks);
10866
10867                // Generate findings for the engagement
10868                let findings = finding_gen.generate_findings_for_engagement(
10869                    &engagement,
10870                    &workpapers,
10871                    &team_members,
10872                );
10873
10874                for _ in &findings {
10875                    if let Some(pb) = &pb {
10876                        pb.inc(1);
10877                    }
10878                }
10879                snapshot.findings.extend(findings);
10880
10881                // Generate professional judgments for the engagement
10882                let judgments =
10883                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10884
10885                for _ in &judgments {
10886                    if let Some(pb) = &pb {
10887                        pb.inc(1);
10888                    }
10889                }
10890                snapshot.judgments.extend(judgments);
10891
10892                // ISA 505: External confirmations and responses
10893                let (confs, resps) =
10894                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10895                snapshot.confirmations.extend(confs);
10896                snapshot.confirmation_responses.extend(resps);
10897
10898                // ISA 330: Procedure steps per workpaper
10899                let team_pairs: Vec<(String, String)> = team_members
10900                    .iter()
10901                    .map(|id| {
10902                        let name = self
10903                            .master_data
10904                            .employees
10905                            .iter()
10906                            .find(|e| e.employee_id == *id)
10907                            .map(|e| e.display_name.clone())
10908                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10909                        (id.clone(), name)
10910                    })
10911                    .collect();
10912                for wp in &workpapers {
10913                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10914                    snapshot.procedure_steps.extend(steps);
10915                }
10916
10917                // ISA 530: Samples per workpaper
10918                for wp in &workpapers {
10919                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10920                        snapshot.samples.push(sample);
10921                    }
10922                }
10923
10924                // ISA 520: Analytical procedures
10925                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10926                snapshot.analytical_results.extend(analytical);
10927
10928                // ISA 610: Internal audit function and reports
10929                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10930                snapshot.ia_functions.push(ia_func);
10931                snapshot.ia_reports.extend(ia_reports);
10932
10933                // ISA 550: Related parties and transactions
10934                let vendor_names: Vec<String> = self
10935                    .master_data
10936                    .vendors
10937                    .iter()
10938                    .map(|v| v.name.clone())
10939                    .collect();
10940                let customer_names: Vec<String> = self
10941                    .master_data
10942                    .customers
10943                    .iter()
10944                    .map(|c| c.name.clone())
10945                    .collect();
10946                let (parties, rp_txns) =
10947                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10948                snapshot.related_parties.extend(parties);
10949                snapshot.related_party_transactions.extend(rp_txns);
10950
10951                // Add workpapers after findings since findings need them
10952                snapshot.workpapers.extend(workpapers);
10953
10954                // Generate audit scope record for this engagement (one per engagement)
10955                {
10956                    let scope_id = format!(
10957                        "SCOPE-{}-{}",
10958                        engagement.engagement_id.simple(),
10959                        &engagement.client_entity_id
10960                    );
10961                    let scope = datasynth_core::models::audit::AuditScope::new(
10962                        scope_id.clone(),
10963                        engagement.engagement_id.to_string(),
10964                        engagement.client_entity_id.clone(),
10965                        engagement.materiality,
10966                    );
10967                    // Wire scope_id back to engagement
10968                    let mut eng = engagement;
10969                    eng.scope_id = Some(scope_id);
10970                    snapshot.audit_scopes.push(scope);
10971                    snapshot.engagements.push(eng);
10972                }
10973            }
10974        }
10975
10976        // ----------------------------------------------------------------
10977        // ISA 600: Group audit — component auditors, plan, instructions, reports
10978        // ----------------------------------------------------------------
10979        if self.config.companies.len() > 1 {
10980            // Use materiality from the first engagement if available, otherwise
10981            // derive a reasonable figure from total revenue.
10982            let group_materiality = snapshot
10983                .engagements
10984                .first()
10985                .map(|e| e.materiality)
10986                .unwrap_or_else(|| {
10987                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
10988                    total_revenue * pct
10989                });
10990
10991            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
10992            let group_engagement_id = snapshot
10993                .engagements
10994                .first()
10995                .map(|e| e.engagement_id.to_string())
10996                .unwrap_or_else(|| "GROUP-ENG".to_string());
10997
10998            let component_snapshot = component_gen.generate(
10999                &self.config.companies,
11000                group_materiality,
11001                &group_engagement_id,
11002                period_end,
11003            );
11004
11005            snapshot.component_auditors = component_snapshot.component_auditors;
11006            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
11007            snapshot.component_instructions = component_snapshot.component_instructions;
11008            snapshot.component_reports = component_snapshot.component_reports;
11009
11010            info!(
11011                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
11012                snapshot.component_auditors.len(),
11013                snapshot.component_instructions.len(),
11014                snapshot.component_reports.len(),
11015            );
11016        }
11017
11018        // ----------------------------------------------------------------
11019        // ISA 210: Engagement letters — one per engagement
11020        // ----------------------------------------------------------------
11021        {
11022            let applicable_framework = self
11023                .config
11024                .accounting_standards
11025                .framework
11026                .as_ref()
11027                .map(|f| format!("{f:?}"))
11028                .unwrap_or_else(|| "IFRS".to_string());
11029
11030            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
11031            let entity_count = self.config.companies.len();
11032
11033            for engagement in &snapshot.engagements {
11034                let company = self
11035                    .config
11036                    .companies
11037                    .iter()
11038                    .find(|c| c.code == engagement.client_entity_id);
11039                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
11040                let letter_date = engagement.planning_start;
11041                let letter = letter_gen.generate(
11042                    &engagement.engagement_id.to_string(),
11043                    &engagement.client_name,
11044                    entity_count,
11045                    engagement.period_end_date,
11046                    currency,
11047                    &applicable_framework,
11048                    letter_date,
11049                );
11050                snapshot.engagement_letters.push(letter);
11051            }
11052
11053            info!(
11054                "ISA 210 engagement letters: {} generated",
11055                snapshot.engagement_letters.len()
11056            );
11057        }
11058
11059        // ----------------------------------------------------------------
11060        // ISA 560 / IAS 10: Subsequent events
11061        // ----------------------------------------------------------------
11062        {
11063            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
11064            let entity_codes: Vec<String> = self
11065                .config
11066                .companies
11067                .iter()
11068                .map(|c| c.code.clone())
11069                .collect();
11070            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
11071            info!(
11072                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
11073                subsequent.len(),
11074                subsequent
11075                    .iter()
11076                    .filter(|e| matches!(
11077                        e.classification,
11078                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
11079                    ))
11080                    .count(),
11081                subsequent
11082                    .iter()
11083                    .filter(|e| matches!(
11084                        e.classification,
11085                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
11086                    ))
11087                    .count(),
11088            );
11089            snapshot.subsequent_events = subsequent;
11090        }
11091
11092        // ----------------------------------------------------------------
11093        // ISA 402: Service organization controls
11094        // ----------------------------------------------------------------
11095        {
11096            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
11097            let entity_codes: Vec<String> = self
11098                .config
11099                .companies
11100                .iter()
11101                .map(|c| c.code.clone())
11102                .collect();
11103            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
11104            info!(
11105                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
11106                soc_snapshot.service_organizations.len(),
11107                soc_snapshot.soc_reports.len(),
11108                soc_snapshot.user_entity_controls.len(),
11109            );
11110            snapshot.service_organizations = soc_snapshot.service_organizations;
11111            snapshot.soc_reports = soc_snapshot.soc_reports;
11112            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
11113        }
11114
11115        // ----------------------------------------------------------------
11116        // ISA 570: Going concern assessments
11117        // ----------------------------------------------------------------
11118        {
11119            use datasynth_generators::audit::going_concern_generator::{
11120                GoingConcernGenerator, GoingConcernInput,
11121            };
11122            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
11123            let entity_codes: Vec<String> = self
11124                .config
11125                .companies
11126                .iter()
11127                .map(|c| c.code.clone())
11128                .collect();
11129            // Assessment date = period end + 75 days (typical sign-off window).
11130            let assessment_date = period_end + chrono::Duration::days(75);
11131            let period_label = format!("FY{}", period_end.year());
11132
11133            // Build financial inputs from actual journal entries.
11134            //
11135            // We derive approximate P&L, working capital, and operating cash flow
11136            // by aggregating GL account balances from the journal entry population.
11137            // Account ranges used (standard chart):
11138            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
11139            //   Expenses:        6xxx (debit-normal)
11140            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
11141            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
11142            //   Operating CF:    net income, used unadjusted as a rough proxy (no D&A add-back here)
11143            let gc_inputs: Vec<GoingConcernInput> = self
11144                .config
11145                .companies
11146                .iter()
11147                .map(|company| {
11148                    let code = &company.code;
11149                    let mut revenue = rust_decimal::Decimal::ZERO;
11150                    let mut expenses = rust_decimal::Decimal::ZERO;
11151                    let mut current_assets = rust_decimal::Decimal::ZERO;
11152                    let mut current_liabs = rust_decimal::Decimal::ZERO;
11153                    let mut total_debt = rust_decimal::Decimal::ZERO;
11154
11155                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
11156                        for line in &je.lines {
11157                            let acct = line.gl_account.as_str();
11158                            let net = line.debit_amount - line.credit_amount;
11159                            if acct.starts_with('4') {
11160                                // Revenue accounts: credit-normal, so negative net = revenue earned
11161                                revenue -= net;
11162                            } else if acct.starts_with('6') {
11163                                // Expense accounts: debit-normal
11164                                expenses += net;
11165                            }
11166                            // Balance sheet accounts for working capital
11167                            if acct.starts_with('1') {
11168                                // Current asset accounts (1000–1499)
11169                                if let Ok(n) = acct.parse::<u32>() {
11170                                    if (1000..=1499).contains(&n) {
11171                                        current_assets += net;
11172                                    }
11173                                }
11174                            } else if acct.starts_with('2') {
11175                                if let Ok(n) = acct.parse::<u32>() {
11176                                    if (2000..=2499).contains(&n) {
11177                                        // Current liabilities
11178                                        current_liabs -= net; // credit-normal
11179                                    } else if (2500..=2999).contains(&n) {
11180                                        // Long-term debt
11181                                        total_debt -= net;
11182                                    }
11183                                }
11184                            }
11185                        }
11186                    }
11187
11188                    let net_income = revenue - expenses;
11189                    let working_capital = current_assets - current_liabs;
11190                    // Rough operating CF proxy: net income (full accrual CF calculation
11191                    // is done separately in the cash flow statement generator)
11192                    let operating_cash_flow = net_income;
11193
11194                    GoingConcernInput {
11195                        entity_code: code.clone(),
11196                        net_income,
11197                        working_capital,
11198                        operating_cash_flow,
11199                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
11200                        assessment_date,
11201                    }
11202                })
11203                .collect();
11204
11205            let assessments = if gc_inputs.is_empty() {
11206                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
11207            } else {
11208                gc_gen.generate_for_entities_with_inputs(
11209                    &entity_codes,
11210                    &gc_inputs,
11211                    assessment_date,
11212                    &period_label,
11213                )
11214            };
11215            info!(
11216                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
11217                assessments.len(),
11218                assessments.iter().filter(|a| matches!(
11219                    a.auditor_conclusion,
11220                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
11221                )).count(),
11222                assessments.iter().filter(|a| matches!(
11223                    a.auditor_conclusion,
11224                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
11225                )).count(),
11226                assessments.iter().filter(|a| matches!(
11227                    a.auditor_conclusion,
11228                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
11229                )).count(),
11230            );
11231            snapshot.going_concern_assessments = assessments;
11232        }
11233
11234        // ----------------------------------------------------------------
11235        // ISA 540: Accounting estimates
11236        // ----------------------------------------------------------------
11237        {
11238            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
11239            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
11240            let entity_codes: Vec<String> = self
11241                .config
11242                .companies
11243                .iter()
11244                .map(|c| c.code.clone())
11245                .collect();
11246            let estimates = est_gen.generate_for_entities(&entity_codes);
11247            info!(
11248                "ISA 540 accounting estimates: {} estimates across {} entities \
11249                 ({} with retrospective reviews, {} with auditor point estimates)",
11250                estimates.len(),
11251                entity_codes.len(),
11252                estimates
11253                    .iter()
11254                    .filter(|e| e.retrospective_review.is_some())
11255                    .count(),
11256                estimates
11257                    .iter()
11258                    .filter(|e| e.auditor_point_estimate.is_some())
11259                    .count(),
11260            );
11261            snapshot.accounting_estimates = estimates;
11262        }
11263
11264        // ----------------------------------------------------------------
11265        // ISA 700/701/705/706: Audit opinions (one per engagement)
11266        // ----------------------------------------------------------------
11267        {
11268            use datasynth_generators::audit::audit_opinion_generator::{
11269                AuditOpinionGenerator, AuditOpinionInput,
11270            };
11271
11272            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
11273
11274            // Build inputs — one per engagement, linking findings and going concern.
11275            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
11276                .engagements
11277                .iter()
11278                .map(|eng| {
11279                    // Collect findings for this engagement.
11280                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11281                        .findings
11282                        .iter()
11283                        .filter(|f| f.engagement_id == eng.engagement_id)
11284                        .cloned()
11285                        .collect();
11286
11287                    // Going concern for this entity.
11288                    let gc = snapshot
11289                        .going_concern_assessments
11290                        .iter()
11291                        .find(|g| g.entity_code == eng.client_entity_id)
11292                        .cloned();
11293
11294                    // Component reports: every report in the snapshot is passed to each engagement (no per-engagement filtering).
11295                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11296                        snapshot.component_reports.clone();
11297
11298                    let auditor = self
11299                        .master_data
11300                        .employees
11301                        .first()
11302                        .map(|e| e.display_name.clone())
11303                        .unwrap_or_else(|| "Global Audit LLP".into());
11304
11305                    let partner = self
11306                        .master_data
11307                        .employees
11308                        .get(1)
11309                        .map(|e| e.display_name.clone())
11310                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
11311
11312                    AuditOpinionInput {
11313                        entity_code: eng.client_entity_id.clone(),
11314                        entity_name: eng.client_name.clone(),
11315                        engagement_id: eng.engagement_id,
11316                        period_end: eng.period_end_date,
11317                        findings: eng_findings,
11318                        going_concern: gc,
11319                        component_reports: comp_reports,
11320                        // Mark as US-listed when audit standards include PCAOB.
11321                        is_us_listed: {
11322                            let fw = &self.config.audit_standards.isa_compliance.framework;
11323                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11324                        },
11325                        auditor_name: auditor,
11326                        engagement_partner: partner,
11327                    }
11328                })
11329                .collect();
11330
11331            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11332
11333            for go in &generated_opinions {
11334                snapshot
11335                    .key_audit_matters
11336                    .extend(go.key_audit_matters.clone());
11337            }
11338            snapshot.audit_opinions = generated_opinions
11339                .into_iter()
11340                .map(|go| go.opinion)
11341                .collect();
11342
11343            info!(
11344                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11345                snapshot.audit_opinions.len(),
11346                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11347                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11348                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11349                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11350            );
11351        }
11352
11353        // ----------------------------------------------------------------
11354        // SOX 302 / 404 assessments
11355        // ----------------------------------------------------------------
11356        {
11357            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11358
11359            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11360
11361            for (i, company) in self.config.companies.iter().enumerate() {
11362                // Collect findings for this company's engagements.
11363                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11364                    .engagements
11365                    .iter()
11366                    .filter(|e| e.client_entity_id == company.code)
11367                    .map(|e| e.engagement_id)
11368                    .collect();
11369
11370                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11371                    .findings
11372                    .iter()
11373                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11374                    .cloned()
11375                    .collect();
11376
11377                // Derive executive names from employee list.
11378                let emp_count = self.master_data.employees.len();
11379                let ceo_name = if emp_count > 0 {
11380                    self.master_data.employees[i % emp_count]
11381                        .display_name
11382                        .clone()
11383                } else {
11384                    format!("CEO of {}", company.name)
11385                };
11386                let cfo_name = if emp_count > 1 {
11387                    self.master_data.employees[(i + 1) % emp_count]
11388                        .display_name
11389                        .clone()
11390                } else {
11391                    format!("CFO of {}", company.name)
11392                };
11393
11394                // Use engagement materiality if available.
11395                let materiality = snapshot
11396                    .engagements
11397                    .iter()
11398                    .find(|e| e.client_entity_id == company.code)
11399                    .map(|e| e.materiality)
11400                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11401
11402                let input = SoxGeneratorInput {
11403                    company_code: company.code.clone(),
11404                    company_name: company.name.clone(),
11405                    fiscal_year,
11406                    period_end,
11407                    findings: company_findings,
11408                    ceo_name,
11409                    cfo_name,
11410                    materiality_threshold: materiality,
11411                    revenue_percent: rust_decimal::Decimal::from(100),
11412                    assets_percent: rust_decimal::Decimal::from(100),
11413                    significant_accounts: vec![
11414                        "Revenue".into(),
11415                        "Accounts Receivable".into(),
11416                        "Inventory".into(),
11417                        "Fixed Assets".into(),
11418                        "Accounts Payable".into(),
11419                    ],
11420                };
11421
11422                let (certs, assessment) = sox_gen.generate(&input);
11423                snapshot.sox_302_certifications.extend(certs);
11424                snapshot.sox_404_assessments.push(assessment);
11425            }
11426
11427            info!(
11428                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11429                snapshot.sox_302_certifications.len(),
11430                snapshot.sox_404_assessments.len(),
11431                snapshot
11432                    .sox_404_assessments
11433                    .iter()
11434                    .filter(|a| a.icfr_effective)
11435                    .count(),
11436                snapshot
11437                    .sox_404_assessments
11438                    .iter()
11439                    .filter(|a| !a.icfr_effective)
11440                    .count(),
11441            );
11442        }
11443
11444        // ----------------------------------------------------------------
11445        // ISA 320: Materiality calculations (one per entity)
11446        // ----------------------------------------------------------------
11447        {
11448            use datasynth_generators::audit::materiality_generator::{
11449                MaterialityGenerator, MaterialityInput,
11450            };
11451
11452            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11453
11454            // Compute per-company financials from JEs.
11455            // Asset accounts start with '1', revenue with '4',
11456            // expense accounts with '5' or '6'.
11457            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11458
11459            for company in &self.config.companies {
11460                let company_code = company.code.clone();
11461
11462                // Revenue: credit-side entries on 4xxx accounts
11463                let company_revenue: rust_decimal::Decimal = entries
11464                    .iter()
11465                    .filter(|e| e.company_code() == company_code)
11466                    .flat_map(|e| e.lines.iter())
11467                    .filter(|l| l.account_code.starts_with('4'))
11468                    .map(|l| l.credit_amount)
11469                    .sum();
11470
11471                // Total assets: sum of debit amounts on 1xxx accounts (credits are not netted off)
11472                let total_assets: rust_decimal::Decimal = entries
11473                    .iter()
11474                    .filter(|e| e.company_code() == company_code)
11475                    .flat_map(|e| e.lines.iter())
11476                    .filter(|l| l.account_code.starts_with('1'))
11477                    .map(|l| l.debit_amount)
11478                    .sum();
11479
11480                // Expenses: debit-side entries on 5xxx/6xxx accounts
11481                let total_expenses: rust_decimal::Decimal = entries
11482                    .iter()
11483                    .filter(|e| e.company_code() == company_code)
11484                    .flat_map(|e| e.lines.iter())
11485                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11486                    .map(|l| l.debit_amount)
11487                    .sum();
11488
11489                // Equity: sum of credit amounts on 3xxx accounts (debits are not netted off)
11490                let equity: rust_decimal::Decimal = entries
11491                    .iter()
11492                    .filter(|e| e.company_code() == company_code)
11493                    .flat_map(|e| e.lines.iter())
11494                    .filter(|l| l.account_code.starts_with('3'))
11495                    .map(|l| l.credit_amount)
11496                    .sum();
11497
11498                let pretax_income = company_revenue - total_expenses;
11499
11500                // If no company-specific data, fall back to proportional share
11501                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11502                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
11503                        .unwrap_or(rust_decimal::Decimal::ONE);
11504                    (
11505                        total_revenue * w,
11506                        total_revenue * w * rust_decimal::Decimal::from(3),
11507                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
11508                        total_revenue * w * rust_decimal::Decimal::from(2),
11509                    )
11510                } else {
11511                    (company_revenue, total_assets, pretax_income, equity)
11512                };
11513
11514                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
11515
11516                materiality_inputs.push(MaterialityInput {
11517                    entity_code: company_code,
11518                    period: format!("FY{}", fiscal_year),
11519                    revenue: rev,
11520                    pretax_income: pti,
11521                    total_assets: assets,
11522                    equity: eq,
11523                    gross_profit,
11524                });
11525            }
11526
11527            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11528
11529            info!(
11530                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11531                 {} total assets, {} equity benchmarks)",
11532                snapshot.materiality_calculations.len(),
11533                snapshot
11534                    .materiality_calculations
11535                    .iter()
11536                    .filter(|m| matches!(
11537                        m.benchmark,
11538                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11539                    ))
11540                    .count(),
11541                snapshot
11542                    .materiality_calculations
11543                    .iter()
11544                    .filter(|m| matches!(
11545                        m.benchmark,
11546                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11547                    ))
11548                    .count(),
11549                snapshot
11550                    .materiality_calculations
11551                    .iter()
11552                    .filter(|m| matches!(
11553                        m.benchmark,
11554                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11555                    ))
11556                    .count(),
11557                snapshot
11558                    .materiality_calculations
11559                    .iter()
11560                    .filter(|m| matches!(
11561                        m.benchmark,
11562                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11563                    ))
11564                    .count(),
11565            );
11566        }
11567
11568        // ----------------------------------------------------------------
11569        // ISA 315: Combined Risk Assessments (per entity, per account area)
11570        // ----------------------------------------------------------------
11571        {
11572            use datasynth_generators::audit::cra_generator::CraGenerator;
11573
11574            let mut cra_gen = CraGenerator::new(self.seed + 8315);
11575
11576            // Build entity → scope_id map from already-generated scopes
11577            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11578                .audit_scopes
11579                .iter()
11580                .map(|s| (s.entity_code.clone(), s.id.clone()))
11581                .collect();
11582
11583            for company in &self.config.companies {
11584                let cras = cra_gen.generate_for_entity(&company.code, None);
11585                let scope_id = entity_scope_map.get(&company.code).cloned();
11586                let cras_with_scope: Vec<_> = cras
11587                    .into_iter()
11588                    .map(|mut cra| {
11589                        cra.scope_id = scope_id.clone();
11590                        cra
11591                    })
11592                    .collect();
11593                snapshot.combined_risk_assessments.extend(cras_with_scope);
11594            }
11595
11596            let significant_count = snapshot
11597                .combined_risk_assessments
11598                .iter()
11599                .filter(|c| c.significant_risk)
11600                .count();
11601            let high_cra_count = snapshot
11602                .combined_risk_assessments
11603                .iter()
11604                .filter(|c| {
11605                    matches!(
11606                        c.combined_risk,
11607                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11608                    )
11609                })
11610                .count();
11611
11612            info!(
11613                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11614                snapshot.combined_risk_assessments.len(),
11615                significant_count,
11616                high_cra_count,
11617            );
11618        }
11619
11620        // ----------------------------------------------------------------
11621        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
11622        // ----------------------------------------------------------------
11623        {
11624            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11625
11626            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11627
11628            // Group CRAs by entity and use per-entity tolerable error from materiality
11629            for company in &self.config.companies {
11630                let entity_code = company.code.clone();
11631
11632                // Find tolerable error for this entity (= performance materiality)
11633                let tolerable_error = snapshot
11634                    .materiality_calculations
11635                    .iter()
11636                    .find(|m| m.entity_code == entity_code)
11637                    .map(|m| m.tolerable_error);
11638
11639                // Collect CRAs for this entity
11640                let entity_cras: Vec<_> = snapshot
11641                    .combined_risk_assessments
11642                    .iter()
11643                    .filter(|c| c.entity_code == entity_code)
11644                    .cloned()
11645                    .collect();
11646
11647                if !entity_cras.is_empty() {
11648                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11649                    snapshot.sampling_plans.extend(plans);
11650                    snapshot.sampled_items.extend(items);
11651                }
11652            }
11653
11654            let misstatement_count = snapshot
11655                .sampled_items
11656                .iter()
11657                .filter(|i| i.misstatement_found)
11658                .count();
11659
11660            info!(
11661                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11662                snapshot.sampling_plans.len(),
11663                snapshot.sampled_items.len(),
11664                misstatement_count,
11665            );
11666        }
11667
11668        // ----------------------------------------------------------------
11669        // ISA 315: Significant Classes of Transactions (SCOTS)
11670        // ----------------------------------------------------------------
11671        {
11672            use datasynth_generators::audit::scots_generator::{
11673                ScotsGenerator, ScotsGeneratorConfig,
11674            };
11675
11676            let ic_enabled = self.config.intercompany.enabled;
11677
11678            let config = ScotsGeneratorConfig {
11679                intercompany_enabled: ic_enabled,
11680                ..ScotsGeneratorConfig::default()
11681            };
11682            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11683
11684            for company in &self.config.companies {
11685                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11686                snapshot
11687                    .significant_transaction_classes
11688                    .extend(entity_scots);
11689            }
11690
11691            let estimation_count = snapshot
11692                .significant_transaction_classes
11693                .iter()
11694                .filter(|s| {
11695                    matches!(
11696                        s.transaction_type,
11697                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11698                    )
11699                })
11700                .count();
11701
11702            info!(
11703                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11704                snapshot.significant_transaction_classes.len(),
11705                estimation_count,
11706            );
11707        }
11708
11709        // ----------------------------------------------------------------
11710        // ISA 520: Unusual Item Markers
11711        // ----------------------------------------------------------------
11712        {
11713            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11714
11715            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11716            let entity_codes: Vec<String> = self
11717                .config
11718                .companies
11719                .iter()
11720                .map(|c| c.code.clone())
11721                .collect();
11722            let unusual_flags =
11723                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11724            info!(
11725                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11726                unusual_flags.len(),
11727                unusual_flags
11728                    .iter()
11729                    .filter(|f| matches!(
11730                        f.severity,
11731                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11732                    ))
11733                    .count(),
11734                unusual_flags
11735                    .iter()
11736                    .filter(|f| matches!(
11737                        f.severity,
11738                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
11739                    ))
11740                    .count(),
11741                unusual_flags
11742                    .iter()
11743                    .filter(|f| matches!(
11744                        f.severity,
11745                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
11746                    ))
11747                    .count(),
11748            );
11749            snapshot.unusual_items = unusual_flags;
11750        }
11751
11752        // ----------------------------------------------------------------
11753        // ISA 520: Analytical Relationships
11754        // ----------------------------------------------------------------
11755        {
11756            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
11757
11758            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
11759            let entity_codes: Vec<String> = self
11760                .config
11761                .companies
11762                .iter()
11763                .map(|c| c.code.clone())
11764                .collect();
11765            let current_period_label = format!("FY{fiscal_year}");
11766            let prior_period_label = format!("FY{}", fiscal_year - 1);
11767            let analytical_rels = ar_gen.generate_for_entities(
11768                &entity_codes,
11769                entries,
11770                &current_period_label,
11771                &prior_period_label,
11772            );
11773            let out_of_range = analytical_rels
11774                .iter()
11775                .filter(|r| !r.within_expected_range)
11776                .count();
11777            info!(
11778                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
11779                analytical_rels.len(),
11780                out_of_range,
11781            );
11782            snapshot.analytical_relationships = analytical_rels;
11783        }
11784
11785        if let Some(pb) = pb {
11786            pb.finish_with_message(format!(
11787                "Audit data: {} engagements, {} workpapers, {} evidence, \
11788                 {} confirmations, {} procedure steps, {} samples, \
11789                 {} analytical, {} IA funcs, {} related parties, \
11790                 {} component auditors, {} letters, {} subsequent events, \
11791                 {} service orgs, {} going concern, {} accounting estimates, \
11792                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
11793                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
11794                 {} unusual items, {} analytical relationships",
11795                snapshot.engagements.len(),
11796                snapshot.workpapers.len(),
11797                snapshot.evidence.len(),
11798                snapshot.confirmations.len(),
11799                snapshot.procedure_steps.len(),
11800                snapshot.samples.len(),
11801                snapshot.analytical_results.len(),
11802                snapshot.ia_functions.len(),
11803                snapshot.related_parties.len(),
11804                snapshot.component_auditors.len(),
11805                snapshot.engagement_letters.len(),
11806                snapshot.subsequent_events.len(),
11807                snapshot.service_organizations.len(),
11808                snapshot.going_concern_assessments.len(),
11809                snapshot.accounting_estimates.len(),
11810                snapshot.audit_opinions.len(),
11811                snapshot.key_audit_matters.len(),
11812                snapshot.sox_302_certifications.len(),
11813                snapshot.sox_404_assessments.len(),
11814                snapshot.materiality_calculations.len(),
11815                snapshot.combined_risk_assessments.len(),
11816                snapshot.sampling_plans.len(),
11817                snapshot.significant_transaction_classes.len(),
11818                snapshot.unusual_items.len(),
11819                snapshot.analytical_relationships.len(),
11820            ));
11821        }
11822
11823        // ----------------------------------------------------------------
11824        // PCAOB-ISA cross-reference mappings
11825        // ----------------------------------------------------------------
11826        // Always include the standard PCAOB-ISA mappings when audit generation is
11827        // enabled. These are static reference data (no randomness required) so we
11828        // call standard_mappings() directly.
11829        {
11830            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11831            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11832            debug!(
11833                "PCAOB-ISA mappings generated: {} mappings",
11834                snapshot.isa_pcaob_mappings.len()
11835            );
11836        }
11837
11838        // ----------------------------------------------------------------
11839        // ISA standard reference entries
11840        // ----------------------------------------------------------------
11841        // Emit flat ISA standard reference data (number, title, series) so
11842        // consumers get a machine-readable listing of all 34 ISA standards in
11843        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
11844        {
11845            use datasynth_standards::audit::isa_reference::IsaStandard;
11846            snapshot.isa_mappings = IsaStandard::standard_entries();
11847            debug!(
11848                "ISA standard entries generated: {} standards",
11849                snapshot.isa_mappings.len()
11850            );
11851        }
11852
11853        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
11854        // For each RPT, find the chronologically closest JE for the engagement's entity.
11855        {
11856            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11857                .engagements
11858                .iter()
11859                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11860                .collect();
11861
11862            for rpt in &mut snapshot.related_party_transactions {
11863                if rpt.journal_entry_id.is_some() {
11864                    continue; // already set
11865                }
11866                let entity = engagement_by_id
11867                    .get(&rpt.engagement_id.to_string())
11868                    .copied()
11869                    .unwrap_or("");
11870
11871                // Find closest JE by date in the entity's company
11872                let best_je = entries
11873                    .iter()
11874                    .filter(|je| je.header.company_code == entity)
11875                    .min_by_key(|je| {
11876                        (je.header.posting_date - rpt.transaction_date)
11877                            .num_days()
11878                            .abs()
11879                    });
11880
11881                if let Some(je) = best_je {
11882                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
11883                }
11884            }
11885
11886            let linked = snapshot
11887                .related_party_transactions
11888                .iter()
11889                .filter(|t| t.journal_entry_id.is_some())
11890                .count();
11891            debug!(
11892                "Linked {}/{} related party transactions to journal entries",
11893                linked,
11894                snapshot.related_party_transactions.len()
11895            );
11896        }
11897
11898        Ok(snapshot)
11899    }
11900
11901    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
11902    ///
11903    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
11904    /// from the current orchestrator state, runs the FSM engine, and maps the
11905    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
11906    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
11907    fn generate_audit_data_with_fsm(
11908        &mut self,
11909        entries: &[JournalEntry],
11910    ) -> SynthResult<AuditSnapshot> {
11911        use datasynth_audit_fsm::{
11912            context::EngagementContext,
11913            engine::AuditFsmEngine,
11914            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
11915        };
11916        use rand::SeedableRng;
11917        use rand_chacha::ChaCha8Rng;
11918
11919        info!("Audit FSM: generating audit data via FSM engine");
11920
11921        let fsm_config = self
11922            .config
11923            .audit
11924            .fsm
11925            .as_ref()
11926            .expect("FSM config must be present when FSM is enabled");
11927
11928        // 1. Load blueprint from config string.
11929        let bwp = match fsm_config.blueprint.as_str() {
11930            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
11931            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
11932            _ => {
11933                warn!(
11934                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
11935                    fsm_config.blueprint
11936                );
11937                BlueprintWithPreconditions::load_builtin_fsa()
11938            }
11939        }
11940        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
11941
11942        // 2. Load overlay from config string.
11943        let overlay = match fsm_config.overlay.as_str() {
11944            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
11945            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
11946            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
11947            _ => {
11948                warn!(
11949                    "Unknown FSM overlay '{}', falling back to builtin:default",
11950                    fsm_config.overlay
11951                );
11952                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
11953            }
11954        }
11955        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
11956
11957        // 3. Build EngagementContext from orchestrator state.
11958        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11959            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11960        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11961
11962        // Determine the engagement entity early so we can filter JEs.
11963        let company = self.config.companies.first();
11964        let company_code = company
11965            .map(|c| c.code.clone())
11966            .unwrap_or_else(|| "UNKNOWN".to_string());
11967        let company_name = company
11968            .map(|c| c.name.clone())
11969            .unwrap_or_else(|| "Unknown Company".to_string());
11970        let currency = company
11971            .map(|c| c.currency.clone())
11972            .unwrap_or_else(|| "USD".to_string());
11973
11974        // Filter JEs to the engagement entity for single-company coherence.
11975        let entity_entries: Vec<_> = entries
11976            .iter()
11977            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
11978            .cloned()
11979            .collect();
11980        let entries = &entity_entries; // Shadow the parameter for remaining usage
11981
11982        // Financial aggregates from journal entries.
11983        let total_revenue: rust_decimal::Decimal = entries
11984            .iter()
11985            .flat_map(|e| e.lines.iter())
11986            .filter(|l| l.account_code.starts_with('4'))
11987            .map(|l| l.credit_amount - l.debit_amount)
11988            .sum();
11989
11990        let total_assets: rust_decimal::Decimal = entries
11991            .iter()
11992            .flat_map(|e| e.lines.iter())
11993            .filter(|l| l.account_code.starts_with('1'))
11994            .map(|l| l.debit_amount - l.credit_amount)
11995            .sum();
11996
11997        let total_expenses: rust_decimal::Decimal = entries
11998            .iter()
11999            .flat_map(|e| e.lines.iter())
12000            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12001            .map(|l| l.debit_amount)
12002            .sum();
12003
12004        let equity: rust_decimal::Decimal = entries
12005            .iter()
12006            .flat_map(|e| e.lines.iter())
12007            .filter(|l| l.account_code.starts_with('3'))
12008            .map(|l| l.credit_amount - l.debit_amount)
12009            .sum();
12010
12011        let total_debt: rust_decimal::Decimal = entries
12012            .iter()
12013            .flat_map(|e| e.lines.iter())
12014            .filter(|l| l.account_code.starts_with('2'))
12015            .map(|l| l.credit_amount - l.debit_amount)
12016            .sum();
12017
12018        let pretax_income = total_revenue - total_expenses;
12019
12020        let cogs: rust_decimal::Decimal = entries
12021            .iter()
12022            .flat_map(|e| e.lines.iter())
12023            .filter(|l| l.account_code.starts_with('5'))
12024            .map(|l| l.debit_amount)
12025            .sum();
12026        let gross_profit = total_revenue - cogs;
12027
12028        let current_assets: rust_decimal::Decimal = entries
12029            .iter()
12030            .flat_map(|e| e.lines.iter())
12031            .filter(|l| {
12032                l.account_code.starts_with("10")
12033                    || l.account_code.starts_with("11")
12034                    || l.account_code.starts_with("12")
12035                    || l.account_code.starts_with("13")
12036            })
12037            .map(|l| l.debit_amount - l.credit_amount)
12038            .sum();
12039        let current_liabilities: rust_decimal::Decimal = entries
12040            .iter()
12041            .flat_map(|e| e.lines.iter())
12042            .filter(|l| {
12043                l.account_code.starts_with("20")
12044                    || l.account_code.starts_with("21")
12045                    || l.account_code.starts_with("22")
12046            })
12047            .map(|l| l.credit_amount - l.debit_amount)
12048            .sum();
12049        let working_capital = current_assets - current_liabilities;
12050
12051        let depreciation: rust_decimal::Decimal = entries
12052            .iter()
12053            .flat_map(|e| e.lines.iter())
12054            .filter(|l| l.account_code.starts_with("60"))
12055            .map(|l| l.debit_amount)
12056            .sum();
12057        let operating_cash_flow = pretax_income + depreciation;
12058
12059        // GL accounts for reference data.
12060        let accounts: Vec<String> = self
12061            .coa
12062            .as_ref()
12063            .map(|coa| {
12064                coa.get_postable_accounts()
12065                    .iter()
12066                    .map(|acc| acc.account_code().to_string())
12067                    .collect()
12068            })
12069            .unwrap_or_default();
12070
12071        // Team member IDs and display names from master data.
12072        let team_member_ids: Vec<String> = self
12073            .master_data
12074            .employees
12075            .iter()
12076            .take(8) // Cap team size
12077            .map(|e| e.employee_id.clone())
12078            .collect();
12079        let team_member_pairs: Vec<(String, String)> = self
12080            .master_data
12081            .employees
12082            .iter()
12083            .take(8)
12084            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12085            .collect();
12086
12087        let vendor_names: Vec<String> = self
12088            .master_data
12089            .vendors
12090            .iter()
12091            .map(|v| v.name.clone())
12092            .collect();
12093        let customer_names: Vec<String> = self
12094            .master_data
12095            .customers
12096            .iter()
12097            .map(|c| c.name.clone())
12098            .collect();
12099
12100        let entity_codes: Vec<String> = self
12101            .config
12102            .companies
12103            .iter()
12104            .map(|c| c.code.clone())
12105            .collect();
12106
12107        // Journal entry IDs for evidence tracing (sample up to 50).
12108        let journal_entry_ids: Vec<String> = entries
12109            .iter()
12110            .take(50)
12111            .map(|e| e.header.document_id.to_string())
12112            .collect();
12113
12114        // Account balances for risk weighting (aggregate debit - credit per account).
12115        let mut account_balances = std::collections::HashMap::<String, f64>::new();
12116        for entry in entries {
12117            for line in &entry.lines {
12118                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
12119                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
12120                *account_balances
12121                    .entry(line.account_code.clone())
12122                    .or_insert(0.0) += debit_f64 - credit_f64;
12123            }
12124        }
12125
12126        // Internal control IDs and anomaly refs are populated by the
12127        // caller when available; here we default to empty because the
12128        // orchestrator state may not have generated controls/anomalies
12129        // yet at this point in the pipeline.
12130        let control_ids: Vec<String> = Vec::new();
12131        let anomaly_refs: Vec<String> = Vec::new();
12132
12133        let mut context = EngagementContext {
12134            company_code,
12135            company_name,
12136            fiscal_year: start_date.year(),
12137            currency,
12138            total_revenue,
12139            total_assets,
12140            engagement_start: start_date,
12141            report_date: period_end,
12142            pretax_income,
12143            equity,
12144            gross_profit,
12145            working_capital,
12146            operating_cash_flow,
12147            total_debt,
12148            team_member_ids,
12149            team_member_pairs,
12150            accounts,
12151            vendor_names,
12152            customer_names,
12153            journal_entry_ids,
12154            account_balances,
12155            control_ids,
12156            anomaly_refs,
12157            journal_entries: entries.to_vec(),
12158            is_us_listed: false,
12159            entity_codes,
12160            auditor_firm_name: "DataSynth Audit LLP".into(),
12161            accounting_framework: self
12162                .config
12163                .accounting_standards
12164                .framework
12165                .map(|f| match f {
12166                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
12167                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
12168                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
12169                        "French GAAP"
12170                    }
12171                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
12172                        "German GAAP"
12173                    }
12174                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
12175                        "Dual Reporting"
12176                    }
12177                })
12178                .unwrap_or("IFRS")
12179                .into(),
12180        };
12181
12182        // 4. Create and run the FSM engine.
12183        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
12184        let rng = ChaCha8Rng::seed_from_u64(seed);
12185        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
12186
12187        let mut result = engine
12188            .run_engagement(&context)
12189            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
12190
12191        info!(
12192            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
12193             {} phases completed, duration {:.1}h",
12194            result.event_log.len(),
12195            result.artifacts.total_artifacts(),
12196            result.anomalies.len(),
12197            result.phases_completed.len(),
12198            result.total_duration_hours,
12199        );
12200
12201        // 4b. Populate financial data in the artifact bag for downstream consumers.
12202        let tb_entity = context.company_code.clone();
12203        let tb_fy = context.fiscal_year;
12204        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
12205        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
12206            entries,
12207            &tb_entity,
12208            tb_fy,
12209            self.coa.as_ref().map(|c| c.as_ref()),
12210        );
12211
12212        // 5. Map ArtifactBag fields to AuditSnapshot.
12213        let bag = result.artifacts;
12214        let mut snapshot = AuditSnapshot {
12215            engagements: bag.engagements,
12216            engagement_letters: bag.engagement_letters,
12217            materiality_calculations: bag.materiality_calculations,
12218            risk_assessments: bag.risk_assessments,
12219            combined_risk_assessments: bag.combined_risk_assessments,
12220            workpapers: bag.workpapers,
12221            evidence: bag.evidence,
12222            findings: bag.findings,
12223            judgments: bag.judgments,
12224            sampling_plans: bag.sampling_plans,
12225            sampled_items: bag.sampled_items,
12226            analytical_results: bag.analytical_results,
12227            going_concern_assessments: bag.going_concern_assessments,
12228            subsequent_events: bag.subsequent_events,
12229            audit_opinions: bag.audit_opinions,
12230            key_audit_matters: bag.key_audit_matters,
12231            procedure_steps: bag.procedure_steps,
12232            samples: bag.samples,
12233            confirmations: bag.confirmations,
12234            confirmation_responses: bag.confirmation_responses,
12235            // Store the event trail for downstream export.
12236            fsm_event_trail: Some(result.event_log),
12237            // Fields not produced by the FSM engine remain at their defaults.
12238            ..Default::default()
12239        };
12240
12241        // 6. Add static reference data (same as legacy path).
12242        {
12243            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12244            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12245        }
12246        {
12247            use datasynth_standards::audit::isa_reference::IsaStandard;
12248            snapshot.isa_mappings = IsaStandard::standard_entries();
12249        }
12250
12251        info!(
12252            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
12253             {} risk assessments, {} findings, {} materiality calcs",
12254            snapshot.engagements.len(),
12255            snapshot.workpapers.len(),
12256            snapshot.evidence.len(),
12257            snapshot.risk_assessments.len(),
12258            snapshot.findings.len(),
12259            snapshot.materiality_calculations.len(),
12260        );
12261
12262        Ok(snapshot)
12263    }
12264
12265    /// Export journal entries as graph data for ML training and network reconstruction.
12266    ///
12267    /// Builds a transaction graph where:
12268    /// - Nodes are GL accounts
12269    /// - Edges are money flows from credit to debit accounts
12270    /// - Edge attributes include amount, date, business process, anomaly flags
12271    fn export_graphs(
12272        &mut self,
12273        entries: &[JournalEntry],
12274        _coa: &Arc<ChartOfAccounts>,
12275        stats: &mut EnhancedGenerationStatistics,
12276    ) -> SynthResult<GraphExportSnapshot> {
12277        let pb = self.create_progress_bar(100, "Exporting Graphs");
12278
12279        let mut snapshot = GraphExportSnapshot::default();
12280
12281        // Get output directory
12282        let output_dir = self
12283            .output_path
12284            .clone()
12285            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12286        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12287
12288        // Process each graph type configuration
12289        for graph_type in &self.config.graph_export.graph_types {
12290            if let Some(pb) = &pb {
12291                pb.inc(10);
12292            }
12293
12294            // Build transaction graph
12295            let graph_config = TransactionGraphConfig {
12296                include_vendors: false,
12297                include_customers: false,
12298                create_debit_credit_edges: true,
12299                include_document_nodes: graph_type.include_document_nodes,
12300                min_edge_weight: graph_type.min_edge_weight,
12301                aggregate_parallel_edges: graph_type.aggregate_edges,
12302                framework: None,
12303            };
12304
12305            let mut builder = TransactionGraphBuilder::new(graph_config);
12306            builder.add_journal_entries(entries);
12307            let graph = builder.build();
12308
12309            // Update stats
12310            stats.graph_node_count += graph.node_count();
12311            stats.graph_edge_count += graph.edge_count();
12312
12313            if let Some(pb) = &pb {
12314                pb.inc(40);
12315            }
12316
12317            // Export to each configured format
12318            for format in &self.config.graph_export.formats {
12319                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12320
12321                // Create output directory
12322                if let Err(e) = std::fs::create_dir_all(&format_dir) {
12323                    warn!("Failed to create graph output directory: {}", e);
12324                    continue;
12325                }
12326
12327                match format {
12328                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12329                        let pyg_config = PyGExportConfig {
12330                            common: datasynth_graph::CommonExportConfig {
12331                                export_node_features: true,
12332                                export_edge_features: true,
12333                                export_node_labels: true,
12334                                export_edge_labels: true,
12335                                export_masks: true,
12336                                train_ratio: self.config.graph_export.train_ratio,
12337                                val_ratio: self.config.graph_export.validation_ratio,
12338                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12339                            },
12340                            one_hot_categoricals: false,
12341                        };
12342
12343                        let exporter = PyGExporter::new(pyg_config);
12344                        match exporter.export(&graph, &format_dir) {
12345                            Ok(metadata) => {
12346                                snapshot.exports.insert(
12347                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
12348                                    GraphExportInfo {
12349                                        name: graph_type.name.clone(),
12350                                        format: "pytorch_geometric".to_string(),
12351                                        output_path: format_dir.clone(),
12352                                        node_count: metadata.num_nodes,
12353                                        edge_count: metadata.num_edges,
12354                                    },
12355                                );
12356                                snapshot.graph_count += 1;
12357                            }
12358                            Err(e) => {
12359                                warn!("Failed to export PyTorch Geometric graph: {}", e);
12360                            }
12361                        }
12362                    }
12363                    datasynth_config::schema::GraphExportFormat::Neo4j => {
12364                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12365
12366                        let neo4j_config = Neo4jExportConfig {
12367                            export_node_properties: true,
12368                            export_edge_properties: true,
12369                            export_features: true,
12370                            generate_cypher: true,
12371                            generate_admin_import: true,
12372                            database_name: "synth".to_string(),
12373                            cypher_batch_size: 1000,
12374                        };
12375
12376                        let exporter = Neo4jExporter::new(neo4j_config);
12377                        match exporter.export(&graph, &format_dir) {
12378                            Ok(metadata) => {
12379                                snapshot.exports.insert(
12380                                    format!("{}_{}", graph_type.name, "neo4j"),
12381                                    GraphExportInfo {
12382                                        name: graph_type.name.clone(),
12383                                        format: "neo4j".to_string(),
12384                                        output_path: format_dir.clone(),
12385                                        node_count: metadata.num_nodes,
12386                                        edge_count: metadata.num_edges,
12387                                    },
12388                                );
12389                                snapshot.graph_count += 1;
12390                            }
12391                            Err(e) => {
12392                                warn!("Failed to export Neo4j graph: {}", e);
12393                            }
12394                        }
12395                    }
12396                    datasynth_config::schema::GraphExportFormat::Dgl => {
12397                        use datasynth_graph::{DGLExportConfig, DGLExporter};
12398
12399                        let dgl_config = DGLExportConfig {
12400                            common: datasynth_graph::CommonExportConfig {
12401                                export_node_features: true,
12402                                export_edge_features: true,
12403                                export_node_labels: true,
12404                                export_edge_labels: true,
12405                                export_masks: true,
12406                                train_ratio: self.config.graph_export.train_ratio,
12407                                val_ratio: self.config.graph_export.validation_ratio,
12408                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12409                            },
12410                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
12411                            include_pickle_script: true, // DGL ecosystem standard helper
12412                        };
12413
12414                        let exporter = DGLExporter::new(dgl_config);
12415                        match exporter.export(&graph, &format_dir) {
12416                            Ok(metadata) => {
12417                                snapshot.exports.insert(
12418                                    format!("{}_{}", graph_type.name, "dgl"),
12419                                    GraphExportInfo {
12420                                        name: graph_type.name.clone(),
12421                                        format: "dgl".to_string(),
12422                                        output_path: format_dir.clone(),
12423                                        node_count: metadata.common.num_nodes,
12424                                        edge_count: metadata.common.num_edges,
12425                                    },
12426                                );
12427                                snapshot.graph_count += 1;
12428                            }
12429                            Err(e) => {
12430                                warn!("Failed to export DGL graph: {}", e);
12431                            }
12432                        }
12433                    }
12434                    datasynth_config::schema::GraphExportFormat::RustGraph => {
12435                        use datasynth_graph::{
12436                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12437                        };
12438
12439                        let rustgraph_config = RustGraphExportConfig {
12440                            include_features: true,
12441                            include_temporal: true,
12442                            include_labels: true,
12443                            source_name: "datasynth".to_string(),
12444                            batch_id: None,
12445                            output_format: RustGraphOutputFormat::JsonLines,
12446                            export_node_properties: true,
12447                            export_edge_properties: true,
12448                            pretty_print: false,
12449                        };
12450
12451                        let exporter = RustGraphExporter::new(rustgraph_config);
12452                        match exporter.export(&graph, &format_dir) {
12453                            Ok(metadata) => {
12454                                snapshot.exports.insert(
12455                                    format!("{}_{}", graph_type.name, "rustgraph"),
12456                                    GraphExportInfo {
12457                                        name: graph_type.name.clone(),
12458                                        format: "rustgraph".to_string(),
12459                                        output_path: format_dir.clone(),
12460                                        node_count: metadata.num_nodes,
12461                                        edge_count: metadata.num_edges,
12462                                    },
12463                                );
12464                                snapshot.graph_count += 1;
12465                            }
12466                            Err(e) => {
12467                                warn!("Failed to export RustGraph: {}", e);
12468                            }
12469                        }
12470                    }
12471                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12472                        // Hypergraph export is handled separately in Phase 10b
12473                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12474                    }
12475                }
12476            }
12477
12478            if let Some(pb) = &pb {
12479                pb.inc(40);
12480            }
12481        }
12482
12483        stats.graph_export_count = snapshot.graph_count;
12484        snapshot.exported = snapshot.graph_count > 0;
12485
12486        if let Some(pb) = pb {
12487            pb.finish_with_message(format!(
12488                "Graphs exported: {} graphs ({} nodes, {} edges)",
12489                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12490            ));
12491        }
12492
12493        Ok(snapshot)
12494    }
12495
12496    /// Build additional graph types (banking, approval, entity) when relevant data
12497    /// is available. These run as a late phase because the data they need (banking
12498    /// snapshot, intercompany snapshot) is only generated after the main graph
12499    /// export phase.
12500    fn build_additional_graphs(
12501        &self,
12502        banking: &BankingSnapshot,
12503        intercompany: &IntercompanySnapshot,
12504        entries: &[JournalEntry],
12505        stats: &mut EnhancedGenerationStatistics,
12506    ) {
12507        let output_dir = self
12508            .output_path
12509            .clone()
12510            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12511        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12512
12513        // Banking graph: build when banking customers and transactions exist
12514        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12515            info!("Phase 10c: Building banking network graph");
12516            let config = BankingGraphConfig::default();
12517            let mut builder = BankingGraphBuilder::new(config);
12518            builder.add_customers(&banking.customers);
12519            builder.add_accounts(&banking.accounts, &banking.customers);
12520            builder.add_transactions(&banking.transactions);
12521            let graph = builder.build();
12522
12523            let node_count = graph.node_count();
12524            let edge_count = graph.edge_count();
12525            stats.graph_node_count += node_count;
12526            stats.graph_edge_count += edge_count;
12527
12528            // Export as PyG if configured
12529            for format in &self.config.graph_export.formats {
12530                if matches!(
12531                    format,
12532                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12533                ) {
12534                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12535                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12536                        warn!("Failed to create banking graph output dir: {}", e);
12537                        continue;
12538                    }
12539                    let pyg_config = PyGExportConfig::default();
12540                    let exporter = PyGExporter::new(pyg_config);
12541                    if let Err(e) = exporter.export(&graph, &format_dir) {
12542                        warn!("Failed to export banking graph as PyG: {}", e);
12543                    } else {
12544                        info!(
12545                            "Banking network graph exported: {} nodes, {} edges",
12546                            node_count, edge_count
12547                        );
12548                    }
12549                }
12550            }
12551        }
12552
12553        // Approval graph: build from journal entry approval workflows
12554        let approval_entries: Vec<_> = entries
12555            .iter()
12556            .filter(|je| je.header.approval_workflow.is_some())
12557            .collect();
12558
12559        if !approval_entries.is_empty() {
12560            info!(
12561                "Phase 10c: Building approval network graph ({} entries with approvals)",
12562                approval_entries.len()
12563            );
12564            let config = ApprovalGraphConfig::default();
12565            let mut builder = ApprovalGraphBuilder::new(config);
12566
12567            for je in &approval_entries {
12568                if let Some(ref wf) = je.header.approval_workflow {
12569                    for action in &wf.actions {
12570                        let record = datasynth_core::models::ApprovalRecord {
12571                            approval_id: format!(
12572                                "APR-{}-{}",
12573                                je.header.document_id, action.approval_level
12574                            ),
12575                            document_number: je.header.document_id.to_string(),
12576                            document_type: "JE".to_string(),
12577                            company_code: je.company_code().to_string(),
12578                            requester_id: wf.preparer_id.clone(),
12579                            requester_name: Some(wf.preparer_name.clone()),
12580                            approver_id: action.actor_id.clone(),
12581                            approver_name: action.actor_name.clone(),
12582                            approval_date: je.posting_date(),
12583                            action: format!("{:?}", action.action),
12584                            amount: wf.amount,
12585                            approval_limit: None,
12586                            comments: action.comments.clone(),
12587                            delegation_from: None,
12588                            is_auto_approved: false,
12589                        };
12590                        builder.add_approval(&record);
12591                    }
12592                }
12593            }
12594
12595            let graph = builder.build();
12596            let node_count = graph.node_count();
12597            let edge_count = graph.edge_count();
12598            stats.graph_node_count += node_count;
12599            stats.graph_edge_count += edge_count;
12600
12601            // Export as PyG if configured
12602            for format in &self.config.graph_export.formats {
12603                if matches!(
12604                    format,
12605                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12606                ) {
12607                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12608                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12609                        warn!("Failed to create approval graph output dir: {}", e);
12610                        continue;
12611                    }
12612                    let pyg_config = PyGExportConfig::default();
12613                    let exporter = PyGExporter::new(pyg_config);
12614                    if let Err(e) = exporter.export(&graph, &format_dir) {
12615                        warn!("Failed to export approval graph as PyG: {}", e);
12616                    } else {
12617                        info!(
12618                            "Approval network graph exported: {} nodes, {} edges",
12619                            node_count, edge_count
12620                        );
12621                    }
12622                }
12623            }
12624        }
12625
12626        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
12627        if self.config.companies.len() >= 2 {
12628            info!(
12629                "Phase 10c: Building entity relationship graph ({} companies)",
12630                self.config.companies.len()
12631            );
12632
12633            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12634                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12635
12636            // Map CompanyConfig → Company objects
12637            let parent_code = &self.config.companies[0].code;
12638            let mut companies: Vec<datasynth_core::models::Company> =
12639                Vec::with_capacity(self.config.companies.len());
12640
12641            // First company is the parent
12642            let first = &self.config.companies[0];
12643            companies.push(datasynth_core::models::Company::parent(
12644                &first.code,
12645                &first.name,
12646                &first.country,
12647                &first.currency,
12648            ));
12649
12650            // Remaining companies are subsidiaries (100% owned by parent)
12651            for cc in self.config.companies.iter().skip(1) {
12652                companies.push(datasynth_core::models::Company::subsidiary(
12653                    &cc.code,
12654                    &cc.name,
12655                    &cc.country,
12656                    &cc.currency,
12657                    parent_code,
12658                    rust_decimal::Decimal::from(100),
12659                ));
12660            }
12661
12662            // Build IntercompanyRelationship records (same logic as phase_intercompany)
12663            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12664                self.config
12665                    .companies
12666                    .iter()
12667                    .skip(1)
12668                    .enumerate()
12669                    .map(|(i, cc)| {
12670                        let mut rel =
12671                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
12672                                format!("REL{:03}", i + 1),
12673                                parent_code.clone(),
12674                                cc.code.clone(),
12675                                rust_decimal::Decimal::from(100),
12676                                start_date,
12677                            );
12678                        rel.functional_currency = cc.currency.clone();
12679                        rel
12680                    })
12681                    .collect();
12682
12683            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
12684            builder.add_companies(&companies);
12685            builder.add_ownership_relationships(&relationships);
12686
12687            // Thread IC matched-pair transaction edges into the entity graph
12688            for pair in &intercompany.matched_pairs {
12689                builder.add_intercompany_edge(
12690                    &pair.seller_company,
12691                    &pair.buyer_company,
12692                    pair.amount,
12693                    &format!("{:?}", pair.transaction_type),
12694                );
12695            }
12696
12697            let graph = builder.build();
12698            let node_count = graph.node_count();
12699            let edge_count = graph.edge_count();
12700            stats.graph_node_count += node_count;
12701            stats.graph_edge_count += edge_count;
12702
12703            // Export as PyG if configured
12704            for format in &self.config.graph_export.formats {
12705                if matches!(
12706                    format,
12707                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12708                ) {
12709                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
12710                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12711                        warn!("Failed to create entity graph output dir: {}", e);
12712                        continue;
12713                    }
12714                    let pyg_config = PyGExportConfig::default();
12715                    let exporter = PyGExporter::new(pyg_config);
12716                    if let Err(e) = exporter.export(&graph, &format_dir) {
12717                        warn!("Failed to export entity graph as PyG: {}", e);
12718                    } else {
12719                        info!(
12720                            "Entity relationship graph exported: {} nodes, {} edges",
12721                            node_count, edge_count
12722                        );
12723                    }
12724                }
12725            }
12726        } else {
12727            debug!(
12728                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
12729                self.config.companies.len()
12730            );
12731        }
12732    }
12733
12734    /// Export a multi-layer hypergraph for RustGraph integration.
12735    ///
12736    /// Builds a 3-layer hypergraph:
12737    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
12738    /// - Layer 2: Process Events (all process family document flows + OCPM events)
12739    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
12740    #[allow(clippy::too_many_arguments)]
12741    fn export_hypergraph(
12742        &self,
12743        coa: &Arc<ChartOfAccounts>,
12744        entries: &[JournalEntry],
12745        document_flows: &DocumentFlowSnapshot,
12746        sourcing: &SourcingSnapshot,
12747        hr: &HrSnapshot,
12748        manufacturing: &ManufacturingSnapshot,
12749        banking: &BankingSnapshot,
12750        audit: &AuditSnapshot,
12751        financial_reporting: &FinancialReportingSnapshot,
12752        ocpm: &OcpmSnapshot,
12753        compliance: &ComplianceRegulationsSnapshot,
12754        stats: &mut EnhancedGenerationStatistics,
12755    ) -> SynthResult<HypergraphExportInfo> {
12756        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
12757        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
12758        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
12759        use datasynth_graph::models::hypergraph::AggregationStrategy;
12760
12761        let hg_settings = &self.config.graph_export.hypergraph;
12762
12763        // Parse aggregation strategy from config string
12764        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
12765            "truncate" => AggregationStrategy::Truncate,
12766            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
12767            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
12768            "importance_sample" => AggregationStrategy::ImportanceSample,
12769            _ => AggregationStrategy::PoolByCounterparty,
12770        };
12771
12772        let builder_config = HypergraphConfig {
12773            max_nodes: hg_settings.max_nodes,
12774            aggregation_strategy,
12775            include_coso: hg_settings.governance_layer.include_coso,
12776            include_controls: hg_settings.governance_layer.include_controls,
12777            include_sox: hg_settings.governance_layer.include_sox,
12778            include_vendors: hg_settings.governance_layer.include_vendors,
12779            include_customers: hg_settings.governance_layer.include_customers,
12780            include_employees: hg_settings.governance_layer.include_employees,
12781            include_p2p: hg_settings.process_layer.include_p2p,
12782            include_o2c: hg_settings.process_layer.include_o2c,
12783            include_s2c: hg_settings.process_layer.include_s2c,
12784            include_h2r: hg_settings.process_layer.include_h2r,
12785            include_mfg: hg_settings.process_layer.include_mfg,
12786            include_bank: hg_settings.process_layer.include_bank,
12787            include_audit: hg_settings.process_layer.include_audit,
12788            include_r2r: hg_settings.process_layer.include_r2r,
12789            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
12790            docs_per_counterparty_threshold: hg_settings
12791                .process_layer
12792                .docs_per_counterparty_threshold,
12793            include_accounts: hg_settings.accounting_layer.include_accounts,
12794            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
12795            include_cross_layer_edges: hg_settings.cross_layer.enabled,
12796            include_compliance: self.config.compliance_regulations.enabled,
12797            include_tax: true,
12798            include_treasury: true,
12799            include_esg: true,
12800            include_project: true,
12801            include_intercompany: true,
12802            include_temporal_events: true,
12803        };
12804
12805        let mut builder = HypergraphBuilder::new(builder_config);
12806
12807        // Layer 1: Governance & Controls
12808        builder.add_coso_framework();
12809
12810        // Add controls if available (generated during JE generation)
12811        // Controls are generated per-company; we use the standard set
12812        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
12813            let controls = InternalControl::standard_controls();
12814            builder.add_controls(&controls);
12815        }
12816
12817        // Add master data
12818        builder.add_vendors(&self.master_data.vendors);
12819        builder.add_customers(&self.master_data.customers);
12820        builder.add_employees(&self.master_data.employees);
12821
12822        // Layer 2: Process Events (all process families)
12823        builder.add_p2p_documents(
12824            &document_flows.purchase_orders,
12825            &document_flows.goods_receipts,
12826            &document_flows.vendor_invoices,
12827            &document_flows.payments,
12828        );
12829        builder.add_o2c_documents(
12830            &document_flows.sales_orders,
12831            &document_flows.deliveries,
12832            &document_flows.customer_invoices,
12833        );
12834        builder.add_s2c_documents(
12835            &sourcing.sourcing_projects,
12836            &sourcing.qualifications,
12837            &sourcing.rfx_events,
12838            &sourcing.bids,
12839            &sourcing.bid_evaluations,
12840            &sourcing.contracts,
12841        );
12842        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
12843        builder.add_mfg_documents(
12844            &manufacturing.production_orders,
12845            &manufacturing.quality_inspections,
12846            &manufacturing.cycle_counts,
12847        );
12848        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
12849        builder.add_audit_documents(
12850            &audit.engagements,
12851            &audit.workpapers,
12852            &audit.findings,
12853            &audit.evidence,
12854            &audit.risk_assessments,
12855            &audit.judgments,
12856            &audit.materiality_calculations,
12857            &audit.audit_opinions,
12858            &audit.going_concern_assessments,
12859        );
12860        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
12861
12862        // OCPM events as hyperedges
12863        if let Some(ref event_log) = ocpm.event_log {
12864            builder.add_ocpm_events(event_log);
12865        }
12866
12867        // Compliance regulations as cross-layer nodes
12868        if self.config.compliance_regulations.enabled
12869            && hg_settings.governance_layer.include_controls
12870        {
12871            // Reconstruct ComplianceStandard objects from the registry
12872            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12873            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
12874                .standard_records
12875                .iter()
12876                .filter_map(|r| {
12877                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
12878                    registry.get(&sid).cloned()
12879                })
12880                .collect();
12881
12882            builder.add_compliance_regulations(
12883                &standards,
12884                &compliance.findings,
12885                &compliance.filings,
12886            );
12887        }
12888
12889        // Layer 3: Accounting Network
12890        builder.add_accounts(coa);
12891        builder.add_journal_entries_as_hyperedges(entries);
12892
12893        // Build the hypergraph
12894        let hypergraph = builder.build();
12895
12896        // Export
12897        let output_dir = self
12898            .output_path
12899            .clone()
12900            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12901        let hg_dir = output_dir
12902            .join(&self.config.graph_export.output_subdirectory)
12903            .join(&hg_settings.output_subdirectory);
12904
12905        // Branch on output format
12906        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
12907            "unified" => {
12908                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12909                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12910                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
12911                })?;
12912                (
12913                    metadata.num_nodes,
12914                    metadata.num_edges,
12915                    metadata.num_hyperedges,
12916                )
12917            }
12918            _ => {
12919                // "native" or any unrecognized format → use existing exporter
12920                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
12921                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12922                    SynthError::generation(format!("Hypergraph export failed: {e}"))
12923                })?;
12924                (
12925                    metadata.num_nodes,
12926                    metadata.num_edges,
12927                    metadata.num_hyperedges,
12928                )
12929            }
12930        };
12931
12932        // Stream to RustGraph ingest endpoint if configured
12933        #[cfg(feature = "streaming")]
12934        if let Some(ref target_url) = hg_settings.stream_target {
12935            use crate::stream_client::{StreamClient, StreamConfig};
12936            use std::io::Write as _;
12937
12938            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
12939            let stream_config = StreamConfig {
12940                target_url: target_url.clone(),
12941                batch_size: hg_settings.stream_batch_size,
12942                api_key,
12943                ..StreamConfig::default()
12944            };
12945
12946            match StreamClient::new(stream_config) {
12947                Ok(mut client) => {
12948                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12949                    match exporter.export_to_writer(&hypergraph, &mut client) {
12950                        Ok(_) => {
12951                            if let Err(e) = client.flush() {
12952                                warn!("Failed to flush stream client: {}", e);
12953                            } else {
12954                                info!("Streamed {} records to {}", client.total_sent(), target_url);
12955                            }
12956                        }
12957                        Err(e) => {
12958                            warn!("Streaming export failed: {}", e);
12959                        }
12960                    }
12961                }
12962                Err(e) => {
12963                    warn!("Failed to create stream client: {}", e);
12964                }
12965            }
12966        }
12967
12968        // Update stats
12969        stats.graph_node_count += num_nodes;
12970        stats.graph_edge_count += num_edges;
12971        stats.graph_export_count += 1;
12972
12973        Ok(HypergraphExportInfo {
12974            node_count: num_nodes,
12975            edge_count: num_edges,
12976            hyperedge_count: num_hyperedges,
12977            output_path: hg_dir,
12978        })
12979    }
12980
12981    /// Generate banking KYC/AML data.
12982    ///
12983    /// Creates banking customers, accounts, and transactions with AML typology injection.
    /// Uses the `BankingOrchestrator` from the `datasynth_banking` crate.
12985    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
12986        let pb = self.create_progress_bar(100, "Generating Banking Data");
12987
12988        // Build the banking orchestrator from config
12989        let orchestrator = BankingOrchestratorBuilder::new()
12990            .config(self.config.banking.clone())
12991            .seed(self.seed + 9000)
12992            .country_pack(self.primary_pack().clone())
12993            .build();
12994
12995        if let Some(pb) = &pb {
12996            pb.inc(10);
12997        }
12998
12999        // Generate the banking data
13000        let result = orchestrator.generate();
13001
13002        if let Some(pb) = &pb {
13003            pb.inc(90);
13004            pb.finish_with_message(format!(
13005                "Banking: {} customers, {} transactions",
13006                result.customers.len(),
13007                result.transactions.len()
13008            ));
13009        }
13010
13011        // Cross-reference banking customers with core master data so that
13012        // banking customer names align with the enterprise customer list.
13013        // We rotate through core customers, overlaying their name and country
13014        // onto the generated banking customers where possible.
13015        let mut banking_customers = result.customers;
13016        let core_customers = &self.master_data.customers;
13017        if !core_customers.is_empty() {
13018            for (i, bc) in banking_customers.iter_mut().enumerate() {
13019                let core = &core_customers[i % core_customers.len()];
13020                bc.name = CustomerName::business(&core.name);
13021                bc.residence_country = core.country.clone();
13022                bc.enterprise_customer_id = Some(core.customer_id.clone());
13023            }
13024            debug!(
13025                "Cross-referenced {} banking customers with {} core customers",
13026                banking_customers.len(),
13027                core_customers.len()
13028            );
13029        }
13030
13031        Ok(BankingSnapshot {
13032            customers: banking_customers,
13033            accounts: result.accounts,
13034            transactions: result.transactions,
13035            transaction_labels: result.transaction_labels,
13036            customer_labels: result.customer_labels,
13037            account_labels: result.account_labels,
13038            relationship_labels: result.relationship_labels,
13039            narratives: result.narratives,
13040            suspicious_count: result.stats.suspicious_count,
13041            scenario_count: result.scenarios.len(),
13042        })
13043    }
13044
13045    /// Calculate total transactions to generate.
13046    fn calculate_total_transactions(&self) -> u64 {
13047        let months = self.config.global.period_months as f64;
13048        self.config
13049            .companies
13050            .iter()
13051            .map(|c| {
13052                let annual = c.annual_transaction_volume.count() as f64;
13053                let weighted = annual * c.volume_weight;
13054                (weighted * months / 12.0) as u64
13055            })
13056            .sum()
13057    }
13058
13059    /// Create a progress bar if progress display is enabled.
13060    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
13061        if !self.phase_config.show_progress {
13062            return None;
13063        }
13064
13065        let pb = if let Some(mp) = &self.multi_progress {
13066            mp.add(ProgressBar::new(total))
13067        } else {
13068            ProgressBar::new(total)
13069        };
13070
13071        pb.set_style(
13072            ProgressStyle::default_bar()
13073                .template(&format!(
13074                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
13075                ))
13076                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
13077                .progress_chars("#>-"),
13078        );
13079
13080        Some(pb)
13081    }
13082
13083    /// Get the generated chart of accounts.
13084    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
13085        self.coa.clone()
13086    }
13087
13088    /// Get the generated master data.
13089    pub fn get_master_data(&self) -> &MasterDataSnapshot {
13090        &self.master_data
13091    }
13092
13093    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
13094    fn phase_compliance_regulations(
13095        &mut self,
13096        _stats: &mut EnhancedGenerationStatistics,
13097    ) -> SynthResult<ComplianceRegulationsSnapshot> {
13098        if !self.phase_config.generate_compliance_regulations {
13099            return Ok(ComplianceRegulationsSnapshot::default());
13100        }
13101
13102        info!("Phase: Generating Compliance Regulations Data");
13103
13104        let cr_config = &self.config.compliance_regulations;
13105
13106        // Determine jurisdictions: from config or inferred from companies
13107        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
13108            self.config
13109                .companies
13110                .iter()
13111                .map(|c| c.country.clone())
13112                .collect::<std::collections::HashSet<_>>()
13113                .into_iter()
13114                .collect()
13115        } else {
13116            cr_config.jurisdictions.clone()
13117        };
13118
13119        // Determine reference date
13120        let fallback_date =
13121            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
13122        let reference_date = cr_config
13123            .reference_date
13124            .as_ref()
13125            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
13126            .unwrap_or_else(|| {
13127                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13128                    .unwrap_or(fallback_date)
13129            });
13130
13131        // Generate standards registry data
13132        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
13133        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
13134        let cross_reference_records = reg_gen.generate_cross_reference_records();
13135        let jurisdiction_records =
13136            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
13137
13138        info!(
13139            "  Standards: {} records, {} cross-references, {} jurisdictions",
13140            standard_records.len(),
13141            cross_reference_records.len(),
13142            jurisdiction_records.len()
13143        );
13144
13145        // Generate audit procedures (if enabled)
13146        let audit_procedures = if cr_config.audit_procedures.enabled {
13147            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
13148                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
13149                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
13150                confidence_level: cr_config.audit_procedures.confidence_level,
13151                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
13152            };
13153            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
13154                self.seed + 9000,
13155                proc_config,
13156            );
13157            let registry = reg_gen.registry();
13158            let mut all_procs = Vec::new();
13159            for jurisdiction in &jurisdictions {
13160                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
13161                all_procs.extend(procs);
13162            }
13163            info!("  Audit procedures: {}", all_procs.len());
13164            all_procs
13165        } else {
13166            Vec::new()
13167        };
13168
13169        // Generate compliance findings (if enabled)
13170        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
13171            let finding_config =
13172                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
13173                    finding_rate: cr_config.findings.finding_rate,
13174                    material_weakness_rate: cr_config.findings.material_weakness_rate,
13175                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
13176                    generate_remediation: cr_config.findings.generate_remediation,
13177                };
13178            let mut finding_gen =
13179                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
13180                    self.seed + 9100,
13181                    finding_config,
13182                );
13183            let mut all_findings = Vec::new();
13184            for company in &self.config.companies {
13185                let company_findings =
13186                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
13187                all_findings.extend(company_findings);
13188            }
13189            info!("  Compliance findings: {}", all_findings.len());
13190            all_findings
13191        } else {
13192            Vec::new()
13193        };
13194
13195        // Generate regulatory filings (if enabled)
13196        let filings = if cr_config.filings.enabled {
13197            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
13198                filing_types: cr_config.filings.filing_types.clone(),
13199                generate_status_progression: cr_config.filings.generate_status_progression,
13200            };
13201            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
13202                self.seed + 9200,
13203                filing_config,
13204            );
13205            let company_codes: Vec<String> = self
13206                .config
13207                .companies
13208                .iter()
13209                .map(|c| c.code.clone())
13210                .collect();
13211            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13212                .unwrap_or(fallback_date);
13213            let filings = filing_gen.generate_filings(
13214                &company_codes,
13215                &jurisdictions,
13216                start_date,
13217                self.config.global.period_months,
13218            );
13219            info!("  Regulatory filings: {}", filings.len());
13220            filings
13221        } else {
13222            Vec::new()
13223        };
13224
13225        // Build compliance graph (if enabled)
13226        let compliance_graph = if cr_config.graph.enabled {
13227            let graph_config = datasynth_graph::ComplianceGraphConfig {
13228                include_standard_nodes: cr_config.graph.include_compliance_nodes,
13229                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
13230                include_cross_references: cr_config.graph.include_cross_references,
13231                include_supersession_edges: cr_config.graph.include_supersession_edges,
13232                include_account_links: cr_config.graph.include_account_links,
13233                include_control_links: cr_config.graph.include_control_links,
13234                include_company_links: cr_config.graph.include_company_links,
13235            };
13236            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
13237
13238            // Add standard nodes
13239            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
13240                .iter()
13241                .map(|r| datasynth_graph::StandardNodeInput {
13242                    standard_id: r.standard_id.clone(),
13243                    title: r.title.clone(),
13244                    category: r.category.clone(),
13245                    domain: r.domain.clone(),
13246                    is_active: r.is_active,
13247                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
13248                    applicable_account_types: r.applicable_account_types.clone(),
13249                    applicable_processes: r.applicable_processes.clone(),
13250                })
13251                .collect();
13252            builder.add_standards(&standard_inputs);
13253
13254            // Add jurisdiction nodes
13255            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
13256                jurisdiction_records
13257                    .iter()
13258                    .map(|r| datasynth_graph::JurisdictionNodeInput {
13259                        country_code: r.country_code.clone(),
13260                        country_name: r.country_name.clone(),
13261                        framework: r.accounting_framework.clone(),
13262                        standard_count: r.standard_count,
13263                        tax_rate: r.statutory_tax_rate,
13264                    })
13265                    .collect();
13266            builder.add_jurisdictions(&jurisdiction_inputs);
13267
13268            // Add cross-reference edges
13269            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
13270                cross_reference_records
13271                    .iter()
13272                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
13273                        from_standard: r.from_standard.clone(),
13274                        to_standard: r.to_standard.clone(),
13275                        relationship: r.relationship.clone(),
13276                        convergence_level: r.convergence_level,
13277                    })
13278                    .collect();
13279            builder.add_cross_references(&xref_inputs);
13280
13281            // Add jurisdiction→standard mappings
13282            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
13283                .iter()
13284                .map(|r| datasynth_graph::JurisdictionMappingInput {
13285                    country_code: r.jurisdiction.clone(),
13286                    standard_id: r.standard_id.clone(),
13287                })
13288                .collect();
13289            builder.add_jurisdiction_mappings(&mapping_inputs);
13290
13291            // Add procedure nodes
13292            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13293                .iter()
13294                .map(|p| datasynth_graph::ProcedureNodeInput {
13295                    procedure_id: p.procedure_id.clone(),
13296                    standard_id: p.standard_id.clone(),
13297                    procedure_type: p.procedure_type.clone(),
13298                    sample_size: p.sample_size,
13299                    confidence_level: p.confidence_level,
13300                })
13301                .collect();
13302            builder.add_procedures(&proc_inputs);
13303
13304            // Add finding nodes
13305            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13306                .iter()
13307                .map(|f| datasynth_graph::FindingNodeInput {
13308                    finding_id: f.finding_id.to_string(),
13309                    standard_id: f
13310                        .related_standards
13311                        .first()
13312                        .map(|s| s.as_str().to_string())
13313                        .unwrap_or_default(),
13314                    severity: f.severity.to_string(),
13315                    deficiency_level: f.deficiency_level.to_string(),
13316                    severity_score: f.deficiency_level.severity_score(),
13317                    control_id: f.control_id.clone(),
13318                    affected_accounts: f.affected_accounts.clone(),
13319                })
13320                .collect();
13321            builder.add_findings(&finding_inputs);
13322
13323            // Cross-domain: link standards to accounts from chart of accounts
13324            if cr_config.graph.include_account_links {
13325                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13326                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13327                for std_record in &standard_records {
13328                    if let Some(std_obj) =
13329                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
13330                            &std_record.standard_id,
13331                        ))
13332                    {
13333                        for acct_type in &std_obj.applicable_account_types {
13334                            account_links.push(datasynth_graph::AccountLinkInput {
13335                                standard_id: std_record.standard_id.clone(),
13336                                account_code: acct_type.clone(),
13337                                account_name: acct_type.clone(),
13338                            });
13339                        }
13340                    }
13341                }
13342                builder.add_account_links(&account_links);
13343            }
13344
13345            // Cross-domain: link standards to internal controls
13346            if cr_config.graph.include_control_links {
13347                let mut control_links = Vec::new();
13348                // SOX/PCAOB standards link to all controls
13349                let sox_like_ids: Vec<String> = standard_records
13350                    .iter()
13351                    .filter(|r| {
13352                        r.standard_id.starts_with("SOX")
13353                            || r.standard_id.starts_with("PCAOB-AS-2201")
13354                    })
13355                    .map(|r| r.standard_id.clone())
13356                    .collect();
13357                // Get control IDs from config (C001-C060 standard controls)
13358                let control_ids = [
13359                    ("C001", "Cash Controls"),
13360                    ("C002", "Large Transaction Approval"),
13361                    ("C010", "PO Approval"),
13362                    ("C011", "Three-Way Match"),
13363                    ("C020", "Revenue Recognition"),
13364                    ("C021", "Credit Check"),
13365                    ("C030", "Manual JE Approval"),
13366                    ("C031", "Period Close Review"),
13367                    ("C032", "Account Reconciliation"),
13368                    ("C040", "Payroll Processing"),
13369                    ("C050", "Fixed Asset Capitalization"),
13370                    ("C060", "Intercompany Elimination"),
13371                ];
13372                for sox_id in &sox_like_ids {
13373                    for (ctrl_id, ctrl_name) in &control_ids {
13374                        control_links.push(datasynth_graph::ControlLinkInput {
13375                            standard_id: sox_id.clone(),
13376                            control_id: ctrl_id.to_string(),
13377                            control_name: ctrl_name.to_string(),
13378                        });
13379                    }
13380                }
13381                builder.add_control_links(&control_links);
13382            }
13383
13384            // Cross-domain: filing nodes with company links
13385            if cr_config.graph.include_company_links {
13386                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13387                    .iter()
13388                    .enumerate()
13389                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
13390                        filing_id: format!("F{:04}", i + 1),
13391                        filing_type: f.filing_type.to_string(),
13392                        company_code: f.company_code.clone(),
13393                        jurisdiction: f.jurisdiction.clone(),
13394                        status: format!("{:?}", f.status),
13395                    })
13396                    .collect();
13397                builder.add_filings(&filing_inputs);
13398            }
13399
13400            let graph = builder.build();
13401            info!(
13402                "  Compliance graph: {} nodes, {} edges",
13403                graph.nodes.len(),
13404                graph.edges.len()
13405            );
13406            Some(graph)
13407        } else {
13408            None
13409        };
13410
13411        self.check_resources_with_log("post-compliance-regulations")?;
13412
13413        Ok(ComplianceRegulationsSnapshot {
13414            standard_records,
13415            cross_reference_records,
13416            jurisdiction_records,
13417            audit_procedures,
13418            findings,
13419            filings,
13420            compliance_graph,
13421        })
13422    }
13423
13424    /// Build a lineage graph describing config → phase → output relationships.
13425    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13426        use super::lineage::LineageGraphBuilder;
13427
13428        let mut builder = LineageGraphBuilder::new();
13429
13430        // Config sections
13431        builder.add_config_section("config:global", "Global Config");
13432        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13433        builder.add_config_section("config:transactions", "Transaction Config");
13434
13435        // Generator phases
13436        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13437        builder.add_generator_phase("phase:je", "Journal Entry Generation");
13438
13439        // Config → phase edges
13440        builder.configured_by("phase:coa", "config:chart_of_accounts");
13441        builder.configured_by("phase:je", "config:transactions");
13442
13443        // Output files
13444        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13445        builder.produced_by("output:je", "phase:je");
13446
13447        // Optional phases based on config
13448        if self.phase_config.generate_master_data {
13449            builder.add_config_section("config:master_data", "Master Data Config");
13450            builder.add_generator_phase("phase:master_data", "Master Data Generation");
13451            builder.configured_by("phase:master_data", "config:master_data");
13452            builder.input_to("phase:master_data", "phase:je");
13453        }
13454
13455        if self.phase_config.generate_document_flows {
13456            builder.add_config_section("config:document_flows", "Document Flow Config");
13457            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13458            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13459            builder.configured_by("phase:p2p", "config:document_flows");
13460            builder.configured_by("phase:o2c", "config:document_flows");
13461
13462            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13463            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13464            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13465            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13466            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13467
13468            builder.produced_by("output:po", "phase:p2p");
13469            builder.produced_by("output:gr", "phase:p2p");
13470            builder.produced_by("output:vi", "phase:p2p");
13471            builder.produced_by("output:so", "phase:o2c");
13472            builder.produced_by("output:ci", "phase:o2c");
13473        }
13474
13475        if self.phase_config.inject_anomalies {
13476            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13477            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13478            builder.configured_by("phase:anomaly", "config:fraud");
13479            builder.add_output_file(
13480                "output:labels",
13481                "Anomaly Labels",
13482                "labels/anomaly_labels.csv",
13483            );
13484            builder.produced_by("output:labels", "phase:anomaly");
13485        }
13486
13487        if self.phase_config.generate_audit {
13488            builder.add_config_section("config:audit", "Audit Config");
13489            builder.add_generator_phase("phase:audit", "Audit Data Generation");
13490            builder.configured_by("phase:audit", "config:audit");
13491        }
13492
13493        if self.phase_config.generate_banking {
13494            builder.add_config_section("config:banking", "Banking Config");
13495            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13496            builder.configured_by("phase:banking", "config:banking");
13497        }
13498
13499        if self.config.llm.enabled {
13500            builder.add_config_section("config:llm", "LLM Enrichment Config");
13501            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13502            builder.configured_by("phase:llm_enrichment", "config:llm");
13503        }
13504
13505        if self.config.diffusion.enabled {
13506            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13507            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13508            builder.configured_by("phase:diffusion", "config:diffusion");
13509        }
13510
13511        if self.config.causal.enabled {
13512            builder.add_config_section("config:causal", "Causal Generation Config");
13513            builder.add_generator_phase("phase:causal", "Causal Overlay");
13514            builder.configured_by("phase:causal", "config:causal");
13515        }
13516
13517        builder.build()
13518    }
13519
13520    // -----------------------------------------------------------------------
13521    // Trial-balance helpers used to replace hardcoded proxy values
13522    // -----------------------------------------------------------------------
13523
13524    /// Compute total revenue for a company from its journal entries.
13525    ///
13526    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
13527    /// net credits on all revenue-account lines filtered to `company_code`.
13528    fn compute_company_revenue(
13529        entries: &[JournalEntry],
13530        company_code: &str,
13531    ) -> rust_decimal::Decimal {
13532        use rust_decimal::Decimal;
13533        let mut revenue = Decimal::ZERO;
13534        for je in entries {
13535            if je.header.company_code != company_code {
13536                continue;
13537            }
13538            for line in &je.lines {
13539                if line.gl_account.starts_with('4') {
13540                    // Revenue is credit-normal
13541                    revenue += line.credit_amount - line.debit_amount;
13542                }
13543            }
13544        }
13545        revenue.max(Decimal::ZERO)
13546    }
13547
13548    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
13549    ///
13550    /// Asset accounts start with "1"; liability accounts start with "2".
13551    fn compute_entity_net_assets(
13552        entries: &[JournalEntry],
13553        entity_code: &str,
13554    ) -> rust_decimal::Decimal {
13555        use rust_decimal::Decimal;
13556        let mut asset_net = Decimal::ZERO;
13557        let mut liability_net = Decimal::ZERO;
13558        for je in entries {
13559            if je.header.company_code != entity_code {
13560                continue;
13561            }
13562            for line in &je.lines {
13563                if line.gl_account.starts_with('1') {
13564                    asset_net += line.debit_amount - line.credit_amount;
13565                } else if line.gl_account.starts_with('2') {
13566                    liability_net += line.credit_amount - line.debit_amount;
13567                }
13568            }
13569        }
13570        asset_net - liability_net
13571    }
13572}
13573
13574/// Get the directory name for a graph export format.
13575fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13576    match format {
13577        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13578        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13579        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13580        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13581        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13582    }
13583}
13584
13585/// Aggregate journal entry lines into per-account trial balance rows.
13586///
13587/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
13588/// debit/credit totals and a net balance (debit minus credit).
13589fn compute_trial_balance_entries(
13590    entries: &[JournalEntry],
13591    entity_code: &str,
13592    fiscal_year: i32,
13593    coa: Option<&ChartOfAccounts>,
13594) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13595    use std::collections::BTreeMap;
13596
13597    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13598        BTreeMap::new();
13599
13600    for je in entries {
13601        for line in &je.lines {
13602            let entry = balances.entry(line.account_code.clone()).or_default();
13603            entry.0 += line.debit_amount;
13604            entry.1 += line.credit_amount;
13605        }
13606    }
13607
13608    balances
13609        .into_iter()
13610        .map(
13611            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13612                account_description: coa
13613                    .and_then(|c| c.get_account(&account_code))
13614                    .map(|a| a.description().to_string())
13615                    .unwrap_or_else(|| account_code.clone()),
13616                account_code,
13617                debit_balance: debit,
13618                credit_balance: credit,
13619                net_balance: debit - credit,
13620                entity_code: entity_code.to_string(),
13621                period: format!("FY{}", fiscal_year),
13622            },
13623        )
13624        .collect()
13625}
13626
13627#[cfg(test)]
13628#[allow(clippy::unwrap_used)]
13629mod tests {
13630    use super::*;
13631    use datasynth_config::schema::*;
13632
13633    fn create_test_config() -> GeneratorConfig {
13634        GeneratorConfig {
13635            global: GlobalConfig {
13636                industry: IndustrySector::Manufacturing,
13637                start_date: "2024-01-01".to_string(),
13638                period_months: 1,
13639                seed: Some(42),
13640                parallel: false,
13641                group_currency: "USD".to_string(),
13642                presentation_currency: None,
13643                worker_threads: 0,
13644                memory_limit_mb: 0,
13645                fiscal_year_months: None,
13646            },
13647            companies: vec![CompanyConfig {
13648                code: "1000".to_string(),
13649                name: "Test Company".to_string(),
13650                currency: "USD".to_string(),
13651                functional_currency: None,
13652                country: "US".to_string(),
13653                annual_transaction_volume: TransactionVolume::TenK,
13654                volume_weight: 1.0,
13655                fiscal_year_variant: "K4".to_string(),
13656            }],
13657            chart_of_accounts: ChartOfAccountsConfig {
13658                complexity: CoAComplexity::Small,
13659                industry_specific: true,
13660                custom_accounts: None,
13661                min_hierarchy_depth: 2,
13662                max_hierarchy_depth: 4,
13663            },
13664            transactions: TransactionConfig::default(),
13665            output: OutputConfig::default(),
13666            fraud: FraudConfig::default(),
13667            internal_controls: InternalControlsConfig::default(),
13668            business_processes: BusinessProcessConfig::default(),
13669            user_personas: UserPersonaConfig::default(),
13670            templates: TemplateConfig::default(),
13671            approval: ApprovalConfig::default(),
13672            departments: DepartmentConfig::default(),
13673            master_data: MasterDataConfig::default(),
13674            document_flows: DocumentFlowConfig::default(),
13675            intercompany: IntercompanyConfig::default(),
13676            balance: BalanceConfig::default(),
13677            ocpm: OcpmConfig::default(),
13678            audit: AuditGenerationConfig::default(),
13679            banking: datasynth_banking::BankingConfig::default(),
13680            data_quality: DataQualitySchemaConfig::default(),
13681            scenario: ScenarioConfig::default(),
13682            temporal: TemporalDriftConfig::default(),
13683            graph_export: GraphExportConfig::default(),
13684            streaming: StreamingSchemaConfig::default(),
13685            rate_limit: RateLimitSchemaConfig::default(),
13686            temporal_attributes: TemporalAttributeSchemaConfig::default(),
13687            relationships: RelationshipSchemaConfig::default(),
13688            accounting_standards: AccountingStandardsConfig::default(),
13689            audit_standards: AuditStandardsConfig::default(),
13690            distributions: Default::default(),
13691            temporal_patterns: Default::default(),
13692            vendor_network: VendorNetworkSchemaConfig::default(),
13693            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
13694            relationship_strength: RelationshipStrengthSchemaConfig::default(),
13695            cross_process_links: CrossProcessLinksSchemaConfig::default(),
13696            organizational_events: OrganizationalEventsSchemaConfig::default(),
13697            behavioral_drift: BehavioralDriftSchemaConfig::default(),
13698            market_drift: MarketDriftSchemaConfig::default(),
13699            drift_labeling: DriftLabelingSchemaConfig::default(),
13700            anomaly_injection: Default::default(),
13701            industry_specific: Default::default(),
13702            fingerprint_privacy: Default::default(),
13703            quality_gates: Default::default(),
13704            compliance: Default::default(),
13705            webhooks: Default::default(),
13706            llm: Default::default(),
13707            diffusion: Default::default(),
13708            causal: Default::default(),
13709            source_to_pay: Default::default(),
13710            financial_reporting: Default::default(),
13711            hr: Default::default(),
13712            manufacturing: Default::default(),
13713            sales_quotes: Default::default(),
13714            tax: Default::default(),
13715            treasury: Default::default(),
13716            project_accounting: Default::default(),
13717            esg: Default::default(),
13718            country_packs: None,
13719            scenarios: Default::default(),
13720            session: Default::default(),
13721            compliance_regulations: Default::default(),
13722        }
13723    }
13724
13725    #[test]
13726    fn test_enhanced_orchestrator_creation() {
13727        let config = create_test_config();
13728        let orchestrator = EnhancedOrchestrator::with_defaults(config);
13729        assert!(orchestrator.is_ok());
13730    }
13731
13732    #[test]
13733    fn test_minimal_generation() {
13734        let config = create_test_config();
13735        let phase_config = PhaseConfig {
13736            generate_master_data: false,
13737            generate_document_flows: false,
13738            generate_journal_entries: true,
13739            inject_anomalies: false,
13740            show_progress: false,
13741            ..Default::default()
13742        };
13743
13744        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13745        let result = orchestrator.generate();
13746
13747        assert!(result.is_ok());
13748        let result = result.unwrap();
13749        assert!(!result.journal_entries.is_empty());
13750    }
13751
13752    #[test]
13753    fn test_master_data_generation() {
13754        let config = create_test_config();
13755        let phase_config = PhaseConfig {
13756            generate_master_data: true,
13757            generate_document_flows: false,
13758            generate_journal_entries: false,
13759            inject_anomalies: false,
13760            show_progress: false,
13761            vendors_per_company: 5,
13762            customers_per_company: 5,
13763            materials_per_company: 10,
13764            assets_per_company: 5,
13765            employees_per_company: 10,
13766            ..Default::default()
13767        };
13768
13769        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13770        let result = orchestrator.generate().unwrap();
13771
13772        assert!(!result.master_data.vendors.is_empty());
13773        assert!(!result.master_data.customers.is_empty());
13774        assert!(!result.master_data.materials.is_empty());
13775    }
13776
13777    #[test]
13778    fn test_document_flow_generation() {
13779        let config = create_test_config();
13780        let phase_config = PhaseConfig {
13781            generate_master_data: true,
13782            generate_document_flows: true,
13783            generate_journal_entries: false,
13784            inject_anomalies: false,
13785            inject_data_quality: false,
13786            validate_balances: false,
13787            generate_ocpm_events: false,
13788            show_progress: false,
13789            vendors_per_company: 5,
13790            customers_per_company: 5,
13791            materials_per_company: 10,
13792            assets_per_company: 5,
13793            employees_per_company: 10,
13794            p2p_chains: 5,
13795            o2c_chains: 5,
13796            ..Default::default()
13797        };
13798
13799        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13800        let result = orchestrator.generate().unwrap();
13801
13802        // Should have generated P2P and O2C chains
13803        assert!(!result.document_flows.p2p_chains.is_empty());
13804        assert!(!result.document_flows.o2c_chains.is_empty());
13805
13806        // Flattened documents should be populated
13807        assert!(!result.document_flows.purchase_orders.is_empty());
13808        assert!(!result.document_flows.sales_orders.is_empty());
13809    }
13810
13811    #[test]
13812    fn test_anomaly_injection() {
13813        let config = create_test_config();
13814        let phase_config = PhaseConfig {
13815            generate_master_data: false,
13816            generate_document_flows: false,
13817            generate_journal_entries: true,
13818            inject_anomalies: true,
13819            show_progress: false,
13820            ..Default::default()
13821        };
13822
13823        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13824        let result = orchestrator.generate().unwrap();
13825
13826        // Should have journal entries
13827        assert!(!result.journal_entries.is_empty());
13828
13829        // With ~833 entries and 2% rate, expect some anomalies
13830        // Note: This is probabilistic, so we just verify the structure exists
13831        assert!(result.anomaly_labels.summary.is_some());
13832    }
13833
13834    #[test]
13835    fn test_full_generation_pipeline() {
13836        let config = create_test_config();
13837        let phase_config = PhaseConfig {
13838            generate_master_data: true,
13839            generate_document_flows: true,
13840            generate_journal_entries: true,
13841            inject_anomalies: false,
13842            inject_data_quality: false,
13843            validate_balances: true,
13844            generate_ocpm_events: false,
13845            show_progress: false,
13846            vendors_per_company: 3,
13847            customers_per_company: 3,
13848            materials_per_company: 5,
13849            assets_per_company: 3,
13850            employees_per_company: 5,
13851            p2p_chains: 3,
13852            o2c_chains: 3,
13853            ..Default::default()
13854        };
13855
13856        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13857        let result = orchestrator.generate().unwrap();
13858
13859        // All phases should have results
13860        assert!(!result.master_data.vendors.is_empty());
13861        assert!(!result.master_data.customers.is_empty());
13862        assert!(!result.document_flows.p2p_chains.is_empty());
13863        assert!(!result.document_flows.o2c_chains.is_empty());
13864        assert!(!result.journal_entries.is_empty());
13865        assert!(result.statistics.accounts_count > 0);
13866
13867        // Subledger linking should have run
13868        assert!(!result.subledger.ap_invoices.is_empty());
13869        assert!(!result.subledger.ar_invoices.is_empty());
13870
13871        // Balance validation should have run
13872        assert!(result.balance_validation.validated);
13873        assert!(result.balance_validation.entries_processed > 0);
13874    }
13875
13876    #[test]
13877    fn test_subledger_linking() {
13878        let config = create_test_config();
13879        let phase_config = PhaseConfig {
13880            generate_master_data: true,
13881            generate_document_flows: true,
13882            generate_journal_entries: false,
13883            inject_anomalies: false,
13884            inject_data_quality: false,
13885            validate_balances: false,
13886            generate_ocpm_events: false,
13887            show_progress: false,
13888            vendors_per_company: 5,
13889            customers_per_company: 5,
13890            materials_per_company: 10,
13891            assets_per_company: 3,
13892            employees_per_company: 5,
13893            p2p_chains: 5,
13894            o2c_chains: 5,
13895            ..Default::default()
13896        };
13897
13898        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13899        let result = orchestrator.generate().unwrap();
13900
13901        // Should have document flows
13902        assert!(!result.document_flows.vendor_invoices.is_empty());
13903        assert!(!result.document_flows.customer_invoices.is_empty());
13904
13905        // Subledger should be linked from document flows
13906        assert!(!result.subledger.ap_invoices.is_empty());
13907        assert!(!result.subledger.ar_invoices.is_empty());
13908
13909        // AP invoices count should match vendor invoices count
13910        assert_eq!(
13911            result.subledger.ap_invoices.len(),
13912            result.document_flows.vendor_invoices.len()
13913        );
13914
13915        // AR invoices count should match customer invoices count
13916        assert_eq!(
13917            result.subledger.ar_invoices.len(),
13918            result.document_flows.customer_invoices.len()
13919        );
13920
13921        // Statistics should reflect subledger counts
13922        assert_eq!(
13923            result.statistics.ap_invoice_count,
13924            result.subledger.ap_invoices.len()
13925        );
13926        assert_eq!(
13927            result.statistics.ar_invoice_count,
13928            result.subledger.ar_invoices.len()
13929        );
13930    }
13931
13932    #[test]
13933    fn test_balance_validation() {
13934        let config = create_test_config();
13935        let phase_config = PhaseConfig {
13936            generate_master_data: false,
13937            generate_document_flows: false,
13938            generate_journal_entries: true,
13939            inject_anomalies: false,
13940            validate_balances: true,
13941            show_progress: false,
13942            ..Default::default()
13943        };
13944
13945        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13946        let result = orchestrator.generate().unwrap();
13947
13948        // Balance validation should run
13949        assert!(result.balance_validation.validated);
13950        assert!(result.balance_validation.entries_processed > 0);
13951
13952        // Generated JEs should be balanced (no unbalanced entries)
13953        assert!(!result.balance_validation.has_unbalanced_entries);
13954
13955        // Total debits should equal total credits
13956        assert_eq!(
13957            result.balance_validation.total_debits,
13958            result.balance_validation.total_credits
13959        );
13960    }
13961
13962    #[test]
13963    fn test_statistics_accuracy() {
13964        let config = create_test_config();
13965        let phase_config = PhaseConfig {
13966            generate_master_data: true,
13967            generate_document_flows: false,
13968            generate_journal_entries: true,
13969            inject_anomalies: false,
13970            show_progress: false,
13971            vendors_per_company: 10,
13972            customers_per_company: 20,
13973            materials_per_company: 15,
13974            assets_per_company: 5,
13975            employees_per_company: 8,
13976            ..Default::default()
13977        };
13978
13979        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13980        let result = orchestrator.generate().unwrap();
13981
13982        // Statistics should match actual data
13983        assert_eq!(
13984            result.statistics.vendor_count,
13985            result.master_data.vendors.len()
13986        );
13987        assert_eq!(
13988            result.statistics.customer_count,
13989            result.master_data.customers.len()
13990        );
13991        assert_eq!(
13992            result.statistics.material_count,
13993            result.master_data.materials.len()
13994        );
13995        assert_eq!(
13996            result.statistics.total_entries as usize,
13997            result.journal_entries.len()
13998        );
13999    }
14000
14001    #[test]
14002    fn test_phase_config_defaults() {
14003        let config = PhaseConfig::default();
14004        assert!(config.generate_master_data);
14005        assert!(config.generate_document_flows);
14006        assert!(config.generate_journal_entries);
14007        assert!(!config.inject_anomalies);
14008        assert!(config.validate_balances);
14009        assert!(config.show_progress);
14010        assert!(config.vendors_per_company > 0);
14011        assert!(config.customers_per_company > 0);
14012    }
14013
14014    #[test]
14015    fn test_get_coa_before_generation() {
14016        let config = create_test_config();
14017        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
14018
14019        // Before generation, CoA should be None
14020        assert!(orchestrator.get_coa().is_none());
14021    }
14022
14023    #[test]
14024    fn test_get_coa_after_generation() {
14025        let config = create_test_config();
14026        let phase_config = PhaseConfig {
14027            generate_master_data: false,
14028            generate_document_flows: false,
14029            generate_journal_entries: true,
14030            inject_anomalies: false,
14031            show_progress: false,
14032            ..Default::default()
14033        };
14034
14035        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14036        let _ = orchestrator.generate().unwrap();
14037
14038        // After generation, CoA should be available
14039        assert!(orchestrator.get_coa().is_some());
14040    }
14041
14042    #[test]
14043    fn test_get_master_data() {
14044        let config = create_test_config();
14045        let phase_config = PhaseConfig {
14046            generate_master_data: true,
14047            generate_document_flows: false,
14048            generate_journal_entries: false,
14049            inject_anomalies: false,
14050            show_progress: false,
14051            vendors_per_company: 5,
14052            customers_per_company: 5,
14053            materials_per_company: 5,
14054            assets_per_company: 5,
14055            employees_per_company: 5,
14056            ..Default::default()
14057        };
14058
14059        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14060        let result = orchestrator.generate().unwrap();
14061
14062        // After generate(), master_data is moved into the result
14063        assert!(!result.master_data.vendors.is_empty());
14064    }
14065
14066    #[test]
14067    fn test_with_progress_builder() {
14068        let config = create_test_config();
14069        let orchestrator = EnhancedOrchestrator::with_defaults(config)
14070            .unwrap()
14071            .with_progress(false);
14072
14073        // Should still work without progress
14074        assert!(!orchestrator.phase_config.show_progress);
14075    }
14076
14077    #[test]
14078    fn test_multi_company_generation() {
14079        let mut config = create_test_config();
14080        config.companies.push(CompanyConfig {
14081            code: "2000".to_string(),
14082            name: "Subsidiary".to_string(),
14083            currency: "EUR".to_string(),
14084            functional_currency: None,
14085            country: "DE".to_string(),
14086            annual_transaction_volume: TransactionVolume::TenK,
14087            volume_weight: 0.5,
14088            fiscal_year_variant: "K4".to_string(),
14089        });
14090
14091        let phase_config = PhaseConfig {
14092            generate_master_data: true,
14093            generate_document_flows: false,
14094            generate_journal_entries: true,
14095            inject_anomalies: false,
14096            show_progress: false,
14097            vendors_per_company: 5,
14098            customers_per_company: 5,
14099            materials_per_company: 5,
14100            assets_per_company: 5,
14101            employees_per_company: 5,
14102            ..Default::default()
14103        };
14104
14105        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14106        let result = orchestrator.generate().unwrap();
14107
14108        // Should have master data for both companies
14109        assert!(result.statistics.vendor_count >= 10); // 5 per company
14110        assert!(result.statistics.customer_count >= 10);
14111        assert!(result.statistics.companies_count == 2);
14112    }
14113
14114    #[test]
14115    fn test_empty_master_data_skips_document_flows() {
14116        let config = create_test_config();
14117        let phase_config = PhaseConfig {
14118            generate_master_data: false,   // Skip master data
14119            generate_document_flows: true, // Try to generate flows
14120            generate_journal_entries: false,
14121            inject_anomalies: false,
14122            show_progress: false,
14123            ..Default::default()
14124        };
14125
14126        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14127        let result = orchestrator.generate().unwrap();
14128
14129        // Without master data, document flows should be empty
14130        assert!(result.document_flows.p2p_chains.is_empty());
14131        assert!(result.document_flows.o2c_chains.is_empty());
14132    }
14133
14134    #[test]
14135    fn test_journal_entry_line_item_count() {
14136        let config = create_test_config();
14137        let phase_config = PhaseConfig {
14138            generate_master_data: false,
14139            generate_document_flows: false,
14140            generate_journal_entries: true,
14141            inject_anomalies: false,
14142            show_progress: false,
14143            ..Default::default()
14144        };
14145
14146        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14147        let result = orchestrator.generate().unwrap();
14148
14149        // Total line items should match sum of all entry line counts
14150        let calculated_line_items: u64 = result
14151            .journal_entries
14152            .iter()
14153            .map(|e| e.line_count() as u64)
14154            .sum();
14155        assert_eq!(result.statistics.total_line_items, calculated_line_items);
14156    }
14157
14158    #[test]
14159    fn test_audit_generation() {
14160        let config = create_test_config();
14161        let phase_config = PhaseConfig {
14162            generate_master_data: false,
14163            generate_document_flows: false,
14164            generate_journal_entries: true,
14165            inject_anomalies: false,
14166            show_progress: false,
14167            generate_audit: true,
14168            audit_engagements: 2,
14169            workpapers_per_engagement: 5,
14170            evidence_per_workpaper: 2,
14171            risks_per_engagement: 3,
14172            findings_per_engagement: 2,
14173            judgments_per_engagement: 2,
14174            ..Default::default()
14175        };
14176
14177        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14178        let result = orchestrator.generate().unwrap();
14179
14180        // Should have generated audit data
14181        assert_eq!(result.audit.engagements.len(), 2);
14182        assert!(!result.audit.workpapers.is_empty());
14183        assert!(!result.audit.evidence.is_empty());
14184        assert!(!result.audit.risk_assessments.is_empty());
14185        assert!(!result.audit.findings.is_empty());
14186        assert!(!result.audit.judgments.is_empty());
14187
14188        // New ISA entity collections should also be populated
14189        assert!(
14190            !result.audit.confirmations.is_empty(),
14191            "ISA 505 confirmations should be generated"
14192        );
14193        assert!(
14194            !result.audit.confirmation_responses.is_empty(),
14195            "ISA 505 confirmation responses should be generated"
14196        );
14197        assert!(
14198            !result.audit.procedure_steps.is_empty(),
14199            "ISA 330 procedure steps should be generated"
14200        );
14201        // Samples may or may not be generated depending on workpaper sampling methods
14202        assert!(
14203            !result.audit.analytical_results.is_empty(),
14204            "ISA 520 analytical procedures should be generated"
14205        );
14206        assert!(
14207            !result.audit.ia_functions.is_empty(),
14208            "ISA 610 IA functions should be generated (one per engagement)"
14209        );
14210        assert!(
14211            !result.audit.related_parties.is_empty(),
14212            "ISA 550 related parties should be generated"
14213        );
14214
14215        // Statistics should match
14216        assert_eq!(
14217            result.statistics.audit_engagement_count,
14218            result.audit.engagements.len()
14219        );
14220        assert_eq!(
14221            result.statistics.audit_workpaper_count,
14222            result.audit.workpapers.len()
14223        );
14224        assert_eq!(
14225            result.statistics.audit_evidence_count,
14226            result.audit.evidence.len()
14227        );
14228        assert_eq!(
14229            result.statistics.audit_risk_count,
14230            result.audit.risk_assessments.len()
14231        );
14232        assert_eq!(
14233            result.statistics.audit_finding_count,
14234            result.audit.findings.len()
14235        );
14236        assert_eq!(
14237            result.statistics.audit_judgment_count,
14238            result.audit.judgments.len()
14239        );
14240        assert_eq!(
14241            result.statistics.audit_confirmation_count,
14242            result.audit.confirmations.len()
14243        );
14244        assert_eq!(
14245            result.statistics.audit_confirmation_response_count,
14246            result.audit.confirmation_responses.len()
14247        );
14248        assert_eq!(
14249            result.statistics.audit_procedure_step_count,
14250            result.audit.procedure_steps.len()
14251        );
14252        assert_eq!(
14253            result.statistics.audit_sample_count,
14254            result.audit.samples.len()
14255        );
14256        assert_eq!(
14257            result.statistics.audit_analytical_result_count,
14258            result.audit.analytical_results.len()
14259        );
14260        assert_eq!(
14261            result.statistics.audit_ia_function_count,
14262            result.audit.ia_functions.len()
14263        );
14264        assert_eq!(
14265            result.statistics.audit_ia_report_count,
14266            result.audit.ia_reports.len()
14267        );
14268        assert_eq!(
14269            result.statistics.audit_related_party_count,
14270            result.audit.related_parties.len()
14271        );
14272        assert_eq!(
14273            result.statistics.audit_related_party_transaction_count,
14274            result.audit.related_party_transactions.len()
14275        );
14276    }
14277
14278    #[test]
14279    fn test_new_phases_disabled_by_default() {
14280        let config = create_test_config();
14281        // Verify new config fields default to disabled
14282        assert!(!config.llm.enabled);
14283        assert!(!config.diffusion.enabled);
14284        assert!(!config.causal.enabled);
14285
14286        let phase_config = PhaseConfig {
14287            generate_master_data: false,
14288            generate_document_flows: false,
14289            generate_journal_entries: true,
14290            inject_anomalies: false,
14291            show_progress: false,
14292            ..Default::default()
14293        };
14294
14295        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14296        let result = orchestrator.generate().unwrap();
14297
14298        // All new phase statistics should be zero when disabled
14299        assert_eq!(result.statistics.llm_enrichment_ms, 0);
14300        assert_eq!(result.statistics.llm_vendors_enriched, 0);
14301        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
14302        assert_eq!(result.statistics.diffusion_samples_generated, 0);
14303        assert_eq!(result.statistics.causal_generation_ms, 0);
14304        assert_eq!(result.statistics.causal_samples_generated, 0);
14305        assert!(result.statistics.causal_validation_passed.is_none());
14306        assert_eq!(result.statistics.counterfactual_pair_count, 0);
14307        assert!(result.counterfactual_pairs.is_empty());
14308    }
14309
14310    #[test]
14311    fn test_counterfactual_generation_enabled() {
14312        let config = create_test_config();
14313        let phase_config = PhaseConfig {
14314            generate_master_data: false,
14315            generate_document_flows: false,
14316            generate_journal_entries: true,
14317            inject_anomalies: false,
14318            show_progress: false,
14319            generate_counterfactuals: true,
14320            generate_period_close: false, // Disable so entry count matches counterfactual pairs
14321            ..Default::default()
14322        };
14323
14324        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14325        let result = orchestrator.generate().unwrap();
14326
14327        // With JE generation enabled, counterfactual pairs should be generated
14328        if !result.journal_entries.is_empty() {
14329            assert_eq!(
14330                result.counterfactual_pairs.len(),
14331                result.journal_entries.len()
14332            );
14333            assert_eq!(
14334                result.statistics.counterfactual_pair_count,
14335                result.journal_entries.len()
14336            );
14337            // Each pair should have a distinct pair_id
14338            let ids: std::collections::HashSet<_> = result
14339                .counterfactual_pairs
14340                .iter()
14341                .map(|p| p.pair_id.clone())
14342                .collect();
14343            assert_eq!(ids.len(), result.counterfactual_pairs.len());
14344        }
14345    }
14346
14347    #[test]
14348    fn test_llm_enrichment_enabled() {
14349        let mut config = create_test_config();
14350        config.llm.enabled = true;
14351        config.llm.max_vendor_enrichments = 3;
14352
14353        let phase_config = PhaseConfig {
14354            generate_master_data: true,
14355            generate_document_flows: false,
14356            generate_journal_entries: false,
14357            inject_anomalies: false,
14358            show_progress: false,
14359            vendors_per_company: 5,
14360            customers_per_company: 3,
14361            materials_per_company: 3,
14362            assets_per_company: 3,
14363            employees_per_company: 3,
14364            ..Default::default()
14365        };
14366
14367        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14368        let result = orchestrator.generate().unwrap();
14369
14370        // LLM enrichment should have run
14371        assert!(result.statistics.llm_vendors_enriched > 0);
14372        assert!(result.statistics.llm_vendors_enriched <= 3);
14373    }
14374
14375    #[test]
14376    fn test_diffusion_enhancement_enabled() {
14377        let mut config = create_test_config();
14378        config.diffusion.enabled = true;
14379        config.diffusion.n_steps = 50;
14380        config.diffusion.sample_size = 20;
14381
14382        let phase_config = PhaseConfig {
14383            generate_master_data: false,
14384            generate_document_flows: false,
14385            generate_journal_entries: true,
14386            inject_anomalies: false,
14387            show_progress: false,
14388            ..Default::default()
14389        };
14390
14391        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14392        let result = orchestrator.generate().unwrap();
14393
14394        // Diffusion phase should have generated samples
14395        assert_eq!(result.statistics.diffusion_samples_generated, 20);
14396    }
14397
14398    #[test]
14399    fn test_causal_overlay_enabled() {
14400        let mut config = create_test_config();
14401        config.causal.enabled = true;
14402        config.causal.template = "fraud_detection".to_string();
14403        config.causal.sample_size = 100;
14404        config.causal.validate = true;
14405
14406        let phase_config = PhaseConfig {
14407            generate_master_data: false,
14408            generate_document_flows: false,
14409            generate_journal_entries: true,
14410            inject_anomalies: false,
14411            show_progress: false,
14412            ..Default::default()
14413        };
14414
14415        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14416        let result = orchestrator.generate().unwrap();
14417
14418        // Causal phase should have generated samples
14419        assert_eq!(result.statistics.causal_samples_generated, 100);
14420        // Validation should have run
14421        assert!(result.statistics.causal_validation_passed.is_some());
14422    }
14423
14424    #[test]
14425    fn test_causal_overlay_revenue_cycle_template() {
14426        let mut config = create_test_config();
14427        config.causal.enabled = true;
14428        config.causal.template = "revenue_cycle".to_string();
14429        config.causal.sample_size = 50;
14430        config.causal.validate = false;
14431
14432        let phase_config = PhaseConfig {
14433            generate_master_data: false,
14434            generate_document_flows: false,
14435            generate_journal_entries: true,
14436            inject_anomalies: false,
14437            show_progress: false,
14438            ..Default::default()
14439        };
14440
14441        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14442        let result = orchestrator.generate().unwrap();
14443
14444        // Causal phase should have generated samples
14445        assert_eq!(result.statistics.causal_samples_generated, 50);
14446        // Validation was disabled
14447        assert!(result.statistics.causal_validation_passed.is_none());
14448    }
14449
14450    #[test]
14451    fn test_all_new_phases_enabled_together() {
14452        let mut config = create_test_config();
14453        config.llm.enabled = true;
14454        config.llm.max_vendor_enrichments = 2;
14455        config.diffusion.enabled = true;
14456        config.diffusion.n_steps = 20;
14457        config.diffusion.sample_size = 10;
14458        config.causal.enabled = true;
14459        config.causal.sample_size = 50;
14460        config.causal.validate = true;
14461
14462        let phase_config = PhaseConfig {
14463            generate_master_data: true,
14464            generate_document_flows: false,
14465            generate_journal_entries: true,
14466            inject_anomalies: false,
14467            show_progress: false,
14468            vendors_per_company: 5,
14469            customers_per_company: 3,
14470            materials_per_company: 3,
14471            assets_per_company: 3,
14472            employees_per_company: 3,
14473            ..Default::default()
14474        };
14475
14476        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14477        let result = orchestrator.generate().unwrap();
14478
14479        // All three phases should have run
14480        assert!(result.statistics.llm_vendors_enriched > 0);
14481        assert_eq!(result.statistics.diffusion_samples_generated, 10);
14482        assert_eq!(result.statistics.causal_samples_generated, 50);
14483        assert!(result.statistics.causal_validation_passed.is_some());
14484    }
14485
14486    #[test]
14487    fn test_statistics_serialization_with_new_fields() {
14488        let stats = EnhancedGenerationStatistics {
14489            total_entries: 100,
14490            total_line_items: 500,
14491            llm_enrichment_ms: 42,
14492            llm_vendors_enriched: 10,
14493            diffusion_enhancement_ms: 100,
14494            diffusion_samples_generated: 50,
14495            causal_generation_ms: 200,
14496            causal_samples_generated: 100,
14497            causal_validation_passed: Some(true),
14498            ..Default::default()
14499        };
14500
14501        let json = serde_json::to_string(&stats).unwrap();
14502        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
14503
14504        assert_eq!(deserialized.llm_enrichment_ms, 42);
14505        assert_eq!(deserialized.llm_vendors_enriched, 10);
14506        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
14507        assert_eq!(deserialized.diffusion_samples_generated, 50);
14508        assert_eq!(deserialized.causal_generation_ms, 200);
14509        assert_eq!(deserialized.causal_samples_generated, 100);
14510        assert_eq!(deserialized.causal_validation_passed, Some(true));
14511    }
14512
14513    #[test]
14514    fn test_statistics_backward_compat_deserialization() {
14515        // Old JSON without the new fields should still deserialize
14516        let old_json = r#"{
14517            "total_entries": 100,
14518            "total_line_items": 500,
14519            "accounts_count": 50,
14520            "companies_count": 1,
14521            "period_months": 12,
14522            "vendor_count": 10,
14523            "customer_count": 20,
14524            "material_count": 15,
14525            "asset_count": 5,
14526            "employee_count": 8,
14527            "p2p_chain_count": 5,
14528            "o2c_chain_count": 5,
14529            "ap_invoice_count": 5,
14530            "ar_invoice_count": 5,
14531            "ocpm_event_count": 0,
14532            "ocpm_object_count": 0,
14533            "ocpm_case_count": 0,
14534            "audit_engagement_count": 0,
14535            "audit_workpaper_count": 0,
14536            "audit_evidence_count": 0,
14537            "audit_risk_count": 0,
14538            "audit_finding_count": 0,
14539            "audit_judgment_count": 0,
14540            "anomalies_injected": 0,
14541            "data_quality_issues": 0,
14542            "banking_customer_count": 0,
14543            "banking_account_count": 0,
14544            "banking_transaction_count": 0,
14545            "banking_suspicious_count": 0,
14546            "graph_export_count": 0,
14547            "graph_node_count": 0,
14548            "graph_edge_count": 0
14549        }"#;
14550
14551        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
14552
14553        // New fields should default to 0 / None
14554        assert_eq!(stats.llm_enrichment_ms, 0);
14555        assert_eq!(stats.llm_vendors_enriched, 0);
14556        assert_eq!(stats.diffusion_enhancement_ms, 0);
14557        assert_eq!(stats.diffusion_samples_generated, 0);
14558        assert_eq!(stats.causal_generation_ms, 0);
14559        assert_eq!(stats.causal_samples_generated, 0);
14560        assert!(stats.causal_validation_passed.is_none());
14561    }
14562}