Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180// ============================================================================
181// Configuration Conversion Functions
182// ============================================================================
183
184/// Convert P2P flow config from schema to generator config.
185fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186    let payment_behavior = &schema_config.payment_behavior;
187    let late_dist = &payment_behavior.late_payment_days_distribution;
188
189    P2PGeneratorConfig {
190        three_way_match_rate: schema_config.three_way_match_rate,
191        partial_delivery_rate: schema_config.partial_delivery_rate,
192        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193        price_variance_rate: schema_config.price_variance_rate,
194        max_price_variance_percent: schema_config.max_price_variance_percent,
195        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198        payment_method_distribution: vec![
199            (PaymentMethod::BankTransfer, 0.60),
200            (PaymentMethod::Check, 0.25),
201            (PaymentMethod::Wire, 0.10),
202            (PaymentMethod::CreditCard, 0.05),
203        ],
204        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205        payment_behavior: P2PPaymentBehavior {
206            late_payment_rate: payment_behavior.late_payment_rate,
207            late_payment_distribution: LatePaymentDistribution {
208                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209                late_8_to_14: late_dist.late_8_to_14,
210                very_late_15_to_30: late_dist.very_late_15_to_30,
211                severely_late_31_to_60: late_dist.severely_late_31_to_60,
212                extremely_late_over_60: late_dist.extremely_late_over_60,
213            },
214            partial_payment_rate: payment_behavior.partial_payment_rate,
215            payment_correction_rate: payment_behavior.payment_correction_rate,
216            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217        },
218    }
219}
220
221/// Convert O2C flow config from schema to generator config.
222fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223    let payment_behavior = &schema_config.payment_behavior;
224
225    O2CGeneratorConfig {
226        credit_check_failure_rate: schema_config.credit_check_failure_rate,
227        partial_shipment_rate: schema_config.partial_shipment_rate,
228        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232        bad_debt_rate: schema_config.bad_debt_rate,
233        returns_rate: schema_config.return_rate,
234        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235        payment_method_distribution: vec![
236            (PaymentMethod::BankTransfer, 0.50),
237            (PaymentMethod::Check, 0.30),
238            (PaymentMethod::Wire, 0.15),
239            (PaymentMethod::CreditCard, 0.05),
240        ],
241        payment_behavior: O2CPaymentBehavior {
242            partial_payment_rate: payment_behavior.partial_payments.rate,
243            short_payment_rate: payment_behavior.short_payments.rate,
244            max_short_percent: payment_behavior.short_payments.max_short_percent,
245            on_account_rate: payment_behavior.on_account_payments.rate,
246            payment_correction_rate: payment_behavior.payment_corrections.rate,
247            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248        },
249    }
250}
251
/// Configuration for which generation phases to run.
///
/// Boolean fields switch individual phases on or off; `usize` fields set how
/// many entities a phase should produce. [`Default`] supplies baseline values,
/// and [`PhaseConfig::from_config`] derives the feature-gated flags from a
/// generator configuration.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Generate master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Generate document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Generate OCPM events from document flows.
    pub generate_ocpm_events: bool,
    /// Generate journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Validate balance sheet equation after generation.
    pub validate_balances: bool,
    /// Show progress bars.
    pub show_progress: bool,
    /// Number of vendors to generate per company.
    pub vendors_per_company: usize,
    /// Number of customers to generate per company.
    pub customers_per_company: usize,
    /// Number of materials to generate per company.
    pub materials_per_company: usize,
    /// Number of assets to generate per company.
    pub assets_per_company: usize,
    /// Number of employees to generate per company.
    pub employees_per_company: usize,
    /// Number of P2P chains to generate.
    pub p2p_chains: usize,
    /// Number of O2C chains to generate.
    pub o2c_chains: usize,
    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// Number of audit engagements to generate.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Generate banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Generate graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Generate S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Generate bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Generate accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Generate sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Generate tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Generate ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Generate intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Generate process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Generate counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Generate compliance regulations data (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Generate period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Generate HR data (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Generate treasury data (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Generate project accounting data (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
}
336
337impl Default for PhaseConfig {
338    fn default() -> Self {
339        Self {
340            generate_master_data: true,
341            generate_document_flows: true,
342            generate_ocpm_events: false, // Off by default
343            generate_journal_entries: true,
344            inject_anomalies: false,
345            inject_data_quality: false, // Off by default (to preserve clean test data)
346            validate_balances: true,
347            show_progress: true,
348            vendors_per_company: 50,
349            customers_per_company: 100,
350            materials_per_company: 200,
351            assets_per_company: 50,
352            employees_per_company: 100,
353            p2p_chains: 100,
354            o2c_chains: 100,
355            generate_audit: false, // Off by default
356            audit_engagements: 5,
357            workpapers_per_engagement: 20,
358            evidence_per_workpaper: 5,
359            risks_per_engagement: 15,
360            findings_per_engagement: 8,
361            judgments_per_engagement: 10,
362            generate_banking: false,                // Off by default
363            generate_graph_export: false,           // Off by default
364            generate_sourcing: false,               // Off by default
365            generate_bank_reconciliation: false,    // Off by default
366            generate_financial_statements: false,   // Off by default
367            generate_accounting_standards: false,   // Off by default
368            generate_manufacturing: false,          // Off by default
369            generate_sales_kpi_budgets: false,      // Off by default
370            generate_tax: false,                    // Off by default
371            generate_esg: false,                    // Off by default
372            generate_intercompany: false,           // Off by default
373            generate_evolution_events: true,        // On by default
374            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
375            generate_compliance_regulations: false, // Off by default
376            generate_period_close: true,            // On by default
377            generate_hr: false,                     // Off by default
378            generate_treasury: false,               // Off by default
379            generate_project_accounting: false,     // Off by default
380        }
381    }
382}
383
384impl PhaseConfig {
385    /// Derive phase flags from [`GeneratorConfig`].
386    ///
387    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
388    /// CLI flags can override individual fields after calling this method.
389    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390        Self {
391            // Always-on phases
392            generate_master_data: true,
393            generate_document_flows: true,
394            generate_journal_entries: true,
395            validate_balances: true,
396            generate_period_close: true,
397            generate_evolution_events: true,
398            show_progress: true,
399
400            // Feature-gated phases — derived from config sections
401            generate_audit: cfg.audit.enabled,
402            generate_banking: cfg.banking.enabled,
403            generate_graph_export: cfg.graph_export.enabled,
404            generate_sourcing: cfg.source_to_pay.enabled,
405            generate_intercompany: cfg.intercompany.enabled,
406            generate_financial_statements: cfg.financial_reporting.enabled,
407            generate_bank_reconciliation: cfg.financial_reporting.enabled,
408            generate_accounting_standards: cfg.accounting_standards.enabled,
409            generate_manufacturing: cfg.manufacturing.enabled,
410            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411            generate_tax: cfg.tax.enabled,
412            generate_esg: cfg.esg.enabled,
413            generate_ocpm_events: cfg.ocpm.enabled,
414            generate_compliance_regulations: cfg.compliance_regulations.enabled,
415            generate_hr: cfg.hr.enabled,
416            generate_treasury: cfg.treasury.enabled,
417            generate_project_accounting: cfg.project_accounting.enabled,
418
419            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
420            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423            inject_data_quality: cfg.data_quality.enabled,
424
425            // Count defaults (CLI can override after calling this method)
426            vendors_per_company: 50,
427            customers_per_company: 100,
428            materials_per_company: 200,
429            assets_per_company: 50,
430            employees_per_company: 100,
431            p2p_chains: 100,
432            o2c_chains: 100,
433            audit_engagements: 5,
434            workpapers_per_engagement: 20,
435            evidence_per_workpaper: 5,
436            risks_per_engagement: 15,
437            findings_per_engagement: 8,
438            judgments_per_engagement: 10,
439        }
440    }
441}
442
/// Master data snapshot containing all generated entities.
///
/// The derived [`Default`] is an empty snapshot: every collection starts empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Generated vendors.
    pub vendors: Vec<Vendor>,
    /// Generated customers.
    pub customers: Vec<Customer>,
    /// Generated materials.
    pub materials: Vec<Material>,
    /// Generated fixed assets.
    pub assets: Vec<FixedAsset>,
    /// Generated employees.
    pub employees: Vec<Employee>,
    /// Generated cost center hierarchy (two-level: departments + sub-departments).
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
}
461
/// Info about a completed hypergraph export.
///
/// Holds the exported element counts together with the directory the export
/// was written to.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes exported.
    pub node_count: usize,
    /// Number of pairwise edges exported.
    pub edge_count: usize,
    /// Number of hyperedges exported.
    pub hyperedge_count: usize,
    /// Output directory path.
    pub output_path: PathBuf,
}
474
/// Document flow snapshot containing all generated document chains.
///
/// Besides the P2P and O2C chains, the snapshot carries flat per-document-type
/// lists. The derived [`Default`] is an empty snapshot.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// All purchase orders (flattened).
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// All goods receipts (flattened).
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// All vendor invoices (flattened).
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// All sales orders (flattened).
    pub sales_orders: Vec<documents::SalesOrder>,
    /// All deliveries (flattened).
    pub deliveries: Vec<documents::Delivery>,
    /// All customer invoices (flattened).
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// All payments (flattened).
    pub payments: Vec<documents::Payment>,
    /// Cross-document references collected from all document headers
    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.).
    pub document_references: Vec<documents::DocumentReference>,
}
500
/// Subledger snapshot containing generated subledger records.
///
/// Groups AP, AR, fixed-asset and inventory subledger output. The derived
/// [`Default`] is an empty snapshot.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices linked from document flow vendor invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices linked from document flow customer invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// FA subledger records (asset acquisitions from FA generator).
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions from inventory generator.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements from inventory generator.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports, one per company, computed after payment settlement.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports, one per company, computed after payment settlement.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs executed after AR aging (one per company per dunning cycle).
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters generated across all dunning runs.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
527
/// OCPM snapshot containing generated OCPM event log data.
///
/// The derived [`Default`] has no event log (`None`) and all counts at zero.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// OCPM event log (`None` if it was not generated).
    pub event_log: Option<OcpmEventLog>,
    /// Number of events generated.
    pub event_count: usize,
    /// Number of objects generated.
    pub object_count: usize,
    /// Number of cases generated.
    pub case_count: usize,
}
540
/// Audit data snapshot containing all generated audit-related entities.
///
/// Fields are grouped by the auditing standard they relate to. The derived
/// [`Default`] is an empty snapshot: collections are empty and the optional
/// fields (`group_audit_plan`, `fsm_event_trail`) are `None`.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements per ISA 210/220.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers per ISA 230.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence per ISA 500.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments per ISA 315/330.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings per ISA 265.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments per ISA 200.
    pub judgments: Vec<ProfessionalJudgment>,
    /// External confirmations per ISA 505.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Confirmation responses per ISA 505.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps per ISA 330/530.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples per ISA 530.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results per ISA 520.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    /// Internal audit functions per ISA 610.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports per ISA 610.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties per ISA 550.
    pub related_parties: Vec<RelatedParty>,
    /// Related party transactions per ISA 550.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    // ---- ISA 600: Group Audits ----
    /// Component auditors assigned by jurisdiction (ISA 600).
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan with materiality allocations (ISA 600).
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Component instructions issued to component auditors (ISA 600).
    pub component_instructions: Vec<ComponentInstruction>,
    /// Reports received from component auditors (ISA 600).
    pub component_reports: Vec<ComponentAuditorReport>,
    // ---- ISA 210: Engagement Letters ----
    /// Engagement letters per ISA 210.
    pub engagement_letters: Vec<EngagementLetter>,
    // ---- ISA 560 / IAS 10: Subsequent Events ----
    /// Subsequent events per ISA 560 / IAS 10.
    pub subsequent_events: Vec<SubsequentEvent>,
    // ---- ISA 402: Service Organization Controls ----
    /// Service organizations identified per ISA 402.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports obtained per ISA 402.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls documented per ISA 402.
    pub user_entity_controls: Vec<UserEntityControl>,
    // ---- ISA 570: Going Concern ----
    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    // ---- ISA 540: Accounting Estimates ----
    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    // ---- ISA 700/701/705/706: Audit Opinions ----
    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key Audit Matters per ISA 701 (flattened across all opinions).
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    // ---- SOX 302 / 404 ----
    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// SOX Section 404 ICFR assessments (one per entity per year).
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    // ---- ISA 320: Materiality ----
    /// Materiality calculations per entity per period (ISA 320).
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    // ---- ISA 315: Combined Risk Assessments ----
    /// Combined Risk Assessments per account area / assertion (ISA 315).
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    // ---- ISA 530: Sampling Plans ----
    /// Sampling plans per CRA at Moderate or higher (ISA 530).
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Individual sampled items (key items + representative items) per ISA 530.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
    /// Significant classes of transactions per ISA 315 (one set per entity).
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    // ---- ISA 520: Unusual Item Markers ----
    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    // ---- ISA 520: Analytical Relationships ----
    /// Analytical relationships (ratios, trends, correlations) per entity.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
    // ---- PCAOB-ISA Cross-Reference ----
    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    // ---- ISA Standard Reference ----
    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    // ---- ISA 220 / ISA 300: Audit Scopes ----
    /// Audit scope records (one per engagement) describing the audit boundary.
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
    // ---- FSM Event Trail ----
    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
    /// Contains the ordered sequence of state-transition and procedure-step events
    /// generated by the audit FSM engine.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
}
653
654/// Banking KYC/AML data snapshot: generated banking entities, their AML labels, and two summary counts. The derived `Default` is an empty snapshot (empty vectors, zero counts).
655#[derive(Debug, Clone, Default)]
656pub struct BankingSnapshot {
657    /// Banking customers (retail, business, trust).
658    pub customers: Vec<BankingCustomer>,
659    /// Bank accounts.
660    pub accounts: Vec<BankAccount>,
661    /// Bank transactions with AML labels.
662    pub transactions: Vec<BankTransaction>,
663    /// Transaction-level AML labels with features (held in their own vector, separate from `transactions`).
664    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
665    /// Customer-level AML labels (held separately from `customers`).
666    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
667    /// Account-level AML labels (held separately from `accounts`).
668    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
669    /// Relationship-level AML labels.
670    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
671    /// Case narratives for AML scenarios.
672    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
673    /// Number of suspicious transactions (a stored value; nothing in this struct derives it from `transactions`).
674    pub suspicious_count: usize,
675    /// Number of AML scenarios generated (a stored value, set by whoever fills the snapshot).
676    pub scenario_count: usize,
677}
678
679/// Graph export snapshot containing exported graph metadata. Derives `Serialize` but not `Deserialize`, so it can be written out but not read back.
680#[derive(Debug, Clone, Default, Serialize)]
681pub struct GraphExportSnapshot {
682    /// Whether graph export was performed (`false` in the derived `Default`).
683    pub exported: bool,
684    /// Number of graphs exported.
685    pub graph_count: usize,
686    /// Exported graph metadata, keyed by format name; each value is a `GraphExportInfo`.
687    pub exports: HashMap<String, GraphExportInfo>,
688}
689
690/// Information about a single exported graph. Does not derive `Default`, so every field must be supplied when constructing one.
691#[derive(Debug, Clone, Serialize)]
692pub struct GraphExportInfo {
693    /// Graph name.
694    pub name: String,
695    /// Export format name (pytorch_geometric, neo4j, dgl).
696    pub format: String,
697    /// Output directory path the graph was written to.
698    pub output_path: PathBuf,
699    /// Number of nodes in the exported graph.
700    pub node_count: usize,
701    /// Number of edges in the exported graph.
702    pub edge_count: usize,
703}
704
705/// Source-to-Contract (S2C) sourcing data snapshot; one vector per sourcing record type, all empty in the derived `Default`.
706#[derive(Debug, Clone, Default)]
707pub struct SourcingSnapshot {
708    /// Spend analyses.
709    pub spend_analyses: Vec<SpendAnalysis>,
710    /// Sourcing projects.
711    pub sourcing_projects: Vec<SourcingProject>,
712    /// Supplier qualifications.
713    pub qualifications: Vec<SupplierQualification>,
714    /// RFx events (RFI, RFP, RFQ).
715    pub rfx_events: Vec<RfxEvent>,
716    /// Supplier bids.
717    pub bids: Vec<SupplierBid>,
718    /// Evaluations of supplier bids.
719    pub bid_evaluations: Vec<BidEvaluation>,
720    /// Procurement contracts.
721    pub contracts: Vec<ProcurementContract>,
722    /// Catalog items.
723    pub catalog_items: Vec<CatalogItem>,
724    /// Supplier scorecards.
725    pub scorecards: Vec<SupplierScorecard>,
726}
727
728/// A single period's trial balance with metadata (fiscal year/period and the period's date range). Serializable and deserializable; has no `Default`.
729#[derive(Debug, Clone, Serialize, Deserialize)]
730pub struct PeriodTrialBalance {
731    /// Fiscal year.
732    pub fiscal_year: u16,
733    /// Fiscal period (1-12); the `u8` type itself does not enforce that range.
734    pub fiscal_period: u8,
735    /// Period start date.
736    pub period_start: NaiveDate,
737    /// Period end date.
738    pub period_end: NaiveDate,
739    /// Trial balance entries for this period.
740    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
741}
742
743/// Financial reporting snapshot (financial statements, consolidation, bank reconciliations, trial balances, segment reporting, notes).
744#[derive(Debug, Clone, Default)]
745pub struct FinancialReportingSnapshot {
746    /// Financial statements (balance sheet, income statement, cash flow).
747    /// For multi-entity configs this includes all standalone statements.
748    pub financial_statements: Vec<FinancialStatement>,
749    /// Standalone financial statements keyed by entity code.
750    /// Each entity code maps to that entity's own vector of statements.
751    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
752    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
753    pub consolidated_statements: Vec<FinancialStatement>,
754    /// Consolidation schedules (one per period) showing pre/post elimination detail.
755    pub consolidation_schedules: Vec<ConsolidationSchedule>,
756    /// Bank reconciliations.
757    pub bank_reconciliations: Vec<BankReconciliation>,
758    /// Period-close trial balances (one `PeriodTrialBalance` per period).
759    pub trial_balances: Vec<PeriodTrialBalance>,
760    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
761    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
762    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
763    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
764    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
765    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
766}
767
768/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions, stock-based compensation), followed by stored per-category counts.
769#[derive(Debug, Clone, Default)]
770pub struct HrSnapshot {
771    /// Payroll runs (actual data).
772    pub payroll_runs: Vec<PayrollRun>,
773    /// Payroll line items (actual data).
774    pub payroll_line_items: Vec<PayrollLineItem>,
775    /// Time entries (actual data).
776    pub time_entries: Vec<TimeEntry>,
777    /// Expense reports (actual data).
778    pub expense_reports: Vec<ExpenseReport>,
779    /// Benefit enrollments (actual data).
780    pub benefit_enrollments: Vec<BenefitEnrollment>,
781    /// Defined benefit pension plans (IAS 19 / ASC 715).
782    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
783    /// Pension obligation (DBO) roll-forwards.
784    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
785    /// Plan asset roll-forwards.
786    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
787    /// Pension disclosures.
788    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
789    /// Journal entries generated from pension expense and OCI remeasurements.
790    pub pension_journal_entries: Vec<JournalEntry>,
791    /// Stock grants (ASC 718 / IFRS 2).
792    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
793    /// Stock-based compensation period expense records.
794    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
795    /// Journal entries generated from stock-based compensation expense.
796    pub stock_comp_journal_entries: Vec<JournalEntry>,
797    /// Payroll run count (a stored `usize`, distinct from the `payroll_runs` vector above).
798    pub payroll_run_count: usize,
799    /// Payroll line item count.
800    pub payroll_line_item_count: usize,
801    /// Time entry count.
802    pub time_entry_count: usize,
803    /// Expense report count.
804    pub expense_report_count: usize,
805    /// Benefit enrollment count.
806    pub benefit_enrollment_count: usize,
807    /// Pension plan count.
808    pub pension_plan_count: usize,
809    /// Stock grant count.
810    pub stock_grant_count: usize,
811}
812
813/// Accounting standards data snapshot (revenue recognition, impairment, business combinations, ECL, provisions, currency translation), followed by stored counts.
814#[derive(Debug, Clone, Default)]
815pub struct AccountingStandardsSnapshot {
816    /// Revenue recognition contracts (actual data).
817    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
818    /// Impairment tests (actual data).
819    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
820    /// Business combinations (IFRS 3 / ASC 805).
821    pub business_combinations:
822        Vec<datasynth_core::models::business_combination::BusinessCombination>,
823    /// Journal entries generated from business combinations (Day 1 + amortization).
824    pub business_combination_journal_entries: Vec<JournalEntry>,
825    /// ECL (expected credit loss) models (IFRS 9 / ASC 326).
826    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
827    /// ECL provision movements.
828    pub ecl_provision_movements:
829        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
830    /// Journal entries from ECL provision.
831    pub ecl_journal_entries: Vec<JournalEntry>,
832    /// Provisions (IAS 37 / ASC 450).
833    pub provisions: Vec<datasynth_core::models::provision::Provision>,
834    /// Provision movement roll-forwards (IAS 37 / ASC 450).
835    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
836    /// Contingent liabilities (IAS 37 / ASC 450).
837    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
838    /// Journal entries from provisions.
839    pub provision_journal_entries: Vec<JournalEntry>,
840    /// IAS 21 functional currency translation results (one per entity per period).
841    pub currency_translation_results:
842        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
843    /// Revenue recognition contract count. NOTE(review): presumably mirrors `contracts.len()` — confirm against the producer.
844    pub revenue_contract_count: usize,
845    /// Impairment test count.
846    pub impairment_test_count: usize,
847    /// Business combination count.
848    pub business_combination_count: usize,
849    /// ECL model count.
850    pub ecl_model_count: usize,
851    /// Provision count.
852    pub provision_count: usize,
853    /// Currency translation result count (IAS 21).
854    pub currency_translation_count: usize,
855}
856
857/// Compliance regulations framework snapshot (standards, cross-references, jurisdictions, procedures, findings, filings, optional graph).
858#[derive(Debug, Clone, Default)]
859pub struct ComplianceRegulationsSnapshot {
860    /// Flattened standard records for output.
861    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
862    /// Cross-reference records.
863    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
864    /// Jurisdiction profile records.
865    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
866    /// Generated audit procedures.
867    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
868    /// Generated compliance findings.
869    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
870    /// Generated regulatory filings.
871    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
872    /// Compliance graph (if graph integration enabled); `None` in the derived `Default`.
873    pub compliance_graph: Option<datasynth_graph::Graph>,
874}
875
876/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements), with one stored count per data vector.
877#[derive(Debug, Clone, Default)]
878pub struct ManufacturingSnapshot {
879    /// Production orders (actual data).
880    pub production_orders: Vec<ProductionOrder>,
881    /// Quality inspections (actual data).
882    pub quality_inspections: Vec<QualityInspection>,
883    /// Cycle counts (actual data).
884    pub cycle_counts: Vec<CycleCount>,
885    /// BOM components (actual data).
886    pub bom_components: Vec<BomComponent>,
887    /// Inventory movements (actual data).
888    pub inventory_movements: Vec<InventoryMovement>,
889    /// Production order count.
890    pub production_order_count: usize,
891    /// Quality inspection count.
892    pub quality_inspection_count: usize,
893    /// Number of cycle counts (the inventory "cycle count" records above).
894    pub cycle_count_count: usize,
895    /// BOM component count.
896    pub bom_component_count: usize,
897    /// Inventory movement count.
898    pub inventory_movement_count: usize,
899}
900
901/// Sales, KPI, and budget data snapshot: three data vectors plus three stored counts.
902#[derive(Debug, Clone, Default)]
903pub struct SalesKpiBudgetsSnapshot {
904    /// Sales quotes (actual data).
905    pub sales_quotes: Vec<SalesQuote>,
906    /// Management KPIs (actual data).
907    pub kpis: Vec<ManagementKpi>,
908    /// Budgets (actual data).
909    pub budgets: Vec<Budget>,
910    /// Sales quote count.
911    pub sales_quote_count: usize,
912    /// Management KPI count.
913    pub kpi_count: usize,
914    /// Budget line count. NOTE(review): named for budget *lines*, so it may differ from `budgets.len()` — confirm with the producer.
915    pub budget_line_count: usize,
916}
917
918/// Anomaly labels generated during injection, with an optional summary and a per-type tally. The derived `Default` is empty with no summary.
919#[derive(Debug, Clone, Default)]
920pub struct AnomalyLabels {
921    /// All anomaly labels.
922    pub labels: Vec<LabeledAnomaly>,
923    /// Summary statistics; `None` when no summary is attached.
924    pub summary: Option<AnomalySummary>,
925    /// Count by anomaly type, keyed by a string name for the type.
926    pub by_type: HashMap<String, usize>,
927}
928
929/// Balance validation results from running balance tracker. The derived `Default` has `validated == false` and `is_balanced == false`.
930#[derive(Debug, Clone, Default)]
931pub struct BalanceValidationResult {
932    /// Whether validation was performed.
933    pub validated: bool,
934    /// Whether balance sheet equation is satisfied. Also `false` by default, so check `validated` before interpreting it.
935    pub is_balanced: bool,
936    /// Number of entries processed.
937    pub entries_processed: u64,
938    /// Total debits across all entries.
939    pub total_debits: rust_decimal::Decimal,
940    /// Total credits across all entries.
941    pub total_credits: rust_decimal::Decimal,
942    /// Number of accounts tracked.
943    pub accounts_tracked: usize,
944    /// Number of companies tracked.
945    pub companies_tracked: usize,
946    /// Validation errors encountered.
947    pub validation_errors: Vec<ValidationError>,
948    /// Whether any unbalanced entries were found (a separate flag from `is_balanced`).
949    pub has_unbalanced_entries: bool,
950}
951
952/// Tax data snapshot (jurisdictions, codes, tax lines, returns, provisions, withholding, anomaly labels, deferred tax, posting journal entries).
953#[derive(Debug, Clone, Default)]
954pub struct TaxSnapshot {
955    /// Tax jurisdictions.
956    pub jurisdictions: Vec<TaxJurisdiction>,
957    /// Tax codes.
958    pub codes: Vec<TaxCode>,
959    /// Tax lines computed on documents.
960    pub tax_lines: Vec<TaxLine>,
961    /// Tax returns filed per period.
962    pub tax_returns: Vec<TaxReturn>,
963    /// Tax provisions.
964    pub tax_provisions: Vec<TaxProvision>,
965    /// Withholding tax records.
966    pub withholding_records: Vec<WithholdingTaxRecord>,
967    /// Tax anomaly labels.
968    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
969    /// Jurisdiction count (stored separately from the `jurisdictions` vector).
970    pub jurisdiction_count: usize,
971    /// Code count (stored separately from the `codes` vector).
972    pub code_count: usize,
973    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
974    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
975    /// Journal entries posting tax payable/receivable from computed tax lines.
976    pub tax_posting_journal_entries: Vec<JournalEntry>,
977}
978
979/// Intercompany (IC) data snapshot (group structure, matched pairs, journal entries, eliminations, NCI). Serializable and deserializable, except for `ic_document_chains`.
980#[derive(Debug, Clone, Default, Serialize, Deserialize)]
981pub struct IntercompanySnapshot {
982    /// Group ownership structure (parent/subsidiary/associate relationships); `None` when absent.
983    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
984    /// IC matched pairs (transaction pairs between related entities).
985    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
986    /// IC journal entries generated from matched pairs (seller side).
987    pub seller_journal_entries: Vec<JournalEntry>,
988    /// IC journal entries generated from matched pairs (buyer side).
989    pub buyer_journal_entries: Vec<JournalEntry>,
990    /// Elimination entries for consolidation.
991    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
992    /// NCI measurements derived from group structure ownership percentages.
993    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
994    /// IC source document chains (seller invoices, buyer POs/GRs/VIs). Skipped by serde in both directions; a deserialized snapshot gets the field's default (`None`).
995    #[serde(skip)]
996    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
997    /// IC matched pair count.
998    pub matched_pair_count: usize,
999    /// IC elimination entry count.
1000    pub elimination_entry_count: usize,
1001    /// IC matching rate (0.0 to 1.0); the `f64` type itself does not enforce that range.
1002    pub match_rate: f64,
1003}
1004
1005/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures, climate scenarios, anomaly labels).
1006#[derive(Debug, Clone, Default)]
1007pub struct EsgSnapshot {
1008    /// Emission records (scope 1, 2, 3).
1009    pub emissions: Vec<EmissionRecord>,
1010    /// Energy consumption records.
1011    pub energy: Vec<EnergyConsumption>,
1012    /// Water usage records.
1013    pub water: Vec<WaterUsage>,
1014    /// Waste records.
1015    pub waste: Vec<WasteRecord>,
1016    /// Workforce diversity metrics.
1017    pub diversity: Vec<WorkforceDiversityMetric>,
1018    /// Pay equity metrics.
1019    pub pay_equity: Vec<PayEquityMetric>,
1020    /// Safety incidents (individual incident records).
1021    pub safety_incidents: Vec<SafetyIncident>,
1022    /// Safety metrics (held separately from the incident records).
1023    pub safety_metrics: Vec<SafetyMetric>,
1024    /// Governance metrics.
1025    pub governance: Vec<GovernanceMetric>,
1026    /// Supplier ESG assessments.
1027    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1028    /// ESG materiality assessments.
1029    pub materiality: Vec<MaterialityAssessment>,
1030    /// ESG disclosures.
1031    pub disclosures: Vec<EsgDisclosure>,
1032    /// Climate scenarios.
1033    pub climate_scenarios: Vec<ClimateScenario>,
1034    /// ESG anomaly labels.
1035    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1036    /// Total emission record count (stored separately from `emissions`).
1037    pub emission_count: usize,
1038    /// Total disclosure count (stored separately from `disclosures`).
1039    pub disclosure_count: usize,
1040}
1041
1042/// Treasury data snapshot (cash management, pooling, hedging, debt, guarantees, netting, anomaly labels, journal entries).
1043#[derive(Debug, Clone, Default)]
1044pub struct TreasurySnapshot {
1045    /// Cash positions (daily balances per account).
1046    pub cash_positions: Vec<CashPosition>,
1047    /// Cash forecasts.
1048    pub cash_forecasts: Vec<CashForecast>,
1049    /// Cash pools.
1050    pub cash_pools: Vec<CashPool>,
1051    /// Cash pool sweep transactions.
1052    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1053    /// Hedging instruments.
1054    pub hedging_instruments: Vec<HedgingInstrument>,
1055    /// Hedge relationships (ASC 815/IFRS 9 designations).
1056    pub hedge_relationships: Vec<HedgeRelationship>,
1057    /// Debt instruments.
1058    pub debt_instruments: Vec<DebtInstrument>,
1059    /// Bank guarantees and letters of credit.
1060    pub bank_guarantees: Vec<BankGuarantee>,
1061    /// Intercompany netting runs.
1062    pub netting_runs: Vec<NettingRun>,
1063    /// Treasury anomaly labels.
1064    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1065    /// Journal entries generated from treasury instruments (debt interest accruals,
1066    /// hedge mark-to-market (MTM), cash pool sweeps).
1067    pub journal_entries: Vec<JournalEntry>,
1068}
1069
1070/// Project accounting data snapshot (projects, cost lines, revenue, earned value metrics, change orders, milestones).
1071#[derive(Debug, Clone, Default)]
1072pub struct ProjectAccountingSnapshot {
1073    /// Projects with WBS hierarchies.
1074    pub projects: Vec<Project>,
1075    /// Project cost lines (linked from source documents).
1076    pub cost_lines: Vec<ProjectCostLine>,
1077    /// Revenue recognition records.
1078    pub revenue_records: Vec<ProjectRevenue>,
1079    /// Earned value (EVM) metrics.
1080    pub earned_value_metrics: Vec<EarnedValueMetric>,
1081    /// Change orders.
1082    pub change_orders: Vec<ChangeOrder>,
1083    /// Project milestones.
1084    pub milestones: Vec<ProjectMilestone>,
1085}
1086
1087/// Complete result of enhanced generation run, aggregating every per-domain snapshot. Derives only `Debug` and `Default` (not `Clone`), so the result is moved or borrowed rather than copied.
1088#[derive(Debug, Default)]
1089pub struct EnhancedGenerationResult {
1090    /// Generated chart of accounts.
1091    pub chart_of_accounts: ChartOfAccounts,
1092    /// Master data snapshot.
1093    pub master_data: MasterDataSnapshot,
1094    /// Document flow snapshot.
1095    pub document_flows: DocumentFlowSnapshot,
1096    /// Subledger snapshot (linked from document flows).
1097    pub subledger: SubledgerSnapshot,
1098    /// OCPM event log snapshot (if OCPM generation enabled).
1099    pub ocpm: OcpmSnapshot,
1100    /// Audit data snapshot (if audit generation enabled).
1101    pub audit: AuditSnapshot,
1102    /// Banking KYC/AML data snapshot (if banking generation enabled).
1103    pub banking: BankingSnapshot,
1104    /// Graph export snapshot (if graph export enabled).
1105    pub graph_export: GraphExportSnapshot,
1106    /// S2C sourcing data snapshot (if sourcing generation enabled).
1107    pub sourcing: SourcingSnapshot,
1108    /// Financial reporting snapshot (financial statements, consolidation, bank reconciliations, trial balances, segments, notes).
1109    pub financial_reporting: FinancialReportingSnapshot,
1110    /// HR data snapshot (payroll, time entries, expenses, benefits, pensions, stock compensation).
1111    pub hr: HrSnapshot,
1112    /// Accounting standards snapshot (revenue recognition, impairment, business combinations, ECL, provisions, currency translation).
1113    pub accounting_standards: AccountingStandardsSnapshot,
1114    /// Manufacturing snapshot (production orders, quality inspections, cycle counts, BOM components, inventory movements).
1115    pub manufacturing: ManufacturingSnapshot,
1116    /// Sales, KPI, and budget snapshot.
1117    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1118    /// Tax data snapshot (jurisdictions, codes, tax lines, provisions, returns, withholding, deferred tax).
1119    pub tax: TaxSnapshot,
1120    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1121    pub esg: EsgSnapshot,
1122    /// Treasury data snapshot (cash management, hedging, debt).
1123    pub treasury: TreasurySnapshot,
1124    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1125    pub project_accounting: ProjectAccountingSnapshot,
1126    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1127    pub process_evolution: Vec<ProcessEvolutionEvent>,
1128    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1129    pub organizational_events: Vec<OrganizationalEvent>,
1130    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1131    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1132    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1133    pub intercompany: IntercompanySnapshot,
1134    /// Generated journal entries.
1135    pub journal_entries: Vec<JournalEntry>,
1136    /// Anomaly labels (if injection enabled).
1137    pub anomaly_labels: AnomalyLabels,
1138    /// Balance validation results (if validation enabled); its `validated` flag is `false` by default.
1139    pub balance_validation: BalanceValidationResult,
1140    /// Data quality statistics (if injection enabled).
1141    pub data_quality_stats: DataQualityStats,
1142    /// Data quality issue records (if injection enabled).
1143    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1144    /// Generation statistics.
1145    pub statistics: EnhancedGenerationStatistics,
1146    /// Data lineage graph (if tracking enabled); `None` otherwise.
1147    pub lineage: Option<super::lineage::LineageGraph>,
1148    /// Quality gate evaluation result; `None` when no result is attached.
1149    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1150    /// Internal controls (if controls generation enabled).
1151    pub internal_controls: Vec<InternalControl>,
1152    /// SoD (Segregation of Duties) violations identified during control application.
1153    ///
1154    /// Each record corresponds to a journal entry where `sod_violation == true`.
1155    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1156    /// Opening balances (if opening balance generation enabled).
1157    pub opening_balances: Vec<GeneratedOpeningBalance>,
1158    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1159    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1160    /// Counterfactual (original, mutated) journal entry pairs for ML training.
1161    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1162    /// Fraud red-flag indicators on P2P/O2C documents.
1163    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1164    /// Collusion rings (coordinated fraud networks).
1165    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1166    /// Bi-temporal version chains for vendor entities.
1167    pub temporal_vendor_chains:
1168        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1169    /// Entity relationship graph (nodes + edges with strength scores); `None` when absent.
1170    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1171    /// Cross-process links (P2P ↔ O2C via inventory movements).
1172    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1173    /// Industry-specific GL accounts and metadata; `None` when absent.
1174    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1175    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1176    pub compliance_regulations: ComplianceRegulationsSnapshot,
1177}
1178
1179/// Enhanced statistics about a generation run.
1180#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1181pub struct EnhancedGenerationStatistics {
1182    /// Total journal entries generated.
1183    pub total_entries: u64,
1184    /// Total line items generated.
1185    pub total_line_items: u64,
1186    /// Number of accounts in CoA.
1187    pub accounts_count: usize,
1188    /// Number of companies.
1189    pub companies_count: usize,
1190    /// Period in months.
1191    pub period_months: u32,
1192    /// Master data counts.
1193    pub vendor_count: usize,
1194    pub customer_count: usize,
1195    pub material_count: usize,
1196    pub asset_count: usize,
1197    pub employee_count: usize,
1198    /// Document flow counts.
1199    pub p2p_chain_count: usize,
1200    pub o2c_chain_count: usize,
1201    /// Subledger counts.
1202    pub ap_invoice_count: usize,
1203    pub ar_invoice_count: usize,
1204    /// OCPM counts.
1205    pub ocpm_event_count: usize,
1206    pub ocpm_object_count: usize,
1207    pub ocpm_case_count: usize,
1208    /// Audit counts.
1209    pub audit_engagement_count: usize,
1210    pub audit_workpaper_count: usize,
1211    pub audit_evidence_count: usize,
1212    pub audit_risk_count: usize,
1213    pub audit_finding_count: usize,
1214    pub audit_judgment_count: usize,
1215    /// ISA 505 confirmation counts.
1216    #[serde(default)]
1217    pub audit_confirmation_count: usize,
1218    #[serde(default)]
1219    pub audit_confirmation_response_count: usize,
1220    /// ISA 330/530 procedure step and sample counts.
1221    #[serde(default)]
1222    pub audit_procedure_step_count: usize,
1223    #[serde(default)]
1224    pub audit_sample_count: usize,
1225    /// ISA 520 analytical procedure counts.
1226    #[serde(default)]
1227    pub audit_analytical_result_count: usize,
1228    /// ISA 610 internal audit counts.
1229    #[serde(default)]
1230    pub audit_ia_function_count: usize,
1231    #[serde(default)]
1232    pub audit_ia_report_count: usize,
1233    /// ISA 550 related party counts.
1234    #[serde(default)]
1235    pub audit_related_party_count: usize,
1236    #[serde(default)]
1237    pub audit_related_party_transaction_count: usize,
1238    /// Anomaly counts.
1239    pub anomalies_injected: usize,
1240    /// Data quality issue counts.
1241    pub data_quality_issues: usize,
1242    /// Banking counts.
1243    pub banking_customer_count: usize,
1244    pub banking_account_count: usize,
1245    pub banking_transaction_count: usize,
1246    pub banking_suspicious_count: usize,
1247    /// Graph export counts.
1248    pub graph_export_count: usize,
1249    pub graph_node_count: usize,
1250    pub graph_edge_count: usize,
1251    /// LLM enrichment timing (milliseconds).
1252    #[serde(default)]
1253    pub llm_enrichment_ms: u64,
1254    /// Number of vendor names enriched by LLM.
1255    #[serde(default)]
1256    pub llm_vendors_enriched: usize,
1257    /// Diffusion enhancement timing (milliseconds).
1258    #[serde(default)]
1259    pub diffusion_enhancement_ms: u64,
1260    /// Number of diffusion samples generated.
1261    #[serde(default)]
1262    pub diffusion_samples_generated: usize,
1263    /// Causal generation timing (milliseconds).
1264    #[serde(default)]
1265    pub causal_generation_ms: u64,
1266    /// Number of causal samples generated.
1267    #[serde(default)]
1268    pub causal_samples_generated: usize,
1269    /// Whether causal validation passed.
1270    #[serde(default)]
1271    pub causal_validation_passed: Option<bool>,
1272    /// S2C sourcing counts.
1273    #[serde(default)]
1274    pub sourcing_project_count: usize,
1275    #[serde(default)]
1276    pub rfx_event_count: usize,
1277    #[serde(default)]
1278    pub bid_count: usize,
1279    #[serde(default)]
1280    pub contract_count: usize,
1281    #[serde(default)]
1282    pub catalog_item_count: usize,
1283    #[serde(default)]
1284    pub scorecard_count: usize,
1285    /// Financial reporting counts.
1286    #[serde(default)]
1287    pub financial_statement_count: usize,
1288    #[serde(default)]
1289    pub bank_reconciliation_count: usize,
1290    /// HR counts.
1291    #[serde(default)]
1292    pub payroll_run_count: usize,
1293    #[serde(default)]
1294    pub time_entry_count: usize,
1295    #[serde(default)]
1296    pub expense_report_count: usize,
1297    #[serde(default)]
1298    pub benefit_enrollment_count: usize,
1299    #[serde(default)]
1300    pub pension_plan_count: usize,
1301    #[serde(default)]
1302    pub stock_grant_count: usize,
1303    /// Accounting standards counts.
1304    #[serde(default)]
1305    pub revenue_contract_count: usize,
1306    #[serde(default)]
1307    pub impairment_test_count: usize,
1308    #[serde(default)]
1309    pub business_combination_count: usize,
1310    #[serde(default)]
1311    pub ecl_model_count: usize,
1312    #[serde(default)]
1313    pub provision_count: usize,
1314    /// Manufacturing counts.
1315    #[serde(default)]
1316    pub production_order_count: usize,
1317    #[serde(default)]
1318    pub quality_inspection_count: usize,
1319    #[serde(default)]
1320    pub cycle_count_count: usize,
1321    #[serde(default)]
1322    pub bom_component_count: usize,
1323    #[serde(default)]
1324    pub inventory_movement_count: usize,
1325    /// Sales & reporting counts.
1326    #[serde(default)]
1327    pub sales_quote_count: usize,
1328    #[serde(default)]
1329    pub kpi_count: usize,
1330    #[serde(default)]
1331    pub budget_line_count: usize,
1332    /// Tax counts.
1333    #[serde(default)]
1334    pub tax_jurisdiction_count: usize,
1335    #[serde(default)]
1336    pub tax_code_count: usize,
1337    /// ESG counts.
1338    #[serde(default)]
1339    pub esg_emission_count: usize,
1340    #[serde(default)]
1341    pub esg_disclosure_count: usize,
1342    /// Intercompany counts.
1343    #[serde(default)]
1344    pub ic_matched_pair_count: usize,
1345    #[serde(default)]
1346    pub ic_elimination_count: usize,
1347    /// Number of intercompany journal entries (seller + buyer side).
1348    #[serde(default)]
1349    pub ic_transaction_count: usize,
1350    /// Number of fixed asset subledger records.
1351    #[serde(default)]
1352    pub fa_subledger_count: usize,
1353    /// Number of inventory subledger records.
1354    #[serde(default)]
1355    pub inventory_subledger_count: usize,
1356    /// Treasury debt instrument count.
1357    #[serde(default)]
1358    pub treasury_debt_instrument_count: usize,
1359    /// Treasury hedging instrument count.
1360    #[serde(default)]
1361    pub treasury_hedging_instrument_count: usize,
1362    /// Project accounting project count.
1363    #[serde(default)]
1364    pub project_count: usize,
1365    /// Project accounting change order count.
1366    #[serde(default)]
1367    pub project_change_order_count: usize,
1368    /// Tax provision count.
1369    #[serde(default)]
1370    pub tax_provision_count: usize,
1371    /// Opening balance count.
1372    #[serde(default)]
1373    pub opening_balance_count: usize,
1374    /// Subledger reconciliation count.
1375    #[serde(default)]
1376    pub subledger_reconciliation_count: usize,
1377    /// Tax line count.
1378    #[serde(default)]
1379    pub tax_line_count: usize,
1380    /// Project cost line count.
1381    #[serde(default)]
1382    pub project_cost_line_count: usize,
1383    /// Cash position count.
1384    #[serde(default)]
1385    pub cash_position_count: usize,
1386    /// Cash forecast count.
1387    #[serde(default)]
1388    pub cash_forecast_count: usize,
1389    /// Cash pool count.
1390    #[serde(default)]
1391    pub cash_pool_count: usize,
1392    /// Process evolution event count.
1393    #[serde(default)]
1394    pub process_evolution_event_count: usize,
1395    /// Organizational event count.
1396    #[serde(default)]
1397    pub organizational_event_count: usize,
1398    /// Counterfactual pair count.
1399    #[serde(default)]
1400    pub counterfactual_pair_count: usize,
1401    /// Number of fraud red-flag indicators generated.
1402    #[serde(default)]
1403    pub red_flag_count: usize,
1404    /// Number of collusion rings generated.
1405    #[serde(default)]
1406    pub collusion_ring_count: usize,
1407    /// Number of bi-temporal vendor version chains generated.
1408    #[serde(default)]
1409    pub temporal_version_chain_count: usize,
1410    /// Number of nodes in the entity relationship graph.
1411    #[serde(default)]
1412    pub entity_relationship_node_count: usize,
1413    /// Number of edges in the entity relationship graph.
1414    #[serde(default)]
1415    pub entity_relationship_edge_count: usize,
1416    /// Number of cross-process links generated.
1417    #[serde(default)]
1418    pub cross_process_link_count: usize,
1419    /// Number of disruption events generated.
1420    #[serde(default)]
1421    pub disruption_event_count: usize,
1422    /// Number of industry-specific GL accounts generated.
1423    #[serde(default)]
1424    pub industry_gl_account_count: usize,
1425    /// Number of period-close journal entries generated (tax provision + closing entries).
1426    #[serde(default)]
1427    pub period_close_je_count: usize,
1428}
1429
1430/// Enhanced orchestrator with full feature integration.
1431pub struct EnhancedOrchestrator {
1432    config: GeneratorConfig,
1433    phase_config: PhaseConfig,
1434    coa: Option<Arc<ChartOfAccounts>>,
1435    master_data: MasterDataSnapshot,
1436    seed: u64,
1437    multi_progress: Option<MultiProgress>,
1438    /// Resource guard for memory, disk, and CPU monitoring
1439    resource_guard: ResourceGuard,
1440    /// Output path for disk space monitoring
1441    output_path: Option<PathBuf>,
1442    /// Copula generators for preserving correlations (from fingerprint)
1443    copula_generators: Vec<CopulaGeneratorSpec>,
1444    /// Country pack registry for localized data generation
1445    country_pack_registry: datasynth_core::CountryPackRegistry,
1446    /// Optional streaming sink for phase-by-phase output
1447    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1448}
1449
1450impl EnhancedOrchestrator {
1451    /// Create a new enhanced orchestrator.
1452    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1453        datasynth_config::validate_config(&config)?;
1454
1455        let seed = config.global.seed.unwrap_or_else(rand::random);
1456
1457        // Build resource guard from config
1458        let resource_guard = Self::build_resource_guard(&config, None);
1459
1460        // Build country pack registry from config
1461        let country_pack_registry = match &config.country_packs {
1462            Some(cp) => {
1463                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1464                    .map_err(|e| SynthError::config(e.to_string()))?
1465            }
1466            None => datasynth_core::CountryPackRegistry::builtin_only()
1467                .map_err(|e| SynthError::config(e.to_string()))?,
1468        };
1469
1470        Ok(Self {
1471            config,
1472            phase_config,
1473            coa: None,
1474            master_data: MasterDataSnapshot::default(),
1475            seed,
1476            multi_progress: None,
1477            resource_guard,
1478            output_path: None,
1479            copula_generators: Vec::new(),
1480            country_pack_registry,
1481            phase_sink: None,
1482        })
1483    }
1484
1485    /// Create with default phase config.
1486    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1487        Self::new(config, PhaseConfig::default())
1488    }
1489
1490    /// Set a streaming phase sink for real-time output (builder pattern).
1491    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1492        self.phase_sink = Some(sink);
1493        self
1494    }
1495
1496    /// Set a streaming phase sink on an existing orchestrator.
1497    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1498        self.phase_sink = Some(sink);
1499    }
1500
1501    /// Emit a batch of items to the phase sink (if configured).
1502    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1503        if let Some(ref sink) = self.phase_sink {
1504            for item in items {
1505                if let Ok(value) = serde_json::to_value(item) {
1506                    if let Err(e) = sink.emit(phase, type_name, &value) {
1507                        warn!(
1508                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1509                        );
1510                    }
1511                }
1512            }
1513            if let Err(e) = sink.phase_complete(phase) {
1514                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1515            }
1516        }
1517    }
1518
1519    /// Enable/disable progress bars.
1520    pub fn with_progress(mut self, show: bool) -> Self {
1521        self.phase_config.show_progress = show;
1522        if show {
1523            self.multi_progress = Some(MultiProgress::new());
1524        }
1525        self
1526    }
1527
1528    /// Set the output path for disk space monitoring.
1529    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1530        let path = path.into();
1531        self.output_path = Some(path.clone());
1532        // Rebuild resource guard with the output path
1533        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1534        self
1535    }
1536
1537    /// Access the country pack registry.
1538    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1539        &self.country_pack_registry
1540    }
1541
1542    /// Look up a country pack by country code string.
1543    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1544        self.country_pack_registry.get_by_str(country)
1545    }
1546
1547    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1548    /// company, defaulting to `"US"` if no companies are configured.
1549    fn primary_country_code(&self) -> &str {
1550        self.config
1551            .companies
1552            .first()
1553            .map(|c| c.country.as_str())
1554            .unwrap_or("US")
1555    }
1556
1557    /// Resolve the country pack for the primary (first) company.
1558    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1559        self.country_pack_for(self.primary_country_code())
1560    }
1561
1562    /// Resolve the CoA framework from config/country-pack.
1563    fn resolve_coa_framework(&self) -> CoAFramework {
1564        if self.config.accounting_standards.enabled {
1565            match self.config.accounting_standards.framework {
1566                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1567                    return CoAFramework::FrenchPcg;
1568                }
1569                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1570                    return CoAFramework::GermanSkr04;
1571                }
1572                _ => {}
1573            }
1574        }
1575        // Fallback: derive from country pack
1576        let pack = self.primary_pack();
1577        match pack.accounting.framework.as_str() {
1578            "french_gaap" => CoAFramework::FrenchPcg,
1579            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1580            _ => CoAFramework::UsGaap,
1581        }
1582    }
1583
1584    /// Check if copula generators are available.
1585    ///
1586    /// Returns true if the orchestrator has copula generators for preserving
1587    /// correlations (typically from fingerprint-based generation).
1588    pub fn has_copulas(&self) -> bool {
1589        !self.copula_generators.is_empty()
1590    }
1591
1592    /// Get the copula generators.
1593    ///
1594    /// Returns a reference to the copula generators for use during generation.
1595    /// These can be used to generate correlated samples that preserve the
1596    /// statistical relationships from the source data.
1597    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1598        &self.copula_generators
1599    }
1600
1601    /// Get a mutable reference to the copula generators.
1602    ///
1603    /// Allows generators to sample from copulas during data generation.
1604    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1605        &mut self.copula_generators
1606    }
1607
1608    /// Sample correlated values from a named copula.
1609    ///
1610    /// Returns None if the copula doesn't exist.
1611    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1612        self.copula_generators
1613            .iter_mut()
1614            .find(|c| c.name == copula_name)
1615            .map(|c| c.generator.sample())
1616    }
1617
1618    /// Create an orchestrator from a fingerprint file.
1619    ///
1620    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1621    /// and creates an orchestrator configured to generate data matching
1622    /// the statistical properties of the original data.
1623    ///
1624    /// # Arguments
1625    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1626    /// * `phase_config` - Phase configuration for generation
1627    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1628    ///
1629    /// # Example
1630    /// ```no_run
1631    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1632    /// use std::path::Path;
1633    ///
1634    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1635    ///     Path::new("fingerprint.dsf"),
1636    ///     PhaseConfig::default(),
1637    ///     1.0,
1638    /// ).unwrap();
1639    /// ```
1640    pub fn from_fingerprint(
1641        fingerprint_path: &std::path::Path,
1642        phase_config: PhaseConfig,
1643        scale: f64,
1644    ) -> SynthResult<Self> {
1645        info!("Loading fingerprint from: {}", fingerprint_path.display());
1646
1647        // Read the fingerprint
1648        let reader = FingerprintReader::new();
1649        let fingerprint = reader
1650            .read_from_file(fingerprint_path)
1651            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1652
1653        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1654    }
1655
1656    /// Create an orchestrator from a loaded fingerprint.
1657    ///
1658    /// # Arguments
1659    /// * `fingerprint` - The loaded fingerprint
1660    /// * `phase_config` - Phase configuration for generation
1661    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1662    pub fn from_fingerprint_data(
1663        fingerprint: Fingerprint,
1664        phase_config: PhaseConfig,
1665        scale: f64,
1666    ) -> SynthResult<Self> {
1667        info!(
1668            "Synthesizing config from fingerprint (version: {}, tables: {})",
1669            fingerprint.manifest.version,
1670            fingerprint.schema.tables.len()
1671        );
1672
1673        // Generate a seed for the synthesis
1674        let seed: u64 = rand::random();
1675        info!("Fingerprint synthesis seed: {}", seed);
1676
1677        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1678        let options = SynthesisOptions {
1679            scale,
1680            seed: Some(seed),
1681            preserve_correlations: true,
1682            inject_anomalies: true,
1683        };
1684        let synthesizer = ConfigSynthesizer::with_options(options);
1685
1686        // Synthesize full result including copula generators
1687        let synthesis_result = synthesizer
1688            .synthesize_full(&fingerprint, seed)
1689            .map_err(|e| {
1690                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1691            })?;
1692
1693        // Start with a base config from the fingerprint's industry if available
1694        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1695            Self::base_config_for_industry(industry)
1696        } else {
1697            Self::base_config_for_industry("manufacturing")
1698        };
1699
1700        // Apply the synthesized patches
1701        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1702
1703        // Log synthesis results
1704        info!(
1705            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1706            fingerprint.schema.tables.len(),
1707            scale,
1708            synthesis_result.copula_generators.len()
1709        );
1710
1711        if !synthesis_result.copula_generators.is_empty() {
1712            for spec in &synthesis_result.copula_generators {
1713                info!(
1714                    "  Copula '{}' for table '{}': {} columns",
1715                    spec.name,
1716                    spec.table,
1717                    spec.columns.len()
1718                );
1719            }
1720        }
1721
1722        // Create the orchestrator with the synthesized config
1723        let mut orchestrator = Self::new(config, phase_config)?;
1724
1725        // Store copula generators for use during generation
1726        orchestrator.copula_generators = synthesis_result.copula_generators;
1727
1728        Ok(orchestrator)
1729    }
1730
1731    /// Create a base config for a given industry.
1732    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1733        use datasynth_config::presets::create_preset;
1734        use datasynth_config::TransactionVolume;
1735        use datasynth_core::models::{CoAComplexity, IndustrySector};
1736
1737        let sector = match industry.to_lowercase().as_str() {
1738            "manufacturing" => IndustrySector::Manufacturing,
1739            "retail" => IndustrySector::Retail,
1740            "financial" | "financial_services" => IndustrySector::FinancialServices,
1741            "healthcare" => IndustrySector::Healthcare,
1742            "technology" | "tech" => IndustrySector::Technology,
1743            _ => IndustrySector::Manufacturing,
1744        };
1745
1746        // Create a preset with reasonable defaults
1747        create_preset(
1748            sector,
1749            1,  // company count
1750            12, // period months
1751            CoAComplexity::Medium,
1752            TransactionVolume::TenK,
1753        )
1754    }
1755
1756    /// Apply a config patch to a GeneratorConfig.
1757    fn apply_config_patch(
1758        mut config: GeneratorConfig,
1759        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1760    ) -> GeneratorConfig {
1761        use datasynth_fingerprint::synthesis::ConfigValue;
1762
1763        for (key, value) in patch.values() {
1764            match (key.as_str(), value) {
1765                // Transaction count is handled via TransactionVolume enum on companies
1766                // Log it but cannot directly set it (would need to modify company volumes)
1767                ("transactions.count", ConfigValue::Integer(n)) => {
1768                    info!(
1769                        "Fingerprint suggests {} transactions (apply via company volumes)",
1770                        n
1771                    );
1772                }
1773                ("global.period_months", ConfigValue::Integer(n)) => {
1774                    config.global.period_months = (*n).clamp(1, 120) as u32;
1775                }
1776                ("global.start_date", ConfigValue::String(s)) => {
1777                    config.global.start_date = s.clone();
1778                }
1779                ("global.seed", ConfigValue::Integer(n)) => {
1780                    config.global.seed = Some(*n as u64);
1781                }
1782                ("fraud.enabled", ConfigValue::Bool(b)) => {
1783                    config.fraud.enabled = *b;
1784                }
1785                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1786                    config.fraud.fraud_rate = *f;
1787                }
1788                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1789                    config.data_quality.enabled = *b;
1790                }
1791                // Handle anomaly injection paths (mapped to fraud config)
1792                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1793                    config.fraud.enabled = *b;
1794                }
1795                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1796                    config.fraud.fraud_rate = *f;
1797                }
1798                _ => {
1799                    debug!("Ignoring unknown config patch key: {}", key);
1800                }
1801            }
1802        }
1803
1804        config
1805    }
1806
1807    /// Build a resource guard from the configuration.
1808    fn build_resource_guard(
1809        config: &GeneratorConfig,
1810        output_path: Option<PathBuf>,
1811    ) -> ResourceGuard {
1812        let mut builder = ResourceGuardBuilder::new();
1813
1814        // Configure memory limit if set
1815        if config.global.memory_limit_mb > 0 {
1816            builder = builder.memory_limit(config.global.memory_limit_mb);
1817        }
1818
1819        // Configure disk monitoring for output path
1820        if let Some(path) = output_path {
1821            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1822        }
1823
1824        // Use conservative degradation settings for production safety
1825        builder = builder.conservative();
1826
1827        builder.build()
1828    }
1829
1830    /// Check resources (memory, disk, CPU) and return degradation level.
1831    ///
1832    /// Returns an error if hard limits are exceeded.
1833    /// Returns Ok(DegradationLevel) indicating current resource state.
1834    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1835        self.resource_guard.check()
1836    }
1837
1838    /// Check resources with logging.
1839    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1840        let level = self.resource_guard.check()?;
1841
1842        if level != DegradationLevel::Normal {
1843            warn!(
1844                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1845                phase,
1846                level,
1847                self.resource_guard.current_memory_mb(),
1848                self.resource_guard.available_disk_mb()
1849            );
1850        }
1851
1852        Ok(level)
1853    }
1854
1855    /// Get current degradation actions based on resource state.
1856    fn get_degradation_actions(&self) -> DegradationActions {
1857        self.resource_guard.get_actions()
1858    }
1859
1860    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1861    fn check_memory_limit(&self) -> SynthResult<()> {
1862        self.check_resources()?;
1863        Ok(())
1864    }
1865
1866    /// Run the complete generation workflow.
1867    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1868        info!("Starting enhanced generation workflow");
1869        info!(
1870            "Config: industry={:?}, period_months={}, companies={}",
1871            self.config.global.industry,
1872            self.config.global.period_months,
1873            self.config.companies.len()
1874        );
1875
1876        // Set decimal serialization mode (thread-local, affects JSON output).
1877        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
1878        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
1879        datasynth_core::serde_decimal::set_numeric_native(is_native);
1880        struct NumericModeGuard;
1881        impl Drop for NumericModeGuard {
1882            fn drop(&mut self) {
1883                datasynth_core::serde_decimal::set_numeric_native(false);
1884            }
1885        }
1886        let _numeric_guard = if is_native {
1887            Some(NumericModeGuard)
1888        } else {
1889            None
1890        };
1891
1892        // Initial resource check before starting
1893        let initial_level = self.check_resources_with_log("initial")?;
1894        if initial_level == DegradationLevel::Emergency {
1895            return Err(SynthError::resource(
1896                "Insufficient resources to start generation",
1897            ));
1898        }
1899
1900        let mut stats = EnhancedGenerationStatistics {
1901            companies_count: self.config.companies.len(),
1902            period_months: self.config.global.period_months,
1903            ..Default::default()
1904        };
1905
1906        // Phase 1: Chart of Accounts
1907        let coa = self.phase_chart_of_accounts(&mut stats)?;
1908
1909        // Phase 2: Master Data
1910        self.phase_master_data(&mut stats)?;
1911
1912        // Emit master data to stream sink
1913        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1914        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1915        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1916
1917        // Phase 3: Document Flows + Subledger Linking
1918        let (mut document_flows, mut subledger, fa_journal_entries) =
1919            self.phase_document_flows(&mut stats)?;
1920
1921        // Emit document flows to stream sink
1922        self.emit_phase_items(
1923            "document_flows",
1924            "PurchaseOrder",
1925            &document_flows.purchase_orders,
1926        );
1927        self.emit_phase_items(
1928            "document_flows",
1929            "GoodsReceipt",
1930            &document_flows.goods_receipts,
1931        );
1932        self.emit_phase_items(
1933            "document_flows",
1934            "VendorInvoice",
1935            &document_flows.vendor_invoices,
1936        );
1937        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1938        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1939
1940        // Phase 3b: Opening Balances (before JE generation)
1941        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1942
1943        // Phase 3c: Convert opening balances to journal entries and prepend them.
1944        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
1945        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
1946        // balance map type.
1947        let opening_balance_jes: Vec<JournalEntry> = opening_balances
1948            .iter()
1949            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1950            .collect();
1951        if !opening_balance_jes.is_empty() {
1952            debug!(
1953                "Prepending {} opening balance JEs to entries",
1954                opening_balance_jes.len()
1955            );
1956        }
1957
1958        // Phase 4: Journal Entries
1959        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1960
1961        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
1962        // starts from the correct initial state.
1963        if !opening_balance_jes.is_empty() {
1964            let mut combined = opening_balance_jes;
1965            combined.extend(entries);
1966            entries = combined;
1967        }
1968
1969        // Phase 4c: Append FA acquisition journal entries to main entries
1970        if !fa_journal_entries.is_empty() {
1971            debug!(
1972                "Appending {} FA acquisition JEs to main entries",
1973                fa_journal_entries.len()
1974            );
1975            entries.extend(fa_journal_entries);
1976        }
1977
1978        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
1979        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1980
1981        // Get current degradation actions for optional phases
1982        let actions = self.get_degradation_actions();
1983
1984        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1985        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1986
1987        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
1988        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
1989        if !sourcing.contracts.is_empty() {
1990            let mut linked_count = 0usize;
1991            // Collect (vendor_id, po_id) pairs from P2P chains
1992            let po_vendor_pairs: Vec<(String, String)> = document_flows
1993                .p2p_chains
1994                .iter()
1995                .map(|chain| {
1996                    (
1997                        chain.purchase_order.vendor_id.clone(),
1998                        chain.purchase_order.header.document_id.clone(),
1999                    )
2000                })
2001                .collect();
2002
2003            for chain in &mut document_flows.p2p_chains {
2004                if chain.purchase_order.contract_id.is_none() {
2005                    if let Some(contract) = sourcing
2006                        .contracts
2007                        .iter()
2008                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2009                    {
2010                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2011                        linked_count += 1;
2012                    }
2013                }
2014            }
2015
2016            // Populate reverse FK: purchase_order_ids on each contract
2017            for contract in &mut sourcing.contracts {
2018                let po_ids: Vec<String> = po_vendor_pairs
2019                    .iter()
2020                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2021                    .map(|(_, po_id)| po_id.clone())
2022                    .collect();
2023                if !po_ids.is_empty() {
2024                    contract.purchase_order_ids = po_ids;
2025                }
2026            }
2027
2028            if linked_count > 0 {
2029                debug!(
2030                    "Linked {} purchase orders to S2C contracts by vendor match",
2031                    linked_count
2032                );
2033            }
2034        }
2035
2036        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2037        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2038
2039        // Phase 5c: Append IC journal entries to main entries
2040        if !intercompany.seller_journal_entries.is_empty()
2041            || !intercompany.buyer_journal_entries.is_empty()
2042        {
2043            let ic_je_count = intercompany.seller_journal_entries.len()
2044                + intercompany.buyer_journal_entries.len();
2045            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2046            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2047            debug!(
2048                "Appended {} IC journal entries to main entries",
2049                ic_je_count
2050            );
2051        }
2052
2053        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2054        if !intercompany.elimination_entries.is_empty() {
2055            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2056                &intercompany.elimination_entries,
2057            );
2058            if !elim_jes.is_empty() {
2059                debug!(
2060                    "Appended {} elimination journal entries to main entries",
2061                    elim_jes.len()
2062                );
2063                // IC elimination net-zero assertion (v2.5 hardening)
2064                let elim_debit: rust_decimal::Decimal =
2065                    elim_jes.iter().map(|je| je.total_debit()).sum();
2066                let elim_credit: rust_decimal::Decimal =
2067                    elim_jes.iter().map(|je| je.total_credit()).sum();
2068                let elim_diff = (elim_debit - elim_credit).abs();
2069                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2070                if elim_diff > tolerance {
2071                    return Err(datasynth_core::error::SynthError::generation(format!(
2072                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2073                        elim_debit, elim_credit, elim_diff, tolerance
2074                    )));
2075                }
2076                debug!(
2077                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2078                    elim_debit, elim_credit, elim_diff
2079                );
2080                entries.extend(elim_jes);
2081            }
2082        }
2083
2084        // Phase 5e: Wire IC source documents into document flow snapshot
2085        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2086            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2087                document_flows
2088                    .customer_invoices
2089                    .extend(ic_docs.seller_invoices.iter().cloned());
2090                document_flows
2091                    .purchase_orders
2092                    .extend(ic_docs.buyer_orders.iter().cloned());
2093                document_flows
2094                    .goods_receipts
2095                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2096                document_flows
2097                    .vendor_invoices
2098                    .extend(ic_docs.buyer_invoices.iter().cloned());
2099                debug!(
2100                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2101                    ic_docs.seller_invoices.len(),
2102                    ic_docs.buyer_orders.len(),
2103                    ic_docs.buyer_goods_receipts.len(),
2104                    ic_docs.buyer_invoices.len(),
2105                );
2106            }
2107        }
2108
2109        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2110        let hr = self.phase_hr_data(&mut stats)?;
2111
2112        // Phase 6b: Generate JEs from payroll runs
2113        if !hr.payroll_runs.is_empty() {
2114            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2115            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2116            entries.extend(payroll_jes);
2117        }
2118
2119        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2120        if !hr.pension_journal_entries.is_empty() {
2121            debug!(
2122                "Generated {} JEs from pension plans",
2123                hr.pension_journal_entries.len()
2124            );
2125            entries.extend(hr.pension_journal_entries.iter().cloned());
2126        }
2127
2128        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2129        if !hr.stock_comp_journal_entries.is_empty() {
2130            debug!(
2131                "Generated {} JEs from stock-based compensation",
2132                hr.stock_comp_journal_entries.len()
2133            );
2134            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2135        }
2136
2137        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2138        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2139
2140        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2141        if !manufacturing_snap.production_orders.is_empty() {
2142            let currency = self
2143                .config
2144                .companies
2145                .first()
2146                .map(|c| c.currency.as_str())
2147                .unwrap_or("USD");
2148            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2149                &manufacturing_snap.production_orders,
2150                &manufacturing_snap.quality_inspections,
2151                currency,
2152            );
2153            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2154            entries.extend(mfg_jes);
2155        }
2156
2157        // Phase 7a-warranty: Generate warranty provisions per company
2158        if !manufacturing_snap.quality_inspections.is_empty() {
2159            let framework = match self.config.accounting_standards.framework {
2160                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2161                _ => "US_GAAP",
2162            };
2163            for company in &self.config.companies {
2164                let company_orders: Vec<_> = manufacturing_snap
2165                    .production_orders
2166                    .iter()
2167                    .filter(|o| o.company_code == company.code)
2168                    .cloned()
2169                    .collect();
2170                let company_inspections: Vec<_> = manufacturing_snap
2171                    .quality_inspections
2172                    .iter()
2173                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2174                    .cloned()
2175                    .collect();
2176                if company_inspections.is_empty() {
2177                    continue;
2178                }
2179                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2180                let warranty_result = warranty_gen.generate(
2181                    &company.code,
2182                    &company_orders,
2183                    &company_inspections,
2184                    &company.currency,
2185                    framework,
2186                );
2187                if !warranty_result.journal_entries.is_empty() {
2188                    debug!(
2189                        "Generated {} warranty provision JEs for {}",
2190                        warranty_result.journal_entries.len(),
2191                        company.code
2192                    );
2193                    entries.extend(warranty_result.journal_entries);
2194                }
2195            }
2196        }
2197
2198        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2199        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2200        {
2201            let cogs_currency = self
2202                .config
2203                .companies
2204                .first()
2205                .map(|c| c.currency.as_str())
2206                .unwrap_or("USD");
2207            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2208                &document_flows.deliveries,
2209                &manufacturing_snap.production_orders,
2210                cogs_currency,
2211            );
2212            if !cogs_jes.is_empty() {
2213                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2214                entries.extend(cogs_jes);
2215            }
2216        }
2217
2218        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2219        //
2220        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2221        // subledger inventory positions.  Here we reconcile them so that position balances
2222        // reflect the actual stock movements within the generation period.
2223        if !manufacturing_snap.inventory_movements.is_empty()
2224            && !subledger.inventory_positions.is_empty()
2225        {
2226            use datasynth_core::models::MovementType as MfgMovementType;
2227            let mut receipt_count = 0usize;
2228            let mut issue_count = 0usize;
2229            for movement in &manufacturing_snap.inventory_movements {
2230                // Find a matching position by material code and company
2231                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2232                    p.material_id == movement.material_code
2233                        && p.company_code == movement.entity_code
2234                }) {
2235                    match movement.movement_type {
2236                        MfgMovementType::GoodsReceipt => {
2237                            // Increase stock and update weighted-average cost
2238                            pos.add_quantity(
2239                                movement.quantity,
2240                                movement.value,
2241                                movement.movement_date,
2242                            );
2243                            receipt_count += 1;
2244                        }
2245                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2246                            // Decrease stock (best-effort; silently skip if insufficient)
2247                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2248                            issue_count += 1;
2249                        }
2250                        _ => {}
2251                    }
2252                }
2253            }
2254            debug!(
2255                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2256                manufacturing_snap.inventory_movements.len(),
2257                receipt_count,
2258                issue_count,
2259            );
2260        }
2261
2262        // Update final entry/line-item stats after all JE-generating phases
2263        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2264        if !entries.is_empty() {
2265            stats.total_entries = entries.len() as u64;
2266            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2267            debug!(
2268                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2269                stats.total_entries, stats.total_line_items
2270            );
2271        }
2272
2273        // Phase 7b: Apply internal controls to journal entries
2274        if self.config.internal_controls.enabled && !entries.is_empty() {
2275            info!("Phase 7b: Applying internal controls to journal entries");
2276            let control_config = ControlGeneratorConfig {
2277                exception_rate: self.config.internal_controls.exception_rate,
2278                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2279                enable_sox_marking: true,
2280                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2281                    self.config.internal_controls.sox_materiality_threshold,
2282                )
2283                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2284                ..Default::default()
2285            };
2286            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2287            for entry in &mut entries {
2288                control_gen.apply_controls(entry, &coa);
2289            }
2290            let with_controls = entries
2291                .iter()
2292                .filter(|e| !e.header.control_ids.is_empty())
2293                .count();
2294            info!(
2295                "Applied controls to {} entries ({} with control IDs assigned)",
2296                entries.len(),
2297                with_controls
2298            );
2299        }
2300
2301        // Phase 7c: Extract SoD violations from annotated journal entries.
2302        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2303        // Here we materialise those flags into standalone SodViolation records.
2304        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2305            .iter()
2306            .filter(|e| e.header.sod_violation)
2307            .filter_map(|e| {
2308                e.header.sod_conflict_type.map(|ct| {
2309                    use datasynth_core::models::{RiskLevel, SodViolation};
2310                    let severity = match ct {
2311                        datasynth_core::models::SodConflictType::PaymentReleaser
2312                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2313                            RiskLevel::Critical
2314                        }
2315                        datasynth_core::models::SodConflictType::PreparerApprover
2316                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2317                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2318                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2319                            RiskLevel::High
2320                        }
2321                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2322                            RiskLevel::Medium
2323                        }
2324                    };
2325                    let action = format!(
2326                        "SoD conflict {:?} on entry {} ({})",
2327                        ct, e.header.document_id, e.header.company_code
2328                    );
2329                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2330                })
2331            })
2332            .collect();
2333        if !sod_violations.is_empty() {
2334            info!(
2335                "Phase 7c: Extracted {} SoD violations from {} entries",
2336                sod_violations.len(),
2337                entries.len()
2338            );
2339        }
2340
2341        // Emit journal entries to stream sink (after all JE-generating phases)
2342        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2343
2344        // Phase 8: Anomaly Injection (after all JE-generating phases)
2345        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2346
2347        // Emit anomaly labels to stream sink
2348        self.emit_phase_items(
2349            "anomaly_injection",
2350            "LabeledAnomaly",
2351            &anomaly_labels.labels,
2352        );
2353
2354        // Propagate fraud labels from journal entries to source documents.
2355        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2356        // instead of tracing through document_references.json.
2357        {
2358            use std::collections::HashMap;
2359            // Build a map from document reference / ID -> fraud_type, populated from fraudulent JEs only.
2360            //
2361            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2362            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2363            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2364            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2365            // we register BOTH the prefixed form (raw reference) AND the bare form
2366            // (post-colon portion) in the map. Also register the JE's document_id
2367            // UUID so documents that set `journal_entry_id` match via that path.
2368            //
2369            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2370            // looked up "foo", silently producing 0 propagations.
2371            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2372            for je in &entries {
2373                if je.header.is_fraud {
2374                    if let Some(ref fraud_type) = je.header.fraud_type {
2375                        if let Some(ref reference) = je.header.reference {
2376                            // Register the full reference ("GR:PO-2024-000001")
2377                            fraud_map.insert(reference.clone(), *fraud_type);
2378                            // Also register the bare document ID ("PO-2024-000001")
2379                            // by stripping the "PREFIX:" if present.
2380                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2381                                if !bare.is_empty() {
2382                                    fraud_map.insert(bare.to_string(), *fraud_type);
2383                                }
2384                            }
2385                        }
2386                        // Also tag via journal_entry_id on document headers
2387                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2388                    }
2389                }
2390            }
2391            if !fraud_map.is_empty() {
2392                let mut propagated = 0usize;
2393                // Use DocumentHeader::propagate_fraud method for each doc type
2394                macro_rules! propagate_to {
2395                    ($collection:expr) => {
2396                        for doc in &mut $collection {
2397                            if doc.header.propagate_fraud(&fraud_map) {
2398                                propagated += 1;
2399                            }
2400                        }
2401                    };
2402                }
2403                propagate_to!(document_flows.purchase_orders);
2404                propagate_to!(document_flows.goods_receipts);
2405                propagate_to!(document_flows.vendor_invoices);
2406                propagate_to!(document_flows.payments);
2407                propagate_to!(document_flows.sales_orders);
2408                propagate_to!(document_flows.deliveries);
2409                propagate_to!(document_flows.customer_invoices);
2410                if propagated > 0 {
2411                    info!(
2412                        "Propagated fraud labels to {} document flow records",
2413                        propagated
2414                    );
2415                }
2416            }
2417        }
2418
2419        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2420        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2421
2422        // Emit red flags to stream sink
2423        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2424
2425        // Phase 26b: Collusion Ring Generation (after red flags)
2426        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2427
2428        // Emit collusion rings to stream sink
2429        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2430
2431        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2432        let balance_validation = self.phase_balance_validation(&entries)?;
2433
2434        // Phase 9b: GL-to-Subledger Reconciliation
2435        let subledger_reconciliation =
2436            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2437
2438        // Phase 10: Data Quality Injection
2439        let (data_quality_stats, quality_issues) =
2440            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2441
2442        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2443        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2444
2445        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
2446        {
2447            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2448
2449            // Assert 1: Every non-anomaly JE must individually balance (debits = credits).
2450            // Anomaly-injected JEs are excluded since they are intentionally unbalanced (fraud).
2451            let mut unbalanced_clean = 0usize;
2452            for je in &entries {
2453                if je.header.is_fraud || je.header.is_anomaly {
2454                    continue;
2455                }
2456                let diff = (je.total_debit() - je.total_credit()).abs();
2457                if diff > tolerance {
2458                    unbalanced_clean += 1;
2459                    if unbalanced_clean <= 3 {
2460                        warn!(
2461                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2462                            je.header.document_id,
2463                            je.total_debit(),
2464                            je.total_credit(),
2465                            diff
2466                        );
2467                    }
2468                }
2469            }
2470            if unbalanced_clean > 0 {
2471                return Err(datasynth_core::error::SynthError::generation(format!(
2472                    "{} non-anomaly JEs are unbalanced (debits != credits). \
2473                     First few logged above. Tolerance={}",
2474                    unbalanced_clean, tolerance
2475                )));
2476            }
2477            debug!(
2478                "Phase 10c: All {} non-anomaly JEs individually balanced",
2479                entries
2480                    .iter()
2481                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2482                    .count()
2483            );
2484
2485            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
2486            let company_codes: Vec<String> = self
2487                .config
2488                .companies
2489                .iter()
2490                .map(|c| c.code.clone())
2491                .collect();
2492            for company_code in &company_codes {
2493                let mut assets = rust_decimal::Decimal::ZERO;
2494                let mut liab_equity = rust_decimal::Decimal::ZERO;
2495
2496                for entry in &entries {
2497                    if entry.header.company_code != *company_code {
2498                        continue;
2499                    }
2500                    for line in &entry.lines {
2501                        let acct = &line.gl_account;
2502                        let net = line.debit_amount - line.credit_amount;
2503                        // Asset accounts (1xxx): normal debit balance
2504                        if acct.starts_with('1') {
2505                            assets += net;
2506                        }
2507                        // Liability (2xxx) + Equity (3xxx): normal credit balance
2508                        else if acct.starts_with('2') || acct.starts_with('3') {
2509                            liab_equity -= net; // credit-normal, so negate debit-net
2510                        }
2511                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
2512                        // so they net to zero after closing entries
2513                    }
2514                }
2515
2516                let bs_diff = (assets - liab_equity).abs();
2517                if bs_diff > tolerance {
2518                    warn!(
2519                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
2520                         revenue/expense closing entries may not fully offset",
2521                        company_code, assets, liab_equity, bs_diff
2522                    );
2523                    // Warn rather than error: multi-period datasets may have timing
2524                    // differences from accruals/deferrals that resolve in later periods.
2525                    // The per-JE debit/credit balance check (Assert 1) is the hard gate.
2526                } else {
2527                    debug!(
2528                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
2529                        company_code, assets, liab_equity, bs_diff
2530                    );
2531                }
2532            }
2533
2534            info!("Phase 10c: All generation-time accounting assertions passed");
2535        }
2536
2537        // Phase 11: Audit Data
2538        let audit = self.phase_audit_data(&entries, &mut stats)?;
2539
2540        // Phase 12: Banking KYC/AML Data
2541        let mut banking = self.phase_banking_data(&mut stats)?;
2542
2543        // Phase 12.5: Bridge document-flow Payments → BankTransactions
2544        // Creates coherence between the accounting layer (payments, JEs) and the
2545        // banking layer (bank transactions). A vendor invoice payment now appears
2546        // on both sides with cross-references and fraud labels propagated.
2547        if self.phase_config.generate_banking
2548            && !document_flows.payments.is_empty()
2549            && !banking.accounts.is_empty()
2550        {
2551            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2552            if bridge_rate > 0.0 {
2553                let mut bridge =
2554                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2555                        self.seed,
2556                    );
2557                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2558                    &document_flows.payments,
2559                    &banking.customers,
2560                    &banking.accounts,
2561                    bridge_rate,
2562                );
2563                info!(
2564                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2565                    bridge_stats.bridged_count,
2566                    bridge_stats.transactions_emitted,
2567                    bridge_stats.fraud_propagated,
2568                );
2569                let bridged_count = bridged_txns.len();
2570                banking.transactions.extend(bridged_txns);
2571
2572                // Re-run velocity computation so bridged txns also get features
2573                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
2574                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2575                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
2576                        &mut banking.transactions,
2577                    );
2578                }
2579
2580                // Recompute suspicious count after bridging
2581                banking.suspicious_count = banking
2582                    .transactions
2583                    .iter()
2584                    .filter(|t| t.is_suspicious)
2585                    .count();
2586                stats.banking_transaction_count = banking.transactions.len();
2587                stats.banking_suspicious_count = banking.suspicious_count;
2588            }
2589        }
2590
2591        // Phase 13: Graph Export
2592        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2593
2594        // Phase 14: LLM Enrichment
2595        self.phase_llm_enrichment(&mut stats);
2596
2597        // Phase 15: Diffusion Enhancement
2598        self.phase_diffusion_enhancement(&mut stats);
2599
2600        // Phase 16: Causal Overlay
2601        self.phase_causal_overlay(&mut stats);
2602
2603        // Phase 17: Bank Reconciliation + Financial Statements
2604        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
2605        // provision data (from accounting_standards / tax snapshots) can be wired in.
2606        let mut financial_reporting = self.phase_financial_reporting(
2607            &document_flows,
2608            &entries,
2609            &coa,
2610            &hr,
2611            &audit,
2612            &mut stats,
2613        )?;
2614
2615        // BS coherence check: assets = liabilities + equity
2616        {
2617            use datasynth_core::models::StatementType;
2618            for stmt in &financial_reporting.consolidated_statements {
2619                if stmt.statement_type == StatementType::BalanceSheet {
2620                    let total_assets: rust_decimal::Decimal = stmt
2621                        .line_items
2622                        .iter()
2623                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
2624                        .map(|li| li.amount)
2625                        .sum();
2626                    let total_le: rust_decimal::Decimal = stmt
2627                        .line_items
2628                        .iter()
2629                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2630                        .map(|li| li.amount)
2631                        .sum();
2632                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2633                        warn!(
2634                            "BS equation imbalance: assets={}, L+E={}",
2635                            total_assets, total_le
2636                        );
2637                    }
2638                }
2639            }
2640        }
2641
2642        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
2643        let accounting_standards =
2644            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2645
2646        // Phase 18a: Merge ECL journal entries into main GL
2647        if !accounting_standards.ecl_journal_entries.is_empty() {
2648            debug!(
2649                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2650                accounting_standards.ecl_journal_entries.len()
2651            );
2652            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2653        }
2654
2655        // Phase 18a: Merge provision journal entries into main GL
2656        if !accounting_standards.provision_journal_entries.is_empty() {
2657            debug!(
2658                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2659                accounting_standards.provision_journal_entries.len()
2660            );
2661            entries.extend(
2662                accounting_standards
2663                    .provision_journal_entries
2664                    .iter()
2665                    .cloned(),
2666            );
2667        }
2668
2669        // Phase 18b: OCPM Events (after all process data is available)
2670        let ocpm = self.phase_ocpm_events(
2671            &document_flows,
2672            &sourcing,
2673            &hr,
2674            &manufacturing_snap,
2675            &banking,
2676            &audit,
2677            &financial_reporting,
2678            &mut stats,
2679        )?;
2680
2681        // Emit OCPM events to stream sink
2682        if let Some(ref event_log) = ocpm.event_log {
2683            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2684        }
2685
2686        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
2687        if let Some(ref event_log) = ocpm.event_log {
2688            // Build reverse index: document_ref → indices into event_log.events of the events citing it
2689            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
2690                std::collections::HashMap::new();
2691            for (idx, event) in event_log.events.iter().enumerate() {
2692                if let Some(ref doc_ref) = event.document_ref {
2693                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
2694                }
2695            }
2696
2697            if !doc_index.is_empty() {
2698                let mut annotated = 0usize;
2699                for entry in &mut entries {
2700                    let doc_id_str = entry.header.document_id.to_string();
2701                    // Collect matching event indices from document_id and reference
2702                    let mut matched_indices: Vec<usize> = Vec::new();
2703                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
2704                        matched_indices.extend(indices);
2705                    }
2706                    if let Some(ref reference) = entry.header.reference {
2707                        let bare_ref = reference
2708                            .find(':')
2709                            .map(|i| &reference[i + 1..])
2710                            .unwrap_or(reference.as_str());
2711                        if let Some(indices) = doc_index.get(bare_ref) {
2712                            for &idx in indices {
2713                                if !matched_indices.contains(&idx) {
2714                                    matched_indices.push(idx);
2715                                }
2716                            }
2717                        }
2718                    }
2719                    // Apply matches to JE header
2720                    if !matched_indices.is_empty() {
2721                        for &idx in &matched_indices {
2722                            let event = &event_log.events[idx];
2723                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
2724                                entry.header.ocpm_event_ids.push(event.event_id);
2725                            }
2726                            for obj_ref in &event.object_refs {
2727                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
2728                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
2729                                }
2730                            }
2731                            if entry.header.ocpm_case_id.is_none() {
2732                                entry.header.ocpm_case_id = event.case_id;
2733                            }
2734                        }
2735                        annotated += 1;
2736                    }
2737                }
2738                debug!(
2739                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
2740                    annotated
2741                );
2742            }
2743        }
2744
2745        // Phase 19: Sales Quotes, Management KPIs, Budgets
2746        let sales_kpi_budgets =
2747            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2748
2749        // Phase 22: Treasury Data Generation
2750        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
2751        // are included in the pre-tax income used by phase_tax_generation.
2752        let treasury =
2753            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2754
2755        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
2756        if !treasury.journal_entries.is_empty() {
2757            debug!(
2758                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2759                treasury.journal_entries.len()
2760            );
2761            entries.extend(treasury.journal_entries.iter().cloned());
2762        }
2763
2764        // Phase 20: Tax Generation
2765        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2766
2767        // Phase 20 JEs: Merge tax posting journal entries into main GL
2768        if !tax.tax_posting_journal_entries.is_empty() {
2769            debug!(
2770                "Merging {} tax posting JEs into GL",
2771                tax.tax_posting_journal_entries.len()
2772            );
2773            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2774        }
2775
2776        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
2777        // Build supplementary cash flow items from upstream JE data (depreciation,
2778        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
2779        {
2780            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2781
2782            let framework_str = {
2783                use datasynth_config::schema::AccountingFrameworkConfig;
2784                match self
2785                    .config
2786                    .accounting_standards
2787                    .framework
2788                    .unwrap_or_default()
2789                {
2790                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2791                        "IFRS"
2792                    }
2793                    _ => "US_GAAP",
2794                }
2795            };
2796
2797            // Sum depreciation debits (account 6000) from close JEs
2798            let depreciation_total: rust_decimal::Decimal = entries
2799                .iter()
2800                .filter(|je| je.header.document_type == "CL")
2801                .flat_map(|je| je.lines.iter())
2802                .filter(|l| l.gl_account.starts_with("6000"))
2803                .map(|l| l.debit_amount)
2804                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2805
2806            // Sum interest expense debits (account 7100)
2807            let interest_paid: rust_decimal::Decimal = entries
2808                .iter()
2809                .flat_map(|je| je.lines.iter())
2810                .filter(|l| l.gl_account.starts_with("7100"))
2811                .map(|l| l.debit_amount)
2812                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2813
2814            // Sum tax expense debits (account 8000)
2815            let tax_paid: rust_decimal::Decimal = entries
2816                .iter()
2817                .flat_map(|je| je.lines.iter())
2818                .filter(|l| l.gl_account.starts_with("8000"))
2819                .map(|l| l.debit_amount)
2820                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2821
2822            // Sum capex debits on fixed assets (account 1500)
2823            let capex: rust_decimal::Decimal = entries
2824                .iter()
2825                .flat_map(|je| je.lines.iter())
2826                .filter(|l| l.gl_account.starts_with("1500"))
2827                .map(|l| l.debit_amount)
2828                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2829
2830            // Dividends paid: sum debits on dividends payable (account 2170) from payment JEs
2831            let dividends_paid: rust_decimal::Decimal = entries
2832                .iter()
2833                .flat_map(|je| je.lines.iter())
2834                .filter(|l| l.gl_account == "2170")
2835                .map(|l| l.debit_amount)
2836                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2837
2838            let cf_data = CashFlowSourceData {
2839                depreciation_total,
2840                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
2841                delta_ar: rust_decimal::Decimal::ZERO,
2842                delta_ap: rust_decimal::Decimal::ZERO,
2843                delta_inventory: rust_decimal::Decimal::ZERO,
2844                capex,
2845                debt_issuance: rust_decimal::Decimal::ZERO,
2846                debt_repayment: rust_decimal::Decimal::ZERO,
2847                interest_paid,
2848                tax_paid,
2849                dividends_paid,
2850                framework: framework_str.to_string(),
2851            };
2852
2853            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
2854            if !enhanced_cf_items.is_empty() {
2855                // Merge into ALL cash flow statements (standalone + consolidated)
2856                use datasynth_core::models::StatementType;
2857                let merge_count = enhanced_cf_items.len();
2858                for stmt in financial_reporting
2859                    .financial_statements
2860                    .iter_mut()
2861                    .chain(financial_reporting.consolidated_statements.iter_mut())
2862                    .chain(
2863                        financial_reporting
2864                            .standalone_statements
2865                            .values_mut()
2866                            .flat_map(|v| v.iter_mut()),
2867                    )
2868                {
2869                    if stmt.statement_type == StatementType::CashFlowStatement {
2870                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
2871                    }
2872                }
2873                info!(
2874                    "Enhanced cash flow: {} supplementary items merged into CF statements",
2875                    merge_count
2876                );
2877            }
2878        }
2879
2880        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
2881        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
2882        self.generate_notes_to_financial_statements(
2883            &mut financial_reporting,
2884            &accounting_standards,
2885            &tax,
2886            &hr,
2887            &audit,
2888            &treasury,
2889        );
2890
2891        // Phase 20b: Supplement segment reports from real JEs (v2.4)
2892        // When we have 2+ companies, derive segment data from actual journal entries
2893        // to complement or replace the FS-generator-based segments.
2894        if self.config.companies.len() >= 2 && !entries.is_empty() {
2895            let companies: Vec<(String, String)> = self
2896                .config
2897                .companies
2898                .iter()
2899                .map(|c| (c.code.clone(), c.name.clone()))
2900                .collect();
2901            let ic_elim: rust_decimal::Decimal =
2902                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
2903            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2904                .unwrap_or(NaiveDate::MIN);
2905            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2906            let period_label = format!(
2907                "{}-{:02}",
2908                end_date.year(),
2909                (end_date - chrono::Days::new(1)).month()
2910            );
2911
2912            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
2913            let (je_segments, je_recon) =
2914                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
2915            if !je_segments.is_empty() {
2916                info!(
2917                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
2918                    je_segments.len(),
2919                    ic_elim,
2920                );
2921                // Replace if existing segment_reports were empty; otherwise supplement
2922                if financial_reporting.segment_reports.is_empty() {
2923                    financial_reporting.segment_reports = je_segments;
2924                    financial_reporting.segment_reconciliations = vec![je_recon];
2925                } else {
2926                    financial_reporting.segment_reports.extend(je_segments);
2927                    financial_reporting.segment_reconciliations.push(je_recon);
2928                }
2929            }
2930        }
2931
2932        // Phase 21: ESG Data Generation
2933        let esg_snap =
2934            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
2935
2936        // Phase 23: Project Accounting Data Generation
2937        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2938
2939        // Phase 24: Process Evolution + Organizational Events
2940        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2941
2942        // Phase 24b: Disruption Events
2943        let disruption_events = self.phase_disruption_events(&mut stats)?;
2944
2945        // Phase 27: Bi-Temporal Vendor Version Chains
2946        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2947
2948        // Phase 28: Entity Relationship Graph + Cross-Process Links
2949        let (entity_relationship_graph, cross_process_links) =
2950            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2951
2952        // Phase 29: Industry-specific GL accounts
2953        let industry_output = self.phase_industry_data(&mut stats);
2954
2955        // Phase: Compliance regulations (must run before hypergraph so it can be included)
2956        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2957
2958        // Phase: Neural enhancement (optional — requires neural feature + config)
2959        if self.config.diffusion.enabled
2960            && (self.config.diffusion.backend == "neural"
2961                || self.config.diffusion.backend == "hybrid")
2962        {
2963            debug!(
2964                "Neural enhancement requested (backend={}). \
2965                 Train from generated data or load pre-trained model via config.",
2966                self.config.diffusion.backend
2967            );
2968            // Neural enhancement integrates via the DiffusionBackend trait:
2969            // 1. NeuralDiffusionTrainer::train() on generated amounts
2970            // 2. HybridGenerator blends rule-based + neural at configured weight
2971            // 3. TabularTransformer for conditional column prediction
2972            // 4. GnnGraphTrainer for entity relationship structure
2973            // Actual training requires the `neural` cargo feature on datasynth-core.
2974            // The orchestrator delegates to the diffusion module which is feature-gated.
2975            // Stats tracking handled by individual neural modules when invoked
2976        }
2977
2978        // Phase 19b: Hypergraph Export (after all data is available)
2979        self.phase_hypergraph_export(
2980            &coa,
2981            &entries,
2982            &document_flows,
2983            &sourcing,
2984            &hr,
2985            &manufacturing_snap,
2986            &banking,
2987            &audit,
2988            &financial_reporting,
2989            &ocpm,
2990            &compliance_regulations,
2991            &mut stats,
2992        )?;
2993
2994        // Phase 10c: Additional graph builders (approval, entity, banking)
2995        // These run after all data is available since they need banking/IC data.
2996        if self.phase_config.generate_graph_export {
2997            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2998        }
2999
3000        // Log informational messages for config sections not yet fully wired
3001        if self.config.streaming.enabled {
3002            info!("Note: streaming config is enabled but batch mode does not use it");
3003        }
3004        if self.config.vendor_network.enabled {
3005            debug!("Vendor network config available; relationship graph generation is partial");
3006        }
3007        if self.config.customer_segmentation.enabled {
3008            debug!("Customer segmentation config available; segment-aware generation is partial");
3009        }
3010
3011        // Log final resource statistics
3012        let resource_stats = self.resource_guard.stats();
3013        info!(
3014            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3015            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3016            resource_stats.disk.estimated_bytes_written,
3017            resource_stats.degradation_level
3018        );
3019
3020        // Flush any remaining stream sink data
3021        if let Some(ref sink) = self.phase_sink {
3022            if let Err(e) = sink.flush() {
3023                warn!("Stream sink flush failed: {e}");
3024            }
3025        }
3026
3027        // Build data lineage graph
3028        let lineage = self.build_lineage_graph();
3029
3030        // Evaluate quality gates if enabled in config
3031        let gate_result = if self.config.quality_gates.enabled {
3032            let profile_name = &self.config.quality_gates.profile;
3033            match datasynth_eval::gates::get_profile(profile_name) {
3034                Some(profile) => {
3035                    // Build an evaluation populated with actual generation metrics.
3036                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3037
3038                    // Populate balance sheet evaluation from balance validation results
3039                    if balance_validation.validated {
3040                        eval.coherence.balance =
3041                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3042                                equation_balanced: balance_validation.is_balanced,
3043                                max_imbalance: (balance_validation.total_debits
3044                                    - balance_validation.total_credits)
3045                                    .abs(),
3046                                periods_evaluated: 1,
3047                                periods_imbalanced: if balance_validation.is_balanced {
3048                                    0
3049                                } else {
3050                                    1
3051                                },
3052                                period_results: Vec::new(),
3053                                companies_evaluated: self.config.companies.len(),
3054                            });
3055                    }
3056
3057                    // Set coherence passes based on balance validation
3058                    eval.coherence.passes = balance_validation.is_balanced;
3059                    if !balance_validation.is_balanced {
3060                        eval.coherence
3061                            .failures
3062                            .push("Balance sheet equation not satisfied".to_string());
3063                    }
3064
3065                    // Set statistical score based on entry count (basic sanity)
3066                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3067                    eval.statistical.passes = !entries.is_empty();
3068
3069                    // Set quality score from data quality stats
3070                    eval.quality.overall_score = 0.9; // Default high for generated data
3071                    eval.quality.passes = true;
3072
3073                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3074                    info!(
3075                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3076                        profile_name, result.gates_passed, result.gates_total, result.summary
3077                    );
3078                    Some(result)
3079                }
3080                None => {
3081                    warn!(
3082                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3083                        profile_name
3084                    );
3085                    None
3086                }
3087            }
3088        } else {
3089            None
3090        };
3091
3092        // Generate internal controls if enabled
3093        let internal_controls = if self.config.internal_controls.enabled {
3094            InternalControl::standard_controls()
3095        } else {
3096            Vec::new()
3097        };
3098
3099        Ok(EnhancedGenerationResult {
3100            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3101            master_data: std::mem::take(&mut self.master_data),
3102            document_flows,
3103            subledger,
3104            ocpm,
3105            audit,
3106            banking,
3107            graph_export,
3108            sourcing,
3109            financial_reporting,
3110            hr,
3111            accounting_standards,
3112            manufacturing: manufacturing_snap,
3113            sales_kpi_budgets,
3114            tax,
3115            esg: esg_snap,
3116            treasury,
3117            project_accounting,
3118            process_evolution,
3119            organizational_events,
3120            disruption_events,
3121            intercompany,
3122            journal_entries: entries,
3123            anomaly_labels,
3124            balance_validation,
3125            data_quality_stats,
3126            quality_issues,
3127            statistics: stats,
3128            lineage: Some(lineage),
3129            gate_result,
3130            internal_controls,
3131            sod_violations,
3132            opening_balances,
3133            subledger_reconciliation,
3134            counterfactual_pairs,
3135            red_flags,
3136            collusion_rings,
3137            temporal_vendor_chains,
3138            entity_relationship_graph,
3139            cross_process_links,
3140            industry_output,
3141            compliance_regulations,
3142        })
3143    }
3144
3145    // ========================================================================
3146    // Generation Phase Methods
3147    // ========================================================================
3148
3149    /// Phase 1: builds the chart of accounts and records its account count in `stats`.
3150    ///
3151    /// Errors from CoA generation or from the post-phase resource check are propagated.
3152    fn phase_chart_of_accounts(
3153        &mut self,
3154        stats: &mut EnhancedGenerationStatistics,
3155    ) -> SynthResult<Arc<ChartOfAccounts>> {
3156        info!("Phase 1: Generating Chart of Accounts");
3157        let chart = self.generate_coa()?;
3158        let account_total = chart.account_count();
3159        stats.accounts_count = account_total;
3160        info!("Chart of Accounts generated: {} accounts", account_total);
3161        self.check_resources_with_log("post-coa")?;
3162        Ok(chart)
3163    }
3164
3165    /// Phase 2: populate master data and record entity counts in `stats`; skipped when disabled.
3166    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3167        if !self.phase_config.generate_master_data {
3168            debug!("Phase 2: Skipped (master data generation disabled)");
3169            return Ok(());
3170        }
3171        info!("Phase 2: Generating Master Data");
3172        self.generate_master_data()?;
3173        stats.vendor_count = self.master_data.vendors.len();
3174        stats.customer_count = self.master_data.customers.len();
3175        stats.material_count = self.master_data.materials.len();
3176        stats.asset_count = self.master_data.assets.len();
3177        stats.employee_count = self.master_data.employees.len();
3178        info!(
3179            "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3180            stats.vendor_count, stats.customer_count, stats.material_count,
3181            stats.asset_count, stats.employee_count
3182        );
3183        self.check_resources_with_log("post-master-data")?;
3184        Ok(())
3185    }
3186
3187    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
3188    fn phase_document_flows(
3189        &mut self,
3190        stats: &mut EnhancedGenerationStatistics,
3191    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3192        let mut document_flows = DocumentFlowSnapshot::default();
3193        let mut subledger = SubledgerSnapshot::default();
3194        // Dunning JEs (interest + charges) accumulated here and merged into the
3195        // main FA-JE list below so they appear in the GL.
3196        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3197
3198        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3199            info!("Phase 3: Generating Document Flows");
3200            self.generate_document_flows(&mut document_flows)?;
3201            stats.p2p_chain_count = document_flows.p2p_chains.len();
3202            stats.o2c_chain_count = document_flows.o2c_chains.len();
3203            info!(
3204                "Document flows generated: {} P2P chains, {} O2C chains",
3205                stats.p2p_chain_count, stats.o2c_chain_count
3206            );
3207
3208            // Phase 3b: Link document flows to subledgers (for data coherence)
3209            debug!("Phase 3b: Linking document flows to subledgers");
3210            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3211            stats.ap_invoice_count = subledger.ap_invoices.len();
3212            stats.ar_invoice_count = subledger.ar_invoices.len();
3213            debug!(
3214                "Subledgers linked: {} AP invoices, {} AR invoices",
3215                stats.ap_invoice_count, stats.ar_invoice_count
3216            );
3217
3218            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
3219            // Without this step the subledger is systematically overstated because
3220            // amount_remaining is set at invoice creation and never reduced by
3221            // the payments that were generated in the document-flow phase.
3222            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3223            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3224            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3225            debug!("Payment settlements applied to AP and AR subledgers");
3226
3227            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
3228            // The as-of date is the last day of the configured period.
3229            if let Ok(start_date) =
3230                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3231            {
3232                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3233                    - chrono::Days::new(1);
3234                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3235                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
3236                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
3237                // derived from JE-level aggregation and will typically differ. This is a known
3238                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
3239                // generated independently. A future reconciliation phase should align them by
3240                // using subledger totals as the authoritative source for BS Receivables.
3241                for company in &self.config.companies {
3242                    let ar_report = ARAgingReport::from_invoices(
3243                        company.code.clone(),
3244                        &subledger.ar_invoices,
3245                        as_of_date,
3246                    );
3247                    subledger.ar_aging_reports.push(ar_report);
3248
3249                    let ap_report = APAgingReport::from_invoices(
3250                        company.code.clone(),
3251                        &subledger.ap_invoices,
3252                        as_of_date,
3253                    );
3254                    subledger.ap_aging_reports.push(ap_report);
3255                }
3256                debug!(
3257                    "AR/AP aging reports built: {} AR, {} AP",
3258                    subledger.ar_aging_reports.len(),
3259                    subledger.ap_aging_reports.len()
3260                );
3261
3262                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
3263                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3264                {
3265                    use datasynth_generators::DunningGenerator;
3266                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3267                    for company in &self.config.companies {
3268                        let currency = company.currency.as_str();
3269                        // Collect mutable references to AR invoices for this company
3270                        // (dunning generator updates dunning_info on invoices in-place).
3271                        let mut company_invoices: Vec<
3272                            datasynth_core::models::subledger::ar::ARInvoice,
3273                        > = subledger
3274                            .ar_invoices
3275                            .iter()
3276                            .filter(|inv| inv.company_code == company.code)
3277                            .cloned()
3278                            .collect();
3279
3280                        if company_invoices.is_empty() {
3281                            continue;
3282                        }
3283
3284                        let result = dunning_gen.execute_dunning_run(
3285                            &company.code,
3286                            as_of_date,
3287                            &mut company_invoices,
3288                            currency,
3289                        );
3290
3291                        // Write back updated dunning info to the main AR invoice list
3292                        for updated in &company_invoices {
3293                            if let Some(orig) = subledger
3294                                .ar_invoices
3295                                .iter_mut()
3296                                .find(|i| i.invoice_number == updated.invoice_number)
3297                            {
3298                                orig.dunning_info = updated.dunning_info.clone();
3299                            }
3300                        }
3301
3302                        subledger.dunning_runs.push(result.dunning_run);
3303                        subledger.dunning_letters.extend(result.letters);
3304                        // Dunning JEs (interest + charges) collected into local buffer.
3305                        dunning_journal_entries.extend(result.journal_entries);
3306                    }
3307                    debug!(
3308                        "Dunning runs complete: {} runs, {} letters",
3309                        subledger.dunning_runs.len(),
3310                        subledger.dunning_letters.len()
3311                    );
3312                }
3313            }
3314
3315            self.check_resources_with_log("post-document-flows")?;
3316        } else {
3317            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3318        }
3319
3320        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
3321        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3322        if !self.master_data.assets.is_empty() {
3323            debug!("Generating FA subledger records");
3324            let company_code = self
3325                .config
3326                .companies
3327                .first()
3328                .map(|c| c.code.as_str())
3329                .unwrap_or("1000");
3330            let currency = self
3331                .config
3332                .companies
3333                .first()
3334                .map(|c| c.currency.as_str())
3335                .unwrap_or("USD");
3336
3337            let mut fa_gen = datasynth_generators::FAGenerator::new(
3338                datasynth_generators::FAGeneratorConfig::default(),
3339                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3340            );
3341
3342            for asset in &self.master_data.assets {
3343                let (record, je) = fa_gen.generate_asset_acquisition(
3344                    company_code,
3345                    &format!("{:?}", asset.asset_class),
3346                    &asset.description,
3347                    asset.acquisition_date,
3348                    currency,
3349                    asset.cost_center.as_deref(),
3350                );
3351                subledger.fa_records.push(record);
3352                fa_journal_entries.push(je);
3353            }
3354
3355            stats.fa_subledger_count = subledger.fa_records.len();
3356            debug!(
3357                "FA subledger records generated: {} (with {} acquisition JEs)",
3358                stats.fa_subledger_count,
3359                fa_journal_entries.len()
3360            );
3361        }
3362
3363        // Generate Inventory subledger records from master data materials
3364        if !self.master_data.materials.is_empty() {
3365            debug!("Generating Inventory subledger records");
3366            let first_company = self.config.companies.first();
3367            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3368            let inv_currency = first_company
3369                .map(|c| c.currency.clone())
3370                .unwrap_or_else(|| "USD".to_string());
3371
3372            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3373                datasynth_generators::InventoryGeneratorConfig::default(),
3374                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3375                inv_currency.clone(),
3376            );
3377
3378            for (i, material) in self.master_data.materials.iter().enumerate() {
3379                let plant = format!("PLANT{:02}", (i % 3) + 1);
3380                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3381                let initial_qty = rust_decimal::Decimal::from(
3382                    material
3383                        .safety_stock
3384                        .to_string()
3385                        .parse::<i64>()
3386                        .unwrap_or(100),
3387                );
3388
3389                let position = inv_gen.generate_position(
3390                    company_code,
3391                    &plant,
3392                    &storage_loc,
3393                    &material.material_id,
3394                    &material.description,
3395                    initial_qty,
3396                    Some(material.standard_cost),
3397                    &inv_currency,
3398                );
3399                subledger.inventory_positions.push(position);
3400            }
3401
3402            stats.inventory_subledger_count = subledger.inventory_positions.len();
3403            debug!(
3404                "Inventory subledger records generated: {}",
3405                stats.inventory_subledger_count
3406            );
3407        }
3408
3409        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
3410        if !subledger.fa_records.is_empty() {
3411            if let Ok(start_date) =
3412                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3413            {
3414                let company_code = self
3415                    .config
3416                    .companies
3417                    .first()
3418                    .map(|c| c.code.as_str())
3419                    .unwrap_or("1000");
3420                let fiscal_year = start_date.year();
3421                let start_period = start_date.month();
3422                let end_period =
3423                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3424
3425                let depr_cfg = FaDepreciationScheduleConfig {
3426                    fiscal_year,
3427                    start_period,
3428                    end_period,
3429                    seed_offset: 800,
3430                };
3431                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3432                let runs = depr_gen.generate(company_code, &subledger.fa_records);
3433                let run_count = runs.len();
3434                subledger.depreciation_runs = runs;
3435                debug!(
3436                    "Depreciation runs generated: {} runs for {} periods",
3437                    run_count, self.config.global.period_months
3438                );
3439            }
3440        }
3441
3442        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
3443        if !subledger.inventory_positions.is_empty() {
3444            if let Ok(start_date) =
3445                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3446            {
3447                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3448                    - chrono::Days::new(1);
3449
3450                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3451                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3452
3453                for company in &self.config.companies {
3454                    let result = inv_val_gen.generate(
3455                        &company.code,
3456                        &subledger.inventory_positions,
3457                        as_of_date,
3458                    );
3459                    subledger.inventory_valuations.push(result);
3460                }
3461                debug!(
3462                    "Inventory valuations generated: {} company reports",
3463                    subledger.inventory_valuations.len()
3464                );
3465            }
3466        }
3467
3468        Ok((document_flows, subledger, fa_journal_entries))
3469    }
3470
3471    /// Phase 3c: Generate OCPM events from document flows.
3472    #[allow(clippy::too_many_arguments)]
3473    fn phase_ocpm_events(
3474        &mut self,
3475        document_flows: &DocumentFlowSnapshot,
3476        sourcing: &SourcingSnapshot,
3477        hr: &HrSnapshot,
3478        manufacturing: &ManufacturingSnapshot,
3479        banking: &BankingSnapshot,
3480        audit: &AuditSnapshot,
3481        financial_reporting: &FinancialReportingSnapshot,
3482        stats: &mut EnhancedGenerationStatistics,
3483    ) -> SynthResult<OcpmSnapshot> {
3484        let degradation = self.check_resources()?;
3485        if degradation >= DegradationLevel::Reduced {
3486            debug!(
3487                "Phase skipped due to resource pressure (degradation: {:?})",
3488                degradation
3489            );
3490            return Ok(OcpmSnapshot::default());
3491        }
3492        if self.phase_config.generate_ocpm_events {
3493            info!("Phase 3c: Generating OCPM Events");
3494            let ocpm_snapshot = self.generate_ocpm_events(
3495                document_flows,
3496                sourcing,
3497                hr,
3498                manufacturing,
3499                banking,
3500                audit,
3501                financial_reporting,
3502            )?;
3503            stats.ocpm_event_count = ocpm_snapshot.event_count;
3504            stats.ocpm_object_count = ocpm_snapshot.object_count;
3505            stats.ocpm_case_count = ocpm_snapshot.case_count;
3506            info!(
3507                "OCPM events generated: {} events, {} objects, {} cases",
3508                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3509            );
3510            self.check_resources_with_log("post-ocpm")?;
3511            Ok(ocpm_snapshot)
3512        } else {
3513            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3514            Ok(OcpmSnapshot::default())
3515        }
3516    }
3517
3518    /// Phase 4: Generate journal entries from document flows and standalone generation.
3519    fn phase_journal_entries(
3520        &mut self,
3521        coa: &Arc<ChartOfAccounts>,
3522        document_flows: &DocumentFlowSnapshot,
3523        _stats: &mut EnhancedGenerationStatistics,
3524    ) -> SynthResult<Vec<JournalEntry>> {
3525        let mut entries = Vec::new();
3526
3527        // Phase 4a: Generate JEs from document flows (for data coherence)
3528        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3529            debug!("Phase 4a: Generating JEs from document flows");
3530            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3531            debug!("Generated {} JEs from document flows", flow_entries.len());
3532            entries.extend(flow_entries);
3533        }
3534
3535        // Phase 4b: Generate standalone journal entries
3536        if self.phase_config.generate_journal_entries {
3537            info!("Phase 4: Generating Journal Entries");
3538            let je_entries = self.generate_journal_entries(coa)?;
3539            info!("Generated {} standalone journal entries", je_entries.len());
3540            entries.extend(je_entries);
3541        } else {
3542            debug!("Phase 4: Skipped (journal entry generation disabled)");
3543        }
3544
3545        if !entries.is_empty() {
3546            // Note: stats.total_entries/total_line_items are set in generate()
3547            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
3548            self.check_resources_with_log("post-journal-entries")?;
3549        }
3550
3551        Ok(entries)
3552    }
3553
3554    /// Phase 5: Inject anomalies into journal entries.
3555    fn phase_anomaly_injection(
3556        &mut self,
3557        entries: &mut [JournalEntry],
3558        actions: &DegradationActions,
3559        stats: &mut EnhancedGenerationStatistics,
3560    ) -> SynthResult<AnomalyLabels> {
3561        if self.phase_config.inject_anomalies
3562            && !entries.is_empty()
3563            && !actions.skip_anomaly_injection
3564        {
3565            info!("Phase 5: Injecting Anomalies");
3566            let result = self.inject_anomalies(entries)?;
3567            stats.anomalies_injected = result.labels.len();
3568            info!("Injected {} anomalies", stats.anomalies_injected);
3569            self.check_resources_with_log("post-anomaly-injection")?;
3570            Ok(result)
3571        } else if actions.skip_anomaly_injection {
3572            warn!("Phase 5: Skipped due to resource degradation");
3573            Ok(AnomalyLabels::default())
3574        } else {
3575            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3576            Ok(AnomalyLabels::default())
3577        }
3578    }
3579
3580    /// Phase 6: Validate balance sheet equation on journal entries.
3581    fn phase_balance_validation(
3582        &mut self,
3583        entries: &[JournalEntry],
3584    ) -> SynthResult<BalanceValidationResult> {
3585        if self.phase_config.validate_balances && !entries.is_empty() {
3586            debug!("Phase 6: Validating Balances");
3587            let balance_validation = self.validate_journal_entries(entries)?;
3588            if balance_validation.is_balanced {
3589                debug!("Balance validation passed");
3590            } else {
3591                warn!(
3592                    "Balance validation found {} errors",
3593                    balance_validation.validation_errors.len()
3594                );
3595            }
3596            Ok(balance_validation)
3597        } else {
3598            Ok(BalanceValidationResult::default())
3599        }
3600    }
3601
3602    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
3603    fn phase_data_quality_injection(
3604        &mut self,
3605        entries: &mut [JournalEntry],
3606        actions: &DegradationActions,
3607        stats: &mut EnhancedGenerationStatistics,
3608    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3609        if self.phase_config.inject_data_quality
3610            && !entries.is_empty()
3611            && !actions.skip_data_quality
3612        {
3613            info!("Phase 7: Injecting Data Quality Variations");
3614            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3615            stats.data_quality_issues = dq_stats.records_with_issues;
3616            info!("Injected {} data quality issues", stats.data_quality_issues);
3617            self.check_resources_with_log("post-data-quality")?;
3618            Ok((dq_stats, quality_issues))
3619        } else if actions.skip_data_quality {
3620            warn!("Phase 7: Skipped due to resource degradation");
3621            Ok((DataQualityStats::default(), Vec::new()))
3622        } else {
3623            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3624            Ok((DataQualityStats::default(), Vec::new()))
3625        }
3626    }
3627
3628    /// Phase 10b: Generate period-close journal entries.
3629    ///
3630    /// Generates:
3631    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
3632    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
3633    ///    for the configured period.
3634    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
3635    /// 3. Income statement closing JE per company: transfer net income after tax to retained
3636    ///    earnings via the Income Summary (3600) clearing account.
3637    fn phase_period_close(
3638        &mut self,
3639        entries: &mut Vec<JournalEntry>,
3640        subledger: &SubledgerSnapshot,
3641        stats: &mut EnhancedGenerationStatistics,
3642    ) -> SynthResult<()> {
3643        if !self.phase_config.generate_period_close || entries.is_empty() {
3644            debug!("Phase 10b: Skipped (period close disabled or no entries)");
3645            return Ok(());
3646        }
3647
3648        info!("Phase 10b: Generating period-close journal entries");
3649
3650        use datasynth_core::accounts::{
3651            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3652        };
3653        use rust_decimal::Decimal;
3654
3655        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3656            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3657        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3658        // Posting date for close entries is the last day of the period
3659        let close_date = end_date - chrono::Days::new(1);
3660
3661        // Statutory tax rate (21% — configurable rates come in later tiers)
3662        let tax_rate = Decimal::new(21, 2); // 0.21
3663
3664        // Collect company codes from config
3665        let company_codes: Vec<String> = self
3666            .config
3667            .companies
3668            .iter()
3669            .map(|c| c.code.clone())
3670            .collect();
3671
3672        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
3673        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3674        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3675
3676        // --- Depreciation JEs (per asset) ---
3677        // Compute period depreciation for each active fixed asset using straight-line method.
3678        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
3679        let period_months = self.config.global.period_months;
3680        for asset in &subledger.fa_records {
3681            // Skip assets that are inactive / fully depreciated / non-depreciable
3682            use datasynth_core::models::subledger::fa::AssetStatus;
3683            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3684                continue;
3685            }
3686            let useful_life_months = asset.useful_life_months();
3687            if useful_life_months == 0 {
3688                // Land or CIP — not depreciated
3689                continue;
3690            }
3691            let salvage_value = asset.salvage_value();
3692            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3693            if depreciable_base == Decimal::ZERO {
3694                continue;
3695            }
3696            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3697                * Decimal::from(period_months))
3698            .round_dp(2);
3699            if period_depr <= Decimal::ZERO {
3700                continue;
3701            }
3702
3703            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3704            depr_header.document_type = "CL".to_string();
3705            depr_header.header_text = Some(format!(
3706                "Depreciation - {} {}",
3707                asset.asset_number, asset.description
3708            ));
3709            depr_header.created_by = "CLOSE_ENGINE".to_string();
3710            depr_header.source = TransactionSource::Automated;
3711            depr_header.business_process = Some(BusinessProcess::R2R);
3712
3713            let doc_id = depr_header.document_id;
3714            let mut depr_je = JournalEntry::new(depr_header);
3715
3716            // DR Depreciation Expense (6000)
3717            depr_je.add_line(JournalEntryLine::debit(
3718                doc_id,
3719                1,
3720                expense_accounts::DEPRECIATION.to_string(),
3721                period_depr,
3722            ));
3723            // CR Accumulated Depreciation (1510)
3724            depr_je.add_line(JournalEntryLine::credit(
3725                doc_id,
3726                2,
3727                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3728                period_depr,
3729            ));
3730
3731            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3732            close_jes.push(depr_je);
3733        }
3734
3735        if !subledger.fa_records.is_empty() {
3736            debug!(
3737                "Generated {} depreciation JEs from {} FA records",
3738                close_jes.len(),
3739                subledger.fa_records.len()
3740            );
3741        }
3742
3743        // --- Accrual entries (standard period-end accruals per company) ---
3744        // Generate standard accrued expense entries (utilities, rent, interest) using
3745        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
3746        {
3747            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3748            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3749
3750            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
3751            let accrual_items: &[(&str, &str, &str)] = &[
3752                ("Accrued Utilities", "6200", "2100"),
3753                ("Accrued Rent", "6300", "2100"),
3754                ("Accrued Interest", "6100", "2150"),
3755            ];
3756
3757            for company_code in &company_codes {
3758                // Estimate company revenue from existing JEs
3759                let company_revenue: Decimal = entries
3760                    .iter()
3761                    .filter(|e| e.header.company_code == *company_code)
3762                    .flat_map(|e| e.lines.iter())
3763                    .filter(|l| l.gl_account.starts_with('4'))
3764                    .map(|l| l.credit_amount - l.debit_amount)
3765                    .fold(Decimal::ZERO, |acc, v| acc + v);
3766
3767                if company_revenue <= Decimal::ZERO {
3768                    continue;
3769                }
3770
3771                // Use 0.5% of period revenue per accrual item as a proxy
3772                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3773                if accrual_base <= Decimal::ZERO {
3774                    continue;
3775                }
3776
3777                for (description, expense_acct, liability_acct) in accrual_items {
3778                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3779                        company_code,
3780                        description,
3781                        accrual_base,
3782                        expense_acct,
3783                        liability_acct,
3784                        close_date,
3785                        None,
3786                    );
3787                    close_jes.push(accrual_je);
3788                    if let Some(rev_je) = reversal_je {
3789                        close_jes.push(rev_je);
3790                    }
3791                }
3792            }
3793
3794            debug!(
3795                "Generated accrual entries for {} companies",
3796                company_codes.len()
3797            );
3798        }
3799
3800        for company_code in &company_codes {
3801            // Calculate net income for this company from existing JEs:
3802            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
3803            // Revenue (4xxx): credit-normal, so net = credits - debits
3804            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
3805            let mut total_revenue = Decimal::ZERO;
3806            let mut total_expenses = Decimal::ZERO;
3807
3808            for entry in entries.iter() {
3809                if entry.header.company_code != *company_code {
3810                    continue;
3811                }
3812                for line in &entry.lines {
3813                    let category = AccountCategory::from_account(&line.gl_account);
3814                    match category {
3815                        AccountCategory::Revenue => {
3816                            // Revenue is credit-normal: net revenue = credits - debits
3817                            total_revenue += line.credit_amount - line.debit_amount;
3818                        }
3819                        AccountCategory::Cogs
3820                        | AccountCategory::OperatingExpense
3821                        | AccountCategory::OtherIncomeExpense
3822                        | AccountCategory::Tax => {
3823                            // Expenses are debit-normal: net expense = debits - credits
3824                            total_expenses += line.debit_amount - line.credit_amount;
3825                        }
3826                        _ => {}
3827                    }
3828                }
3829            }
3830
3831            let pre_tax_income = total_revenue - total_expenses;
3832
3833            // Skip if no income statement activity
3834            if pre_tax_income == Decimal::ZERO {
3835                debug!(
3836                    "Company {}: no pre-tax income, skipping period close",
3837                    company_code
3838                );
3839                continue;
3840            }
3841
3842            // --- Tax provision / DTA JE ---
3843            if pre_tax_income > Decimal::ZERO {
3844                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
3845                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3846
3847                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3848                tax_header.document_type = "CL".to_string();
3849                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3850                tax_header.created_by = "CLOSE_ENGINE".to_string();
3851                tax_header.source = TransactionSource::Automated;
3852                tax_header.business_process = Some(BusinessProcess::R2R);
3853
3854                let doc_id = tax_header.document_id;
3855                let mut tax_je = JournalEntry::new(tax_header);
3856
3857                // DR Tax Expense (8000)
3858                tax_je.add_line(JournalEntryLine::debit(
3859                    doc_id,
3860                    1,
3861                    tax_accounts::TAX_EXPENSE.to_string(),
3862                    tax_amount,
3863                ));
3864                // CR Income Tax Payable (2130)
3865                tax_je.add_line(JournalEntryLine::credit(
3866                    doc_id,
3867                    2,
3868                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3869                    tax_amount,
3870                ));
3871
3872                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3873                close_jes.push(tax_je);
3874            } else {
3875                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
3876                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
3877                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3878                if dta_amount > Decimal::ZERO {
3879                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3880                    dta_header.document_type = "CL".to_string();
3881                    dta_header.header_text =
3882                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
3883                    dta_header.created_by = "CLOSE_ENGINE".to_string();
3884                    dta_header.source = TransactionSource::Automated;
3885                    dta_header.business_process = Some(BusinessProcess::R2R);
3886
3887                    let doc_id = dta_header.document_id;
3888                    let mut dta_je = JournalEntry::new(dta_header);
3889
3890                    // DR Deferred Tax Asset (1600)
3891                    dta_je.add_line(JournalEntryLine::debit(
3892                        doc_id,
3893                        1,
3894                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3895                        dta_amount,
3896                    ));
3897                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
3898                    // reflecting the benefit of the future deductible temporary difference.
3899                    dta_je.add_line(JournalEntryLine::credit(
3900                        doc_id,
3901                        2,
3902                        tax_accounts::TAX_EXPENSE.to_string(),
3903                        dta_amount,
3904                    ));
3905
3906                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3907                    close_jes.push(dta_je);
3908                    debug!(
3909                        "Company {}: loss year — recognised DTA of {}",
3910                        company_code, dta_amount
3911                    );
3912                }
3913            }
3914
3915            // --- Dividend JEs (v2.4) ---
3916            // If the entity is profitable after tax, declare a 10% dividend payout.
3917            // This runs AFTER tax provision so the dividend is based on post-tax income
3918            // but BEFORE the retained earnings close so the RE transfer reflects the
3919            // reduced balance.
3920            let tax_provision = if pre_tax_income > Decimal::ZERO {
3921                (pre_tax_income * tax_rate).round_dp(2)
3922            } else {
3923                Decimal::ZERO
3924            };
3925            let net_income = pre_tax_income - tax_provision;
3926
3927            if net_income > Decimal::ZERO {
3928                use datasynth_generators::DividendGenerator;
3929                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
3930                let mut div_gen = DividendGenerator::new(self.seed + 460);
3931                let currency_str = self
3932                    .config
3933                    .companies
3934                    .iter()
3935                    .find(|c| c.code == *company_code)
3936                    .map(|c| c.currency.as_str())
3937                    .unwrap_or("USD");
3938                let div_result = div_gen.generate(
3939                    company_code,
3940                    close_date,
3941                    Decimal::new(1, 0), // $1 per share placeholder
3942                    dividend_amount,
3943                    currency_str,
3944                );
3945                let div_je_count = div_result.journal_entries.len();
3946                close_jes.extend(div_result.journal_entries);
3947                debug!(
3948                    "Company {}: declared dividend of {} ({} JEs)",
3949                    company_code, dividend_amount, div_je_count
3950                );
3951            }
3952
3953            // --- Income statement closing JE ---
3954            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
3955            // For a loss year the DTA JE above already recognises the deferred benefit; here we
3956            // close the pre-tax loss into Retained Earnings as-is.
3957            if net_income != Decimal::ZERO {
3958                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3959                close_header.document_type = "CL".to_string();
3960                close_header.header_text =
3961                    Some(format!("Income statement close - {}", company_code));
3962                close_header.created_by = "CLOSE_ENGINE".to_string();
3963                close_header.source = TransactionSource::Automated;
3964                close_header.business_process = Some(BusinessProcess::R2R);
3965
3966                let doc_id = close_header.document_id;
3967                let mut close_je = JournalEntry::new(close_header);
3968
3969                let abs_net_income = net_income.abs();
3970
3971                if net_income > Decimal::ZERO {
3972                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
3973                    close_je.add_line(JournalEntryLine::debit(
3974                        doc_id,
3975                        1,
3976                        equity_accounts::INCOME_SUMMARY.to_string(),
3977                        abs_net_income,
3978                    ));
3979                    close_je.add_line(JournalEntryLine::credit(
3980                        doc_id,
3981                        2,
3982                        equity_accounts::RETAINED_EARNINGS.to_string(),
3983                        abs_net_income,
3984                    ));
3985                } else {
3986                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
3987                    close_je.add_line(JournalEntryLine::debit(
3988                        doc_id,
3989                        1,
3990                        equity_accounts::RETAINED_EARNINGS.to_string(),
3991                        abs_net_income,
3992                    ));
3993                    close_je.add_line(JournalEntryLine::credit(
3994                        doc_id,
3995                        2,
3996                        equity_accounts::INCOME_SUMMARY.to_string(),
3997                        abs_net_income,
3998                    ));
3999                }
4000
4001                debug_assert!(
4002                    close_je.is_balanced(),
4003                    "Income statement closing JE must be balanced"
4004                );
4005                close_jes.push(close_je);
4006            }
4007        }
4008
4009        let close_count = close_jes.len();
4010        if close_count > 0 {
4011            info!("Generated {} period-close journal entries", close_count);
4012            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4013            entries.extend(close_jes);
4014            stats.period_close_je_count = close_count;
4015
4016            // Update total entry/line-item stats
4017            stats.total_entries = entries.len() as u64;
4018            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4019        } else {
4020            debug!("No period-close entries generated (no income statement activity)");
4021        }
4022
4023        Ok(())
4024    }
4025
4026    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
4027    fn phase_audit_data(
4028        &mut self,
4029        entries: &[JournalEntry],
4030        stats: &mut EnhancedGenerationStatistics,
4031    ) -> SynthResult<AuditSnapshot> {
4032        if self.phase_config.generate_audit {
4033            info!("Phase 8: Generating Audit Data");
4034            let audit_snapshot = self.generate_audit_data(entries)?;
4035            stats.audit_engagement_count = audit_snapshot.engagements.len();
4036            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4037            stats.audit_evidence_count = audit_snapshot.evidence.len();
4038            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4039            stats.audit_finding_count = audit_snapshot.findings.len();
4040            stats.audit_judgment_count = audit_snapshot.judgments.len();
4041            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4042            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4043            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4044            stats.audit_sample_count = audit_snapshot.samples.len();
4045            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4046            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4047            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4048            stats.audit_related_party_count = audit_snapshot.related_parties.len();
4049            stats.audit_related_party_transaction_count =
4050                audit_snapshot.related_party_transactions.len();
4051            info!(
4052                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4053                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4054                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4055                 {} RP transactions",
4056                stats.audit_engagement_count,
4057                stats.audit_workpaper_count,
4058                stats.audit_evidence_count,
4059                stats.audit_risk_count,
4060                stats.audit_finding_count,
4061                stats.audit_judgment_count,
4062                stats.audit_confirmation_count,
4063                stats.audit_procedure_step_count,
4064                stats.audit_sample_count,
4065                stats.audit_analytical_result_count,
4066                stats.audit_ia_function_count,
4067                stats.audit_ia_report_count,
4068                stats.audit_related_party_count,
4069                stats.audit_related_party_transaction_count,
4070            );
4071            self.check_resources_with_log("post-audit")?;
4072            Ok(audit_snapshot)
4073        } else {
4074            debug!("Phase 8: Skipped (audit generation disabled)");
4075            Ok(AuditSnapshot::default())
4076        }
4077    }
4078
4079    /// Phase 9: Generate banking KYC/AML data.
4080    fn phase_banking_data(
4081        &mut self,
4082        stats: &mut EnhancedGenerationStatistics,
4083    ) -> SynthResult<BankingSnapshot> {
4084        if self.phase_config.generate_banking {
4085            info!("Phase 9: Generating Banking KYC/AML Data");
4086            let banking_snapshot = self.generate_banking_data()?;
4087            stats.banking_customer_count = banking_snapshot.customers.len();
4088            stats.banking_account_count = banking_snapshot.accounts.len();
4089            stats.banking_transaction_count = banking_snapshot.transactions.len();
4090            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4091            info!(
4092                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4093                stats.banking_customer_count, stats.banking_account_count,
4094                stats.banking_transaction_count, stats.banking_suspicious_count
4095            );
4096            self.check_resources_with_log("post-banking")?;
4097            Ok(banking_snapshot)
4098        } else {
4099            debug!("Phase 9: Skipped (banking generation disabled)");
4100            Ok(BankingSnapshot::default())
4101        }
4102    }
4103
4104    /// Phase 10: Export accounting network graphs for ML training.
4105    fn phase_graph_export(
4106        &mut self,
4107        entries: &[JournalEntry],
4108        coa: &Arc<ChartOfAccounts>,
4109        stats: &mut EnhancedGenerationStatistics,
4110    ) -> SynthResult<GraphExportSnapshot> {
4111        if self.phase_config.generate_graph_export && !entries.is_empty() {
4112            info!("Phase 10: Exporting Accounting Network Graphs");
4113            match self.export_graphs(entries, coa, stats) {
4114                Ok(snapshot) => {
4115                    info!(
4116                        "Graph export complete: {} graphs ({} nodes, {} edges)",
4117                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4118                    );
4119                    Ok(snapshot)
4120                }
4121                Err(e) => {
4122                    warn!("Phase 10: Graph export failed: {}", e);
4123                    Ok(GraphExportSnapshot::default())
4124                }
4125            }
4126        } else {
4127            debug!("Phase 10: Skipped (graph export disabled or no entries)");
4128            Ok(GraphExportSnapshot::default())
4129        }
4130    }
4131
4132    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
4133    #[allow(clippy::too_many_arguments)]
4134    fn phase_hypergraph_export(
4135        &self,
4136        coa: &Arc<ChartOfAccounts>,
4137        entries: &[JournalEntry],
4138        document_flows: &DocumentFlowSnapshot,
4139        sourcing: &SourcingSnapshot,
4140        hr: &HrSnapshot,
4141        manufacturing: &ManufacturingSnapshot,
4142        banking: &BankingSnapshot,
4143        audit: &AuditSnapshot,
4144        financial_reporting: &FinancialReportingSnapshot,
4145        ocpm: &OcpmSnapshot,
4146        compliance: &ComplianceRegulationsSnapshot,
4147        stats: &mut EnhancedGenerationStatistics,
4148    ) -> SynthResult<()> {
4149        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4150            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4151            match self.export_hypergraph(
4152                coa,
4153                entries,
4154                document_flows,
4155                sourcing,
4156                hr,
4157                manufacturing,
4158                banking,
4159                audit,
4160                financial_reporting,
4161                ocpm,
4162                compliance,
4163                stats,
4164            ) {
4165                Ok(info) => {
4166                    info!(
4167                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4168                        info.node_count, info.edge_count, info.hyperedge_count
4169                    );
4170                }
4171                Err(e) => {
4172                    warn!("Phase 10b: Hypergraph export failed: {}", e);
4173                }
4174            }
4175        } else {
4176            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4177        }
4178        Ok(())
4179    }
4180
4181    /// Phase 11: LLM Enrichment.
4182    ///
4183    /// Uses an LLM provider (mock by default) to enrich vendor names with
4184    /// realistic, context-aware names. This phase is non-blocking: failures
4185    /// log a warning but do not stop the generation pipeline.
4186    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4187        if !self.config.llm.enabled {
4188            debug!("Phase 11: Skipped (LLM enrichment disabled)");
4189            return;
4190        }
4191
4192        info!("Phase 11: Starting LLM Enrichment");
4193        let start = std::time::Instant::now();
4194
4195        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4196            // Select provider: use HttpLlmProvider when a non-mock provider is configured
4197            // and the corresponding API key environment variable is present.
4198            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4199                let schema_provider = &self.config.llm.provider;
4200                let api_key_env = match schema_provider.as_str() {
4201                    "openai" => Some("OPENAI_API_KEY"),
4202                    "anthropic" => Some("ANTHROPIC_API_KEY"),
4203                    "custom" => Some("LLM_API_KEY"),
4204                    _ => None,
4205                };
4206                if let Some(key_env) = api_key_env {
4207                    if std::env::var(key_env).is_ok() {
4208                        let llm_config = datasynth_core::llm::LlmConfig {
4209                            model: self.config.llm.model.clone(),
4210                            api_key_env: key_env.to_string(),
4211                            ..datasynth_core::llm::LlmConfig::default()
4212                        };
4213                        match HttpLlmProvider::new(llm_config) {
4214                            Ok(p) => Arc::new(p),
4215                            Err(e) => {
4216                                warn!(
4217                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
4218                                    e
4219                                );
4220                                Arc::new(MockLlmProvider::new(self.seed))
4221                            }
4222                        }
4223                    } else {
4224                        Arc::new(MockLlmProvider::new(self.seed))
4225                    }
4226                } else {
4227                    Arc::new(MockLlmProvider::new(self.seed))
4228                }
4229            };
4230            let enricher = VendorLlmEnricher::new(provider);
4231
4232            let industry = format!("{:?}", self.config.global.industry);
4233            let max_enrichments = self
4234                .config
4235                .llm
4236                .max_vendor_enrichments
4237                .min(self.master_data.vendors.len());
4238
4239            let mut enriched_count = 0usize;
4240            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4241                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4242                    Ok(name) => {
4243                        vendor.name = name;
4244                        enriched_count += 1;
4245                    }
4246                    Err(e) => {
4247                        warn!(
4248                            "LLM vendor enrichment failed for {}: {}",
4249                            vendor.vendor_id, e
4250                        );
4251                    }
4252                }
4253            }
4254
4255            enriched_count
4256        }));
4257
4258        match result {
4259            Ok(enriched_count) => {
4260                stats.llm_vendors_enriched = enriched_count;
4261                let elapsed = start.elapsed();
4262                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4263                info!(
4264                    "Phase 11 complete: {} vendors enriched in {}ms",
4265                    enriched_count, stats.llm_enrichment_ms
4266                );
4267            }
4268            Err(_) => {
4269                let elapsed = start.elapsed();
4270                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4271                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4272            }
4273        }
4274    }
4275
4276    /// Phase 12: Diffusion Enhancement.
4277    ///
4278    /// Generates a sample set using the statistical diffusion backend to
4279    /// demonstrate distribution-matching data generation. This phase is
4280    /// non-blocking: failures log a warning but do not stop the pipeline.
4281    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4282        if !self.config.diffusion.enabled {
4283            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4284            return;
4285        }
4286
4287        info!("Phase 12: Starting Diffusion Enhancement");
4288        let start = std::time::Instant::now();
4289
4290        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4291            // Target distribution: transaction amounts (log-normal-like)
4292            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
4293            let stds = vec![2000.0, 1.5, 1.0];
4294
4295            let diffusion_config = DiffusionConfig {
4296                n_steps: self.config.diffusion.n_steps,
4297                seed: self.seed,
4298                ..Default::default()
4299            };
4300
4301            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4302
4303            let n_samples = self.config.diffusion.sample_size;
4304            let n_features = 3; // amount, line_items, approval_level
4305            let samples = backend.generate(n_samples, n_features, self.seed);
4306
4307            samples.len()
4308        }));
4309
4310        match result {
4311            Ok(sample_count) => {
4312                stats.diffusion_samples_generated = sample_count;
4313                let elapsed = start.elapsed();
4314                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4315                info!(
4316                    "Phase 12 complete: {} diffusion samples generated in {}ms",
4317                    sample_count, stats.diffusion_enhancement_ms
4318                );
4319            }
4320            Err(_) => {
4321                let elapsed = start.elapsed();
4322                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4323                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4324            }
4325        }
4326    }
4327
4328    /// Phase 13: Causal Overlay.
4329    ///
4330    /// Builds a structural causal model from a built-in template (e.g.,
4331    /// fraud_detection) and generates causal samples. Optionally validates
4332    /// that the output respects the causal structure. This phase is
4333    /// non-blocking: failures log a warning but do not stop the pipeline.
4334    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4335        if !self.config.causal.enabled {
4336            debug!("Phase 13: Skipped (causal generation disabled)");
4337            return;
4338        }
4339
4340        info!("Phase 13: Starting Causal Overlay");
4341        let start = std::time::Instant::now();
4342
4343        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4344            // Select template based on config
4345            let graph = match self.config.causal.template.as_str() {
4346                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4347                _ => CausalGraph::fraud_detection_template(),
4348            };
4349
4350            let scm = StructuralCausalModel::new(graph.clone())
4351                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4352
4353            let n_samples = self.config.causal.sample_size;
4354            let samples = scm
4355                .generate(n_samples, self.seed)
4356                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4357
4358            // Optionally validate causal structure
4359            let validation_passed = if self.config.causal.validate {
4360                let report = CausalValidator::validate_causal_structure(&samples, &graph);
4361                if report.valid {
4362                    info!(
4363                        "Causal validation passed: all {} checks OK",
4364                        report.checks.len()
4365                    );
4366                } else {
4367                    warn!(
4368                        "Causal validation: {} violations detected: {:?}",
4369                        report.violations.len(),
4370                        report.violations
4371                    );
4372                }
4373                Some(report.valid)
4374            } else {
4375                None
4376            };
4377
4378            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4379        }));
4380
4381        match result {
4382            Ok(Ok((sample_count, validation_passed))) => {
4383                stats.causal_samples_generated = sample_count;
4384                stats.causal_validation_passed = validation_passed;
4385                let elapsed = start.elapsed();
4386                stats.causal_generation_ms = elapsed.as_millis() as u64;
4387                info!(
4388                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4389                    sample_count, stats.causal_generation_ms, validation_passed,
4390                );
4391            }
4392            Ok(Err(e)) => {
4393                let elapsed = start.elapsed();
4394                stats.causal_generation_ms = elapsed.as_millis() as u64;
4395                warn!("Phase 13: Causal generation failed: {}", e);
4396            }
4397            Err(_) => {
4398                let elapsed = start.elapsed();
4399                stats.causal_generation_ms = elapsed.as_millis() as u64;
4400                warn!("Phase 13: Causal generation failed (panic caught), continuing");
4401            }
4402        }
4403    }
4404
4405    /// Phase 14: Generate S2C sourcing data.
4406    fn phase_sourcing_data(
4407        &mut self,
4408        stats: &mut EnhancedGenerationStatistics,
4409    ) -> SynthResult<SourcingSnapshot> {
4410        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4411            debug!("Phase 14: Skipped (sourcing generation disabled)");
4412            return Ok(SourcingSnapshot::default());
4413        }
4414        let degradation = self.check_resources()?;
4415        if degradation >= DegradationLevel::Reduced {
4416            debug!(
4417                "Phase skipped due to resource pressure (degradation: {:?})",
4418                degradation
4419            );
4420            return Ok(SourcingSnapshot::default());
4421        }
4422
4423        info!("Phase 14: Generating S2C Sourcing Data");
4424        let seed = self.seed;
4425
4426        // Gather vendor data from master data
4427        let vendor_ids: Vec<String> = self
4428            .master_data
4429            .vendors
4430            .iter()
4431            .map(|v| v.vendor_id.clone())
4432            .collect();
4433        if vendor_ids.is_empty() {
4434            debug!("Phase 14: Skipped (no vendors available)");
4435            return Ok(SourcingSnapshot::default());
4436        }
4437
4438        let categories: Vec<(String, String)> = vec![
4439            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4440            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4441            ("CAT-IT".to_string(), "IT Equipment".to_string()),
4442            ("CAT-SVC".to_string(), "Professional Services".to_string()),
4443            ("CAT-LOG".to_string(), "Logistics".to_string()),
4444        ];
4445        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4446            .iter()
4447            .map(|(id, name)| {
4448                (
4449                    id.clone(),
4450                    name.clone(),
4451                    rust_decimal::Decimal::from(100_000),
4452                )
4453            })
4454            .collect();
4455
4456        let company_code = self
4457            .config
4458            .companies
4459            .first()
4460            .map(|c| c.code.as_str())
4461            .unwrap_or("1000");
4462        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4463            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4464        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4465        let fiscal_year = start_date.year() as u16;
4466        let owner_ids: Vec<String> = self
4467            .master_data
4468            .employees
4469            .iter()
4470            .take(5)
4471            .map(|e| e.employee_id.clone())
4472            .collect();
4473        let owner_id = owner_ids
4474            .first()
4475            .map(std::string::String::as_str)
4476            .unwrap_or("BUYER-001");
4477
4478        // Step 1: Spend Analysis
4479        let mut spend_gen = SpendAnalysisGenerator::new(seed);
4480        let spend_analyses =
4481            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4482
4483        // Step 2: Sourcing Projects
4484        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4485        let sourcing_projects = if owner_ids.is_empty() {
4486            Vec::new()
4487        } else {
4488            project_gen.generate(
4489                company_code,
4490                &categories_with_spend,
4491                &owner_ids,
4492                start_date,
4493                self.config.global.period_months,
4494            )
4495        };
4496        stats.sourcing_project_count = sourcing_projects.len();
4497
4498        // Step 3: Qualifications
4499        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4500        let mut qual_gen = QualificationGenerator::new(seed + 2);
4501        let qualifications = qual_gen.generate(
4502            company_code,
4503            &qual_vendor_ids,
4504            sourcing_projects.first().map(|p| p.project_id.as_str()),
4505            owner_id,
4506            start_date,
4507        );
4508
4509        // Step 4: RFx Events
4510        let mut rfx_gen = RfxGenerator::new(seed + 3);
4511        let rfx_events: Vec<RfxEvent> = sourcing_projects
4512            .iter()
4513            .map(|proj| {
4514                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4515                rfx_gen.generate(
4516                    company_code,
4517                    &proj.project_id,
4518                    &proj.category_id,
4519                    &qualified_vids,
4520                    owner_id,
4521                    start_date,
4522                    50000.0,
4523                )
4524            })
4525            .collect();
4526        stats.rfx_event_count = rfx_events.len();
4527
4528        // Step 5: Bids
4529        let mut bid_gen = BidGenerator::new(seed + 4);
4530        let mut all_bids = Vec::new();
4531        for rfx in &rfx_events {
4532            let bidder_count = vendor_ids.len().clamp(2, 5);
4533            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4534            let bids = bid_gen.generate(rfx, &responding, start_date);
4535            all_bids.extend(bids);
4536        }
4537        stats.bid_count = all_bids.len();
4538
4539        // Step 6: Bid Evaluations
4540        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4541        let bid_evaluations: Vec<BidEvaluation> = rfx_events
4542            .iter()
4543            .map(|rfx| {
4544                let rfx_bids: Vec<SupplierBid> = all_bids
4545                    .iter()
4546                    .filter(|b| b.rfx_id == rfx.rfx_id)
4547                    .cloned()
4548                    .collect();
4549                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4550            })
4551            .collect();
4552
4553        // Step 7: Contracts from winning bids
4554        let mut contract_gen = ContractGenerator::new(seed + 6);
4555        let contracts: Vec<ProcurementContract> = bid_evaluations
4556            .iter()
4557            .zip(rfx_events.iter())
4558            .filter_map(|(eval, rfx)| {
4559                eval.ranked_bids.first().and_then(|winner| {
4560                    all_bids
4561                        .iter()
4562                        .find(|b| b.bid_id == winner.bid_id)
4563                        .map(|winning_bid| {
4564                            contract_gen.generate_from_bid(
4565                                winning_bid,
4566                                Some(&rfx.sourcing_project_id),
4567                                &rfx.category_id,
4568                                owner_id,
4569                                start_date,
4570                            )
4571                        })
4572                })
4573            })
4574            .collect();
4575        stats.contract_count = contracts.len();
4576
4577        // Step 8: Catalog Items
4578        let mut catalog_gen = CatalogGenerator::new(seed + 7);
4579        let catalog_items = catalog_gen.generate(&contracts);
4580        stats.catalog_item_count = catalog_items.len();
4581
4582        // Step 9: Scorecards
4583        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4584        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4585            .iter()
4586            .fold(
4587                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4588                |mut acc, c| {
4589                    acc.entry(c.vendor_id.clone()).or_default().push(c);
4590                    acc
4591                },
4592            )
4593            .into_iter()
4594            .collect();
4595        let scorecards = scorecard_gen.generate(
4596            company_code,
4597            &vendor_contracts,
4598            start_date,
4599            end_date,
4600            owner_id,
4601        );
4602        stats.scorecard_count = scorecards.len();
4603
4604        // Back-populate cross-references on sourcing projects (Task 35)
4605        // Link each project to its RFx events, contracts, and spend analyses
4606        let mut sourcing_projects = sourcing_projects;
4607        for project in &mut sourcing_projects {
4608            // Link RFx events generated for this project
4609            project.rfx_ids = rfx_events
4610                .iter()
4611                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4612                .map(|rfx| rfx.rfx_id.clone())
4613                .collect();
4614
4615            // Link contract awarded from this project's RFx
4616            project.contract_id = contracts
4617                .iter()
4618                .find(|c| {
4619                    c.sourcing_project_id
4620                        .as_deref()
4621                        .is_some_and(|sp| sp == project.project_id)
4622                })
4623                .map(|c| c.contract_id.clone());
4624
4625            // Link spend analysis for matching category (use category_id as the reference)
4626            project.spend_analysis_id = spend_analyses
4627                .iter()
4628                .find(|sa| sa.category_id == project.category_id)
4629                .map(|sa| sa.category_id.clone());
4630        }
4631
4632        info!(
4633            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4634            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4635            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4636        );
4637        self.check_resources_with_log("post-sourcing")?;
4638
4639        Ok(SourcingSnapshot {
4640            spend_analyses,
4641            sourcing_projects,
4642            qualifications,
4643            rfx_events,
4644            bids: all_bids,
4645            bid_evaluations,
4646            contracts,
4647            catalog_items,
4648            scorecards,
4649        })
4650    }
4651
4652    /// Build a [`GroupStructure`] from the current company configuration.
4653    ///
4654    /// The first company in the configuration is treated as the ultimate parent.
4655    /// All remaining companies become wholly-owned (100 %) subsidiaries with
4656    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
4657    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4658        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4659
4660        let parent_code = self
4661            .config
4662            .companies
4663            .first()
4664            .map(|c| c.code.clone())
4665            .unwrap_or_else(|| "PARENT".to_string());
4666
4667        let mut group = GroupStructure::new(parent_code);
4668
4669        for company in self.config.companies.iter().skip(1) {
4670            let sub =
4671                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4672            group.add_subsidiary(sub);
4673        }
4674
4675        group
4676    }
4677
4678    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
4679    fn phase_intercompany(
4680        &mut self,
4681        journal_entries: &[JournalEntry],
4682        stats: &mut EnhancedGenerationStatistics,
4683    ) -> SynthResult<IntercompanySnapshot> {
4684        // Skip if intercompany is disabled in config
4685        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4686            debug!("Phase 14b: Skipped (intercompany generation disabled)");
4687            return Ok(IntercompanySnapshot::default());
4688        }
4689
4690        // Intercompany requires at least 2 companies
4691        if self.config.companies.len() < 2 {
4692            debug!(
4693                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4694                self.config.companies.len()
4695            );
4696            return Ok(IntercompanySnapshot::default());
4697        }
4698
4699        info!("Phase 14b: Generating Intercompany Transactions");
4700
4701        // Build the group structure early — used by ISA 600 component auditor scope
4702        // and consolidated financial statement generators downstream.
4703        let group_structure = self.build_group_structure();
4704        debug!(
4705            "Group structure built: parent={}, subsidiaries={}",
4706            group_structure.parent_entity,
4707            group_structure.subsidiaries.len()
4708        );
4709
4710        let seed = self.seed;
4711        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4712            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4713        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4714
4715        // Build ownership structure from company configs
4716        // First company is treated as the parent, remaining are subsidiaries
4717        let parent_code = self.config.companies[0].code.clone();
4718        let mut ownership_structure =
4719            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4720
4721        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4722            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4723                format!("REL{:03}", i + 1),
4724                parent_code.clone(),
4725                company.code.clone(),
4726                rust_decimal::Decimal::from(100), // Default 100% ownership
4727                start_date,
4728            );
4729            ownership_structure.add_relationship(relationship);
4730        }
4731
4732        // Convert config transfer pricing method to core model enum
4733        let tp_method = match self.config.intercompany.transfer_pricing_method {
4734            datasynth_config::schema::TransferPricingMethod::CostPlus => {
4735                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4736            }
4737            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4738                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4739            }
4740            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4741                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4742            }
4743            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4744                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4745            }
4746            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4747                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4748            }
4749        };
4750
4751        // Build IC generator config from schema config
4752        let ic_currency = self
4753            .config
4754            .companies
4755            .first()
4756            .map(|c| c.currency.clone())
4757            .unwrap_or_else(|| "USD".to_string());
4758        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4759            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4760            transfer_pricing_method: tp_method,
4761            markup_percent: rust_decimal::Decimal::from_f64_retain(
4762                self.config.intercompany.markup_percent,
4763            )
4764            .unwrap_or(rust_decimal::Decimal::from(5)),
4765            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4766            default_currency: ic_currency,
4767            ..Default::default()
4768        };
4769
4770        // Create IC generator
4771        let mut ic_generator = datasynth_generators::ICGenerator::new(
4772            ic_gen_config,
4773            ownership_structure.clone(),
4774            seed + 50,
4775        );
4776
4777        // Generate IC transactions for the period
4778        // Use ~3 transactions per day as a reasonable default
4779        let transactions_per_day = 3;
4780        let matched_pairs = ic_generator.generate_transactions_for_period(
4781            start_date,
4782            end_date,
4783            transactions_per_day,
4784        );
4785
4786        // Generate IC source P2P/O2C documents
4787        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
4788        debug!(
4789            "Generated {} IC seller invoices, {} IC buyer POs",
4790            ic_doc_chains.seller_invoices.len(),
4791            ic_doc_chains.buyer_orders.len()
4792        );
4793
4794        // Generate journal entries from matched pairs
4795        let mut seller_entries = Vec::new();
4796        let mut buyer_entries = Vec::new();
4797        let fiscal_year = start_date.year();
4798
4799        for pair in &matched_pairs {
4800            let fiscal_period = pair.posting_date.month();
4801            let (seller_je, buyer_je) =
4802                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4803            seller_entries.push(seller_je);
4804            buyer_entries.push(buyer_je);
4805        }
4806
4807        // Run matching engine
4808        let matching_config = datasynth_generators::ICMatchingConfig {
4809            base_currency: self
4810                .config
4811                .companies
4812                .first()
4813                .map(|c| c.currency.clone())
4814                .unwrap_or_else(|| "USD".to_string()),
4815            ..Default::default()
4816        };
4817        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4818        matching_engine.load_matched_pairs(&matched_pairs);
4819        let matching_result = matching_engine.run_matching(end_date);
4820
4821        // Generate elimination entries if configured
4822        let mut elimination_entries = Vec::new();
4823        if self.config.intercompany.generate_eliminations {
4824            let elim_config = datasynth_generators::EliminationConfig {
4825                consolidation_entity: "GROUP".to_string(),
4826                base_currency: self
4827                    .config
4828                    .companies
4829                    .first()
4830                    .map(|c| c.currency.clone())
4831                    .unwrap_or_else(|| "USD".to_string()),
4832                ..Default::default()
4833            };
4834
4835            let mut elim_generator =
4836                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4837
4838            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4839            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4840                matching_result
4841                    .matched_balances
4842                    .iter()
4843                    .chain(matching_result.unmatched_balances.iter())
4844                    .cloned()
4845                    .collect();
4846
4847            // Build investment and equity maps from the group structure so that the
4848            // elimination generator can produce equity-investment elimination entries
4849            // (parent's investment in subsidiary vs. subsidiary's equity capital).
4850            //
4851            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
4852            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
4853            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
4854            //
4855            // Net assets are derived from the journal entries using account-range heuristics:
4856            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
4857            // no JE data is available (IC phase runs early in the generation pipeline).
4858            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4859                std::collections::HashMap::new();
4860            let mut equity_amounts: std::collections::HashMap<
4861                String,
4862                std::collections::HashMap<String, rust_decimal::Decimal>,
4863            > = std::collections::HashMap::new();
4864            {
4865                use rust_decimal::Decimal;
4866                let hundred = Decimal::from(100u32);
4867                let ten_pct = Decimal::new(10, 2); // 0.10
4868                let thirty_pct = Decimal::new(30, 2); // 0.30
4869                let sixty_pct = Decimal::new(60, 2); // 0.60
4870                let parent_code = &group_structure.parent_entity;
4871                for sub in &group_structure.subsidiaries {
4872                    let net_assets = {
4873                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4874                        if na > Decimal::ZERO {
4875                            na
4876                        } else {
4877                            Decimal::from(1_000_000u64)
4878                        }
4879                    };
4880                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
4881                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4882                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4883
4884                    // Split subsidiary equity into conventional components:
4885                    // 10 % share capital / 30 % APIC / 60 % retained earnings
4886                    let mut eq_map = std::collections::HashMap::new();
4887                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4888                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4889                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4890                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
4891                }
4892            }
4893
4894            let journal = elim_generator.generate_eliminations(
4895                &fiscal_period,
4896                end_date,
4897                &all_balances,
4898                &matched_pairs,
4899                &investment_amounts,
4900                &equity_amounts,
4901            );
4902
4903            elimination_entries = journal.entries.clone();
4904        }
4905
4906        let matched_pair_count = matched_pairs.len();
4907        let elimination_entry_count = elimination_entries.len();
4908        let match_rate = matching_result.match_rate;
4909
4910        stats.ic_matched_pair_count = matched_pair_count;
4911        stats.ic_elimination_count = elimination_entry_count;
4912        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4913
4914        info!(
4915            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4916            matched_pair_count,
4917            stats.ic_transaction_count,
4918            seller_entries.len(),
4919            buyer_entries.len(),
4920            elimination_entry_count,
4921            match_rate * 100.0
4922        );
4923        self.check_resources_with_log("post-intercompany")?;
4924
4925        // ----------------------------------------------------------------
4926        // NCI measurements: derive from group structure ownership percentages
4927        // ----------------------------------------------------------------
4928        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4929            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4930            use rust_decimal::Decimal;
4931
4932            let eight_pct = Decimal::new(8, 2); // 0.08
4933
4934            group_structure
4935                .subsidiaries
4936                .iter()
4937                .filter(|sub| {
4938                    sub.nci_percentage > Decimal::ZERO
4939                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4940                })
4941                .map(|sub| {
4942                    // Compute net assets from actual journal entries for this subsidiary.
4943                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
4944                    // IC phase runs before the main JE batch has been populated).
4945                    let net_assets_from_jes =
4946                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4947
4948                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
4949                        net_assets_from_jes.round_dp(2)
4950                    } else {
4951                        // Fallback: use a plausible base amount
4952                        Decimal::from(1_000_000u64)
4953                    };
4954
4955                    // Net income approximated as 8% of net assets
4956                    let net_income = (net_assets * eight_pct).round_dp(2);
4957
4958                    NciMeasurement::compute(
4959                        sub.entity_code.clone(),
4960                        sub.nci_percentage,
4961                        net_assets,
4962                        net_income,
4963                    )
4964                })
4965                .collect()
4966        };
4967
4968        if !nci_measurements.is_empty() {
4969            info!(
4970                "NCI measurements: {} subsidiaries with non-controlling interests",
4971                nci_measurements.len()
4972            );
4973        }
4974
4975        Ok(IntercompanySnapshot {
4976            group_structure: Some(group_structure),
4977            matched_pairs,
4978            seller_journal_entries: seller_entries,
4979            buyer_journal_entries: buyer_entries,
4980            elimination_entries,
4981            nci_measurements,
4982            ic_document_chains: Some(ic_doc_chains),
4983            matched_pair_count,
4984            elimination_entry_count,
4985            match_rate,
4986        })
4987    }
4988
4989    /// Phase 15: Generate bank reconciliations and financial statements.
4990    fn phase_financial_reporting(
4991        &mut self,
4992        document_flows: &DocumentFlowSnapshot,
4993        journal_entries: &[JournalEntry],
4994        coa: &Arc<ChartOfAccounts>,
4995        _hr: &HrSnapshot,
4996        _audit: &AuditSnapshot,
4997        stats: &mut EnhancedGenerationStatistics,
4998    ) -> SynthResult<FinancialReportingSnapshot> {
4999        let fs_enabled = self.phase_config.generate_financial_statements
5000            || self.config.financial_reporting.enabled;
5001        let br_enabled = self.phase_config.generate_bank_reconciliation;
5002
5003        if !fs_enabled && !br_enabled {
5004            debug!("Phase 15: Skipped (financial reporting disabled)");
5005            return Ok(FinancialReportingSnapshot::default());
5006        }
5007
5008        info!("Phase 15: Generating Financial Reporting Data");
5009
5010        let seed = self.seed;
5011        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5012            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5013
5014        let mut financial_statements = Vec::new();
5015        let mut bank_reconciliations = Vec::new();
5016        let mut trial_balances = Vec::new();
5017        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
5018        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
5019            Vec::new();
5020        // Standalone statements keyed by entity code
5021        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5022            std::collections::HashMap::new();
5023        // Consolidated statements (one per period)
5024        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5025        // Consolidation schedules (one per period)
5026        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5027
5028        // Generate financial statements from JE-derived trial balances.
5029        //
5030        // When journal entries are available, we use cumulative trial balances for
5031        // balance sheet accounts and current-period trial balances for income
5032        // statement accounts. We also track prior-period trial balances so the
5033        // generator can produce comparative amounts, and we build a proper
5034        // cash flow statement from working capital changes rather than random data.
5035        if fs_enabled {
5036            let has_journal_entries = !journal_entries.is_empty();
5037
5038            // Use FinancialStatementGenerator for balance sheet and income statement,
5039            // but build cash flow ourselves from TB data when JEs are available.
5040            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
5041            // Separate generator for consolidated statements (different seed offset)
5042            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
5043
5044            // Collect elimination JEs once (reused across periods)
5045            let elimination_entries: Vec<&JournalEntry> = journal_entries
5046                .iter()
5047                .filter(|je| je.header.is_elimination)
5048                .collect();
5049
5050            // Generate one set of statements per period, per entity
5051            for period in 0..self.config.global.period_months {
5052                let period_start = start_date + chrono::Months::new(period);
5053                let period_end =
5054                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5055                let fiscal_year = period_end.year() as u16;
5056                let fiscal_period = period_end.month() as u8;
5057                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5058
5059                // Build per-entity trial balances for this period (non-elimination JEs)
5060                // We accumulate them for the consolidation step.
5061                let mut entity_tb_map: std::collections::HashMap<
5062                    String,
5063                    std::collections::HashMap<String, rust_decimal::Decimal>,
5064                > = std::collections::HashMap::new();
5065
5066                // --- Standalone: one set of statements per company ---
5067                for (company_idx, company) in self.config.companies.iter().enumerate() {
5068                    let company_code = company.code.as_str();
5069                    let currency = company.currency.as_str();
5070                    // Use a unique seed offset per company to keep statements deterministic
5071                    // and distinct across companies
5072                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
5073                    let mut company_fs_gen =
5074                        FinancialStatementGenerator::new(seed + company_seed_offset);
5075
5076                    if has_journal_entries {
5077                        let tb_entries = Self::build_cumulative_trial_balance(
5078                            journal_entries,
5079                            coa,
5080                            company_code,
5081                            start_date,
5082                            period_end,
5083                            fiscal_year,
5084                            fiscal_period,
5085                        );
5086
5087                        // Accumulate per-entity category balances for consolidation
5088                        let entity_cat_map =
5089                            entity_tb_map.entry(company_code.to_string()).or_default();
5090                        for tb_entry in &tb_entries {
5091                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
5092                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
5093                        }
5094
5095                        let stmts = company_fs_gen.generate(
5096                            company_code,
5097                            currency,
5098                            &tb_entries,
5099                            period_start,
5100                            period_end,
5101                            fiscal_year,
5102                            fiscal_period,
5103                            None,
5104                            "SYS-AUTOCLOSE",
5105                        );
5106
5107                        let mut entity_stmts = Vec::new();
5108                        for stmt in stmts {
5109                            if stmt.statement_type == StatementType::CashFlowStatement {
5110                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
5111                                let cf_items = Self::build_cash_flow_from_trial_balances(
5112                                    &tb_entries,
5113                                    None,
5114                                    net_income,
5115                                );
5116                                entity_stmts.push(FinancialStatement {
5117                                    cash_flow_items: cf_items,
5118                                    ..stmt
5119                                });
5120                            } else {
5121                                entity_stmts.push(stmt);
5122                            }
5123                        }
5124
5125                        // Add to the flat financial_statements list (used by KPI/budget)
5126                        financial_statements.extend(entity_stmts.clone());
5127
5128                        // Store standalone per-entity
5129                        standalone_statements
5130                            .entry(company_code.to_string())
5131                            .or_default()
5132                            .extend(entity_stmts);
5133
5134                        // Only store trial balance for the first company in the period
5135                        // to avoid duplicates in the trial_balances list
5136                        if company_idx == 0 {
5137                            trial_balances.push(PeriodTrialBalance {
5138                                fiscal_year,
5139                                fiscal_period,
5140                                period_start,
5141                                period_end,
5142                                entries: tb_entries,
5143                            });
5144                        }
5145                    } else {
5146                        // Fallback: no JEs available
5147                        let tb_entries = Self::build_trial_balance_from_entries(
5148                            journal_entries,
5149                            coa,
5150                            company_code,
5151                            fiscal_year,
5152                            fiscal_period,
5153                        );
5154
5155                        let stmts = company_fs_gen.generate(
5156                            company_code,
5157                            currency,
5158                            &tb_entries,
5159                            period_start,
5160                            period_end,
5161                            fiscal_year,
5162                            fiscal_period,
5163                            None,
5164                            "SYS-AUTOCLOSE",
5165                        );
5166                        financial_statements.extend(stmts.clone());
5167                        standalone_statements
5168                            .entry(company_code.to_string())
5169                            .or_default()
5170                            .extend(stmts);
5171
5172                        if company_idx == 0 && !tb_entries.is_empty() {
5173                            trial_balances.push(PeriodTrialBalance {
5174                                fiscal_year,
5175                                fiscal_period,
5176                                period_start,
5177                                period_end,
5178                                entries: tb_entries,
5179                            });
5180                        }
5181                    }
5182                }
5183
5184                // --- Consolidated: aggregate all entities + apply eliminations ---
5185                // Use the primary (first) company's currency for the consolidated statement
5186                let group_currency = self
5187                    .config
5188                    .companies
5189                    .first()
5190                    .map(|c| c.currency.as_str())
5191                    .unwrap_or("USD");
5192
5193                // Build owned elimination entries for this period
5194                let period_eliminations: Vec<JournalEntry> = elimination_entries
5195                    .iter()
5196                    .filter(|je| {
5197                        je.header.fiscal_year == fiscal_year
5198                            && je.header.fiscal_period == fiscal_period
5199                    })
5200                    .map(|je| (*je).clone())
5201                    .collect();
5202
5203                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5204                    &entity_tb_map,
5205                    &period_eliminations,
5206                    &period_label,
5207                );
5208
5209                // Build a pseudo trial balance from consolidated line items for the
5210                // FinancialStatementGenerator to use (only for cash flow direction).
5211                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5212                    .line_items
5213                    .iter()
5214                    .map(|li| {
5215                        let net = li.post_elimination_total;
5216                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5217                            (net, rust_decimal::Decimal::ZERO)
5218                        } else {
5219                            (rust_decimal::Decimal::ZERO, -net)
5220                        };
5221                        datasynth_generators::TrialBalanceEntry {
5222                            account_code: li.account_category.clone(),
5223                            account_name: li.account_category.clone(),
5224                            category: li.account_category.clone(),
5225                            debit_balance: debit,
5226                            credit_balance: credit,
5227                        }
5228                    })
5229                    .collect();
5230
5231                let mut cons_stmts = cons_gen.generate(
5232                    "GROUP",
5233                    group_currency,
5234                    &cons_tb,
5235                    period_start,
5236                    period_end,
5237                    fiscal_year,
5238                    fiscal_period,
5239                    None,
5240                    "SYS-AUTOCLOSE",
5241                );
5242
5243                // Split consolidated line items by statement type.
5244                // The consolidation generator returns BS items first, then IS items,
5245                // identified by their CONS- prefix and category.
5246                let bs_categories: &[&str] = &[
5247                    "CASH",
5248                    "RECEIVABLES",
5249                    "INVENTORY",
5250                    "FIXEDASSETS",
5251                    "PAYABLES",
5252                    "ACCRUEDLIABILITIES",
5253                    "LONGTERMDEBT",
5254                    "EQUITY",
5255                ];
5256                let (bs_items, is_items): (Vec<_>, Vec<_>) =
5257                    cons_line_items.into_iter().partition(|li| {
5258                        let upper = li.label.to_uppercase();
5259                        bs_categories.iter().any(|c| upper == *c)
5260                    });
5261
5262                for stmt in &mut cons_stmts {
5263                    stmt.is_consolidated = true;
5264                    match stmt.statement_type {
5265                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5266                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5267                        _ => {} // CF and equity change statements keep generator output
5268                    }
5269                }
5270
5271                consolidated_statements.extend(cons_stmts);
5272                consolidation_schedules.push(schedule);
5273            }
5274
5275            // Backward compat: if only 1 company, use existing code path logic
5276            // (prior_cumulative_tb for comparative amounts). Already handled above;
5277            // the prior_ref is omitted to keep this change minimal.
5278            let _ = &mut fs_gen; // suppress unused warning
5279
5280            stats.financial_statement_count = financial_statements.len();
5281            info!(
5282                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5283                stats.financial_statement_count,
5284                consolidated_statements.len(),
5285                has_journal_entries
5286            );
5287
5288            // ----------------------------------------------------------------
5289            // IFRS 8 / ASC 280: Operating Segment Reporting
5290            // ----------------------------------------------------------------
5291            // Build entity seeds from the company configuration.
5292            let entity_seeds: Vec<SegmentSeed> = self
5293                .config
5294                .companies
5295                .iter()
5296                .map(|c| SegmentSeed {
5297                    code: c.code.clone(),
5298                    name: c.name.clone(),
5299                    currency: c.currency.clone(),
5300                })
5301                .collect();
5302
5303            let mut seg_gen = SegmentGenerator::new(seed + 30);
5304
5305            // Generate one set of segment reports per period.
5306            // We extract consolidated revenue / profit / assets from the consolidated
5307            // financial statements produced above, falling back to simple sums when
5308            // no consolidated statements were generated (single-entity path).
5309            for period in 0..self.config.global.period_months {
5310                let period_end =
5311                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5312                let fiscal_year = period_end.year() as u16;
5313                let fiscal_period = period_end.month() as u8;
5314                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5315
5316                use datasynth_core::models::StatementType;
5317
5318                // Try to find consolidated income statement for this period
5319                let cons_is = consolidated_statements.iter().find(|s| {
5320                    s.fiscal_year == fiscal_year
5321                        && s.fiscal_period == fiscal_period
5322                        && s.statement_type == StatementType::IncomeStatement
5323                });
5324                let cons_bs = consolidated_statements.iter().find(|s| {
5325                    s.fiscal_year == fiscal_year
5326                        && s.fiscal_period == fiscal_period
5327                        && s.statement_type == StatementType::BalanceSheet
5328                });
5329
5330                // If consolidated statements are not available, fall back to the flat list
5331                let is_stmt = cons_is.or_else(|| {
5332                    financial_statements.iter().find(|s| {
5333                        s.fiscal_year == fiscal_year
5334                            && s.fiscal_period == fiscal_period
5335                            && s.statement_type == StatementType::IncomeStatement
5336                    })
5337                });
5338                let bs_stmt = cons_bs.or_else(|| {
5339                    financial_statements.iter().find(|s| {
5340                        s.fiscal_year == fiscal_year
5341                            && s.fiscal_period == fiscal_period
5342                            && s.statement_type == StatementType::BalanceSheet
5343                    })
5344                });
5345
5346                let consolidated_revenue = is_stmt
5347                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5348                    .map(|li| -li.amount) // revenue is stored as negative in IS
5349                    .unwrap_or(rust_decimal::Decimal::ZERO);
5350
5351                let consolidated_profit = is_stmt
5352                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5353                    .map(|li| li.amount)
5354                    .unwrap_or(rust_decimal::Decimal::ZERO);
5355
5356                let consolidated_assets = bs_stmt
5357                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5358                    .map(|li| li.amount)
5359                    .unwrap_or(rust_decimal::Decimal::ZERO);
5360
5361                // Skip periods where we have no financial data
5362                if consolidated_revenue == rust_decimal::Decimal::ZERO
5363                    && consolidated_assets == rust_decimal::Decimal::ZERO
5364                {
5365                    continue;
5366                }
5367
5368                let group_code = self
5369                    .config
5370                    .companies
5371                    .first()
5372                    .map(|c| c.code.as_str())
5373                    .unwrap_or("GROUP");
5374
5375                // Sum depreciation-expense debits from JEs with document type "CL" on accounts starting
5376                // with "6000" (generated by phase_period_close). NOTE(review): not filtered by period.
5377                let total_depr: rust_decimal::Decimal = journal_entries
5378                    .iter()
5379                    .filter(|je| je.header.document_type == "CL")
5380                    .flat_map(|je| je.lines.iter())
5381                    .filter(|l| l.gl_account.starts_with("6000"))
5382                    .map(|l| l.debit_amount)
5383                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5384                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5385                    Some(total_depr)
5386                } else {
5387                    None
5388                };
5389
5390                let (segs, recon) = seg_gen.generate(
5391                    group_code,
5392                    &period_label,
5393                    consolidated_revenue,
5394                    consolidated_profit,
5395                    consolidated_assets,
5396                    &entity_seeds,
5397                    depr_param,
5398                );
5399                segment_reports.extend(segs);
5400                segment_reconciliations.push(recon);
5401            }
5402
5403            info!(
5404                "Segment reports generated: {} segments, {} reconciliations",
5405                segment_reports.len(),
5406                segment_reconciliations.len()
5407            );
5408        }
5409
5410        // Generate bank reconciliations from payment data
5411        if br_enabled && !document_flows.payments.is_empty() {
5412            let employee_ids: Vec<String> = self
5413                .master_data
5414                .employees
5415                .iter()
5416                .map(|e| e.employee_id.clone())
5417                .collect();
5418            let mut br_gen =
5419                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5420
5421            // Group payments by company code and period
5422            for company in &self.config.companies {
5423                let company_payments: Vec<PaymentReference> = document_flows
5424                    .payments
5425                    .iter()
5426                    .filter(|p| p.header.company_code == company.code)
5427                    .map(|p| PaymentReference {
5428                        id: p.header.document_id.clone(),
5429                        amount: if p.is_vendor { p.amount } else { -p.amount },
5430                        date: p.header.document_date,
5431                        reference: p
5432                            .check_number
5433                            .clone()
5434                            .or_else(|| p.wire_reference.clone())
5435                            .unwrap_or_else(|| p.header.document_id.clone()),
5436                    })
5437                    .collect();
5438
5439                if company_payments.is_empty() {
5440                    continue;
5441                }
5442
5443                let bank_account_id = format!("{}-MAIN", company.code);
5444
5445                // Generate one reconciliation per period
5446                for period in 0..self.config.global.period_months {
5447                    let period_start = start_date + chrono::Months::new(period);
5448                    let period_end =
5449                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5450
5451                    let period_payments: Vec<PaymentReference> = company_payments
5452                        .iter()
5453                        .filter(|p| p.date >= period_start && p.date <= period_end)
5454                        .cloned()
5455                        .collect();
5456
5457                    let recon = br_gen.generate(
5458                        &company.code,
5459                        &bank_account_id,
5460                        period_start,
5461                        period_end,
5462                        &company.currency,
5463                        &period_payments,
5464                    );
5465                    bank_reconciliations.push(recon);
5466                }
5467            }
5468            info!(
5469                "Bank reconciliations generated: {} reconciliations",
5470                bank_reconciliations.len()
5471            );
5472        }
5473
5474        stats.bank_reconciliation_count = bank_reconciliations.len();
5475        self.check_resources_with_log("post-financial-reporting")?;
5476
5477        if !trial_balances.is_empty() {
5478            info!(
5479                "Period-close trial balances captured: {} periods",
5480                trial_balances.len()
5481            );
5482        }
5483
5484        // Notes to financial statements are generated in a separate post-processing step
5485        // (generate_notes_to_financial_statements) called after accounting_standards and tax
5486        // phases have completed, so that deferred tax and provision data can be wired in.
5487        let notes_to_financial_statements = Vec::new();
5488
5489        Ok(FinancialReportingSnapshot {
5490            financial_statements,
5491            standalone_statements,
5492            consolidated_statements,
5493            consolidation_schedules,
5494            bank_reconciliations,
5495            trial_balances,
5496            segment_reports,
5497            segment_reconciliations,
5498            notes_to_financial_statements,
5499        })
5500    }
5501
5502    /// Populate notes to financial statements using fully-resolved snapshots.
5503    ///
5504    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
5505    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
5506    /// can be wired into the notes context.  The method mutates
5507    /// `financial_reporting.notes_to_financial_statements` in-place.
5508    fn generate_notes_to_financial_statements(
5509        &self,
5510        financial_reporting: &mut FinancialReportingSnapshot,
5511        accounting_standards: &AccountingStandardsSnapshot,
5512        tax: &TaxSnapshot,
5513        hr: &HrSnapshot,
5514        audit: &AuditSnapshot,
5515        treasury: &TreasurySnapshot,
5516    ) {
5517        use datasynth_config::schema::AccountingFrameworkConfig;
5518        use datasynth_core::models::StatementType;
5519        use datasynth_generators::period_close::notes_generator::{
5520            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5521        };
5522
5523        let seed = self.seed;
5524        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5525        {
5526            Ok(d) => d,
5527            Err(_) => return,
5528        };
5529
5530        let mut notes_gen = NotesGenerator::new(seed + 4235);
5531
5532        for company in &self.config.companies {
5533            let last_period_end = start_date
5534                + chrono::Months::new(self.config.global.period_months)
5535                - chrono::Days::new(1);
5536            let fiscal_year = last_period_end.year() as u16;
5537
5538            // Extract relevant amounts from the already-generated financial statements
5539            let entity_is = financial_reporting
5540                .standalone_statements
5541                .get(&company.code)
5542                .and_then(|stmts| {
5543                    stmts.iter().find(|s| {
5544                        s.fiscal_year == fiscal_year
5545                            && s.statement_type == StatementType::IncomeStatement
5546                    })
5547                });
5548            let entity_bs = financial_reporting
5549                .standalone_statements
5550                .get(&company.code)
5551                .and_then(|stmts| {
5552                    stmts.iter().find(|s| {
5553                        s.fiscal_year == fiscal_year
5554                            && s.statement_type == StatementType::BalanceSheet
5555                    })
5556                });
5557
5558            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
5559            let revenue_amount = entity_is
5560                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5561                .map(|li| li.amount);
5562            let ppe_gross = entity_bs
5563                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5564                .map(|li| li.amount);
5565
5566            let framework = match self
5567                .config
5568                .accounting_standards
5569                .framework
5570                .unwrap_or_default()
5571            {
5572                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5573                    "IFRS".to_string()
5574                }
5575                _ => "US GAAP".to_string(),
5576            };
5577
5578            // ---- Deferred tax (IAS 12 / ASC 740) ----
5579            // Sum closing DTA and DTL from rollforward entries for this entity.
5580            let (entity_dta, entity_dtl) = {
5581                let mut dta = rust_decimal::Decimal::ZERO;
5582                let mut dtl = rust_decimal::Decimal::ZERO;
5583                for rf in &tax.deferred_tax.rollforwards {
5584                    if rf.entity_code == company.code {
5585                        dta += rf.closing_dta;
5586                        dtl += rf.closing_dtl;
5587                    }
5588                }
5589                (
5590                    if dta > rust_decimal::Decimal::ZERO {
5591                        Some(dta)
5592                    } else {
5593                        None
5594                    },
5595                    if dtl > rust_decimal::Decimal::ZERO {
5596                        Some(dtl)
5597                    } else {
5598                        None
5599                    },
5600                )
5601            };
5602
5603            // ---- Provisions (IAS 37 / ASC 450) ----
5604            // Filter provisions to this entity; sum best_estimate amounts.
5605            let entity_provisions: Vec<_> = accounting_standards
5606                .provisions
5607                .iter()
5608                .filter(|p| p.entity_code == company.code)
5609                .collect();
5610            let provision_count = entity_provisions.len();
5611            let total_provisions = if provision_count > 0 {
5612                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5613            } else {
5614                None
5615            };
5616
5617            // ---- Pension data from HR snapshot ----
5618            let entity_pension_plan_count = hr
5619                .pension_plans
5620                .iter()
5621                .filter(|p| p.entity_code == company.code)
5622                .count();
5623            let entity_total_dbo: Option<rust_decimal::Decimal> = {
5624                let sum: rust_decimal::Decimal = hr
5625                    .pension_disclosures
5626                    .iter()
5627                    .filter(|d| {
5628                        hr.pension_plans
5629                            .iter()
5630                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5631                    })
5632                    .map(|d| d.net_pension_liability)
5633                    .sum();
5634                let plan_assets_sum: rust_decimal::Decimal = hr
5635                    .pension_plan_assets
5636                    .iter()
5637                    .filter(|a| {
5638                        hr.pension_plans
5639                            .iter()
5640                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5641                    })
5642                    .map(|a| a.fair_value_closing)
5643                    .sum();
5644                if entity_pension_plan_count > 0 {
5645                    Some(sum + plan_assets_sum)
5646                } else {
5647                    None
5648                }
5649            };
5650            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5651                let sum: rust_decimal::Decimal = hr
5652                    .pension_plan_assets
5653                    .iter()
5654                    .filter(|a| {
5655                        hr.pension_plans
5656                            .iter()
5657                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5658                    })
5659                    .map(|a| a.fair_value_closing)
5660                    .sum();
5661                if entity_pension_plan_count > 0 {
5662                    Some(sum)
5663                } else {
5664                    None
5665                }
5666            };
5667
5668            // ---- Audit data: related parties + subsequent events ----
5669            // Audit snapshot covers all entities; use total counts (common case = single entity).
5670            let rp_count = audit.related_party_transactions.len();
5671            let se_count = audit.subsequent_events.len();
5672            let adjusting_count = audit
5673                .subsequent_events
5674                .iter()
5675                .filter(|e| {
5676                    matches!(
5677                        e.classification,
5678                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5679                    )
5680                })
5681                .count();
5682
5683            let ctx = NotesGeneratorContext {
5684                entity_code: company.code.clone(),
5685                framework,
5686                period: format!("FY{}", fiscal_year),
5687                period_end: last_period_end,
5688                currency: company.currency.clone(),
5689                revenue_amount,
5690                total_ppe_gross: ppe_gross,
5691                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5692                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
5693                deferred_tax_asset: entity_dta,
5694                deferred_tax_liability: entity_dtl,
5695                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
5696                provision_count,
5697                total_provisions,
5698                // Pension data from HR snapshot
5699                pension_plan_count: entity_pension_plan_count,
5700                total_dbo: entity_total_dbo,
5701                total_plan_assets: entity_total_plan_assets,
5702                // Audit data
5703                related_party_transaction_count: rp_count,
5704                subsequent_event_count: se_count,
5705                adjusting_event_count: adjusting_count,
5706                ..NotesGeneratorContext::default()
5707            };
5708
5709            let entity_notes = notes_gen.generate(&ctx);
5710            let standard_note_count = entity_notes.len() as u32;
5711            info!(
5712                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5713                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5714            );
5715            financial_reporting
5716                .notes_to_financial_statements
5717                .extend(entity_notes);
5718
5719            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
5720            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5721                .debt_instruments
5722                .iter()
5723                .filter(|d| d.entity_id == company.code)
5724                .map(|d| {
5725                    (
5726                        format!("{:?}", d.instrument_type),
5727                        d.principal,
5728                        d.maturity_date.to_string(),
5729                    )
5730                })
5731                .collect();
5732
5733            let hedge_count = treasury.hedge_relationships.len();
5734            let effective_hedges = treasury
5735                .hedge_relationships
5736                .iter()
5737                .filter(|h| h.is_effective)
5738                .count();
5739            let total_notional: rust_decimal::Decimal = treasury
5740                .hedging_instruments
5741                .iter()
5742                .map(|h| h.notional_amount)
5743                .sum();
5744            let total_fair_value: rust_decimal::Decimal = treasury
5745                .hedging_instruments
5746                .iter()
5747                .map(|h| h.fair_value)
5748                .sum();
5749
5750            // Join provision_movements with provisions to get entity/type info
5751            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5752                .provisions
5753                .iter()
5754                .filter(|p| p.entity_code == company.code)
5755                .map(|p| p.id.as_str())
5756                .collect();
5757            let provision_movements: Vec<(
5758                String,
5759                rust_decimal::Decimal,
5760                rust_decimal::Decimal,
5761                rust_decimal::Decimal,
5762            )> = accounting_standards
5763                .provision_movements
5764                .iter()
5765                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5766                .map(|m| {
5767                    let prov_type = accounting_standards
5768                        .provisions
5769                        .iter()
5770                        .find(|p| p.id == m.provision_id)
5771                        .map(|p| format!("{:?}", p.provision_type))
5772                        .unwrap_or_else(|| "Unknown".to_string());
5773                    (prov_type, m.opening, m.additions, m.closing)
5774                })
5775                .collect();
5776
5777            let enhanced_ctx = EnhancedNotesContext {
5778                entity_code: company.code.clone(),
5779                period: format!("FY{}", fiscal_year),
5780                currency: company.currency.clone(),
5781                // Inventory breakdown: best-effort using zero (would need balance tracker)
5782                finished_goods_value: rust_decimal::Decimal::ZERO,
5783                wip_value: rust_decimal::Decimal::ZERO,
5784                raw_materials_value: rust_decimal::Decimal::ZERO,
5785                debt_instruments,
5786                hedge_count,
5787                effective_hedges,
5788                total_notional,
5789                total_fair_value,
5790                provision_movements,
5791            };
5792
5793            let enhanced_notes =
5794                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
5795            if !enhanced_notes.is_empty() {
5796                info!(
5797                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
5798                    company.code,
5799                    enhanced_notes.len(),
5800                    enhanced_ctx.debt_instruments.len(),
5801                    hedge_count,
5802                    enhanced_ctx.provision_movements.len(),
5803                );
5804                financial_reporting
5805                    .notes_to_financial_statements
5806                    .extend(enhanced_notes);
5807            }
5808        }
5809    }
5810
5811    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
5812    ///
5813    /// This ensures the trial balance is coherent with the JEs: every debit and credit
5814    /// posted in the journal entries flows through to the trial balance, using the real
5815    /// GL account numbers from the CoA.
5816    fn build_trial_balance_from_entries(
5817        journal_entries: &[JournalEntry],
5818        coa: &ChartOfAccounts,
5819        company_code: &str,
5820        fiscal_year: u16,
5821        fiscal_period: u8,
5822    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5823        use rust_decimal::Decimal;
5824
5825        // Accumulate total debits and credits per GL account
5826        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5827        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5828
5829        for je in journal_entries {
5830            // Filter to matching company, fiscal year, and period
5831            if je.header.company_code != company_code
5832                || je.header.fiscal_year != fiscal_year
5833                || je.header.fiscal_period != fiscal_period
5834            {
5835                continue;
5836            }
5837
5838            for line in &je.lines {
5839                let acct = &line.gl_account;
5840                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5841                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5842            }
5843        }
5844
5845        // Build a TrialBalanceEntry for each account that had activity
5846        let mut all_accounts: Vec<&String> = account_debits
5847            .keys()
5848            .chain(account_credits.keys())
5849            .collect::<std::collections::HashSet<_>>()
5850            .into_iter()
5851            .collect();
5852        all_accounts.sort();
5853
5854        let mut entries = Vec::new();
5855
5856        for acct_number in all_accounts {
5857            let debit = account_debits
5858                .get(acct_number)
5859                .copied()
5860                .unwrap_or(Decimal::ZERO);
5861            let credit = account_credits
5862                .get(acct_number)
5863                .copied()
5864                .unwrap_or(Decimal::ZERO);
5865
5866            if debit.is_zero() && credit.is_zero() {
5867                continue;
5868            }
5869
5870            // Look up account name from CoA, fall back to "Account {code}"
5871            let account_name = coa
5872                .get_account(acct_number)
5873                .map(|gl| gl.short_description.clone())
5874                .unwrap_or_else(|| format!("Account {acct_number}"));
5875
5876            // Map account code prefix to the category strings expected by
5877            // FinancialStatementGenerator (Cash, Receivables, Inventory,
5878            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
5879            // OperatingExpenses).
5880            let category = Self::category_from_account_code(acct_number);
5881
5882            entries.push(datasynth_generators::TrialBalanceEntry {
5883                account_code: acct_number.clone(),
5884                account_name,
5885                category,
5886                debit_balance: debit,
5887                credit_balance: credit,
5888            });
5889        }
5890
5891        entries
5892    }
5893
5894    /// Build a cumulative trial balance by aggregating all JEs from the start up to
5895    /// (and including) the given period end date.
5896    ///
5897    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
5898    /// while income statement accounts (revenue, expenses) show only the current period.
5899    /// The two are merged into a single Vec for the FinancialStatementGenerator.
5900    fn build_cumulative_trial_balance(
5901        journal_entries: &[JournalEntry],
5902        coa: &ChartOfAccounts,
5903        company_code: &str,
5904        start_date: NaiveDate,
5905        period_end: NaiveDate,
5906        fiscal_year: u16,
5907        fiscal_period: u8,
5908    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5909        use rust_decimal::Decimal;
5910
5911        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
5912        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5913        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5914
5915        // Accumulate debits/credits for income statement accounts (current period only)
5916        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5917        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5918
5919        for je in journal_entries {
5920            if je.header.company_code != company_code {
5921                continue;
5922            }
5923
5924            for line in &je.lines {
5925                let acct = &line.gl_account;
5926                let category = Self::category_from_account_code(acct);
5927                let is_bs_account = matches!(
5928                    category.as_str(),
5929                    "Cash"
5930                        | "Receivables"
5931                        | "Inventory"
5932                        | "FixedAssets"
5933                        | "Payables"
5934                        | "AccruedLiabilities"
5935                        | "LongTermDebt"
5936                        | "Equity"
5937                );
5938
5939                if is_bs_account {
5940                    // Balance sheet: accumulate from start through period_end
5941                    if je.header.document_date <= period_end
5942                        && je.header.document_date >= start_date
5943                    {
5944                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5945                            line.debit_amount;
5946                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5947                            line.credit_amount;
5948                    }
5949                } else {
5950                    // Income statement: current period only
5951                    if je.header.fiscal_year == fiscal_year
5952                        && je.header.fiscal_period == fiscal_period
5953                    {
5954                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5955                            line.debit_amount;
5956                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5957                            line.credit_amount;
5958                    }
5959                }
5960            }
5961        }
5962
5963        // Merge all accounts
5964        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5965        all_accounts.extend(bs_debits.keys().cloned());
5966        all_accounts.extend(bs_credits.keys().cloned());
5967        all_accounts.extend(is_debits.keys().cloned());
5968        all_accounts.extend(is_credits.keys().cloned());
5969
5970        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5971        sorted_accounts.sort();
5972
5973        let mut entries = Vec::new();
5974
5975        for acct_number in &sorted_accounts {
5976            let category = Self::category_from_account_code(acct_number);
5977            let is_bs_account = matches!(
5978                category.as_str(),
5979                "Cash"
5980                    | "Receivables"
5981                    | "Inventory"
5982                    | "FixedAssets"
5983                    | "Payables"
5984                    | "AccruedLiabilities"
5985                    | "LongTermDebt"
5986                    | "Equity"
5987            );
5988
5989            let (debit, credit) = if is_bs_account {
5990                (
5991                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5992                    bs_credits
5993                        .get(acct_number)
5994                        .copied()
5995                        .unwrap_or(Decimal::ZERO),
5996                )
5997            } else {
5998                (
5999                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6000                    is_credits
6001                        .get(acct_number)
6002                        .copied()
6003                        .unwrap_or(Decimal::ZERO),
6004                )
6005            };
6006
6007            if debit.is_zero() && credit.is_zero() {
6008                continue;
6009            }
6010
6011            let account_name = coa
6012                .get_account(acct_number)
6013                .map(|gl| gl.short_description.clone())
6014                .unwrap_or_else(|| format!("Account {acct_number}"));
6015
6016            entries.push(datasynth_generators::TrialBalanceEntry {
6017                account_code: acct_number.clone(),
6018                account_name,
6019                category,
6020                debit_balance: debit,
6021                credit_balance: credit,
6022            });
6023        }
6024
6025        entries
6026    }
6027
6028    /// Build a JE-derived cash flow statement using the indirect method.
6029    ///
6030    /// Compares current and prior cumulative trial balances to derive working capital
6031    /// changes, producing a coherent cash flow statement tied to actual journal entries.
6032    fn build_cash_flow_from_trial_balances(
6033        current_tb: &[datasynth_generators::TrialBalanceEntry],
6034        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
6035        net_income: rust_decimal::Decimal,
6036    ) -> Vec<CashFlowItem> {
6037        use rust_decimal::Decimal;
6038
6039        // Helper: aggregate a TB by category and return net (debit - credit)
6040        let aggregate =
6041            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
6042                let mut map: HashMap<String, Decimal> = HashMap::new();
6043                for entry in tb {
6044                    let net = entry.debit_balance - entry.credit_balance;
6045                    *map.entry(entry.category.clone()).or_default() += net;
6046                }
6047                map
6048            };
6049
6050        let current = aggregate(current_tb);
6051        let prior = prior_tb.map(aggregate);
6052
6053        // Get balance for a category, defaulting to zero
6054        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
6055            *map.get(key).unwrap_or(&Decimal::ZERO)
6056        };
6057
6058        // Compute change: current - prior (or current if no prior)
6059        let change = |key: &str| -> Decimal {
6060            let curr = get(&current, key);
6061            match &prior {
6062                Some(p) => curr - get(p, key),
6063                None => curr,
6064            }
6065        };
6066
6067        // Operating activities (indirect method)
6068        // Depreciation add-back: approximate from FixedAssets decrease
6069        let fixed_asset_change = change("FixedAssets");
6070        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
6071            -fixed_asset_change
6072        } else {
6073            Decimal::ZERO
6074        };
6075
6076        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
6077        let ar_change = change("Receivables");
6078        let inventory_change = change("Inventory");
6079        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
6080        let ap_change = change("Payables");
6081        let accrued_change = change("AccruedLiabilities");
6082
6083        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
6084            + (-ap_change)
6085            + (-accrued_change);
6086
6087        // Investing activities
6088        let capex = if fixed_asset_change > Decimal::ZERO {
6089            -fixed_asset_change
6090        } else {
6091            Decimal::ZERO
6092        };
6093        let investing_cf = capex;
6094
6095        // Financing activities
6096        let debt_change = -change("LongTermDebt");
6097        let equity_change = -change("Equity");
6098        let financing_cf = debt_change + equity_change;
6099
6100        let net_change = operating_cf + investing_cf + financing_cf;
6101
6102        vec![
6103            CashFlowItem {
6104                item_code: "CF-NI".to_string(),
6105                label: "Net Income".to_string(),
6106                category: CashFlowCategory::Operating,
6107                amount: net_income,
6108                amount_prior: None,
6109                sort_order: 1,
6110                is_total: false,
6111            },
6112            CashFlowItem {
6113                item_code: "CF-DEP".to_string(),
6114                label: "Depreciation & Amortization".to_string(),
6115                category: CashFlowCategory::Operating,
6116                amount: depreciation_addback,
6117                amount_prior: None,
6118                sort_order: 2,
6119                is_total: false,
6120            },
6121            CashFlowItem {
6122                item_code: "CF-AR".to_string(),
6123                label: "Change in Accounts Receivable".to_string(),
6124                category: CashFlowCategory::Operating,
6125                amount: -ar_change,
6126                amount_prior: None,
6127                sort_order: 3,
6128                is_total: false,
6129            },
6130            CashFlowItem {
6131                item_code: "CF-AP".to_string(),
6132                label: "Change in Accounts Payable".to_string(),
6133                category: CashFlowCategory::Operating,
6134                amount: -ap_change,
6135                amount_prior: None,
6136                sort_order: 4,
6137                is_total: false,
6138            },
6139            CashFlowItem {
6140                item_code: "CF-INV".to_string(),
6141                label: "Change in Inventory".to_string(),
6142                category: CashFlowCategory::Operating,
6143                amount: -inventory_change,
6144                amount_prior: None,
6145                sort_order: 5,
6146                is_total: false,
6147            },
6148            CashFlowItem {
6149                item_code: "CF-OP".to_string(),
6150                label: "Net Cash from Operating Activities".to_string(),
6151                category: CashFlowCategory::Operating,
6152                amount: operating_cf,
6153                amount_prior: None,
6154                sort_order: 6,
6155                is_total: true,
6156            },
6157            CashFlowItem {
6158                item_code: "CF-CAPEX".to_string(),
6159                label: "Capital Expenditures".to_string(),
6160                category: CashFlowCategory::Investing,
6161                amount: capex,
6162                amount_prior: None,
6163                sort_order: 7,
6164                is_total: false,
6165            },
6166            CashFlowItem {
6167                item_code: "CF-INV-T".to_string(),
6168                label: "Net Cash from Investing Activities".to_string(),
6169                category: CashFlowCategory::Investing,
6170                amount: investing_cf,
6171                amount_prior: None,
6172                sort_order: 8,
6173                is_total: true,
6174            },
6175            CashFlowItem {
6176                item_code: "CF-DEBT".to_string(),
6177                label: "Net Borrowings / (Repayments)".to_string(),
6178                category: CashFlowCategory::Financing,
6179                amount: debt_change,
6180                amount_prior: None,
6181                sort_order: 9,
6182                is_total: false,
6183            },
6184            CashFlowItem {
6185                item_code: "CF-EQ".to_string(),
6186                label: "Equity Changes".to_string(),
6187                category: CashFlowCategory::Financing,
6188                amount: equity_change,
6189                amount_prior: None,
6190                sort_order: 10,
6191                is_total: false,
6192            },
6193            CashFlowItem {
6194                item_code: "CF-FIN-T".to_string(),
6195                label: "Net Cash from Financing Activities".to_string(),
6196                category: CashFlowCategory::Financing,
6197                amount: financing_cf,
6198                amount_prior: None,
6199                sort_order: 11,
6200                is_total: true,
6201            },
6202            CashFlowItem {
6203                item_code: "CF-NET".to_string(),
6204                label: "Net Change in Cash".to_string(),
6205                category: CashFlowCategory::Operating,
6206                amount: net_change,
6207                amount_prior: None,
6208                sort_order: 12,
6209                is_total: true,
6210            },
6211        ]
6212    }
6213
6214    /// Calculate net income from a set of trial balance entries.
6215    ///
6216    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
6217    fn calculate_net_income_from_tb(
6218        tb: &[datasynth_generators::TrialBalanceEntry],
6219    ) -> rust_decimal::Decimal {
6220        use rust_decimal::Decimal;
6221
6222        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6223        for entry in tb {
6224            let net = entry.debit_balance - entry.credit_balance;
6225            *aggregated.entry(entry.category.clone()).or_default() += net;
6226        }
6227
6228        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6229        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6230        let opex = *aggregated
6231            .get("OperatingExpenses")
6232            .unwrap_or(&Decimal::ZERO);
6233        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6234        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6235
6236        // revenue is negative (credit-normal), expenses are positive (debit-normal)
6237        // other_income is typically negative (credit), other_expenses is typically positive
6238        let operating_income = revenue - cogs - opex - other_expenses - other_income;
6239        let tax_rate = Decimal::new(25, 2); // 0.25
6240        let tax = operating_income * tax_rate;
6241        operating_income - tax
6242    }
6243
6244    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
6245    ///
6246    /// Uses the first two digits of the account code to classify into the categories
6247    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
6248    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
6249    /// OperatingExpenses, OtherIncome, OtherExpenses.
6250    fn category_from_account_code(code: &str) -> String {
6251        let prefix: String = code.chars().take(2).collect();
6252        match prefix.as_str() {
6253            "10" => "Cash",
6254            "11" => "Receivables",
6255            "12" | "13" | "14" => "Inventory",
6256            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
6257            "20" => "Payables",
6258            "21" | "22" | "23" | "24" => "AccruedLiabilities",
6259            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
6260            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
6261            "40" | "41" | "42" | "43" | "44" => "Revenue",
6262            "50" | "51" | "52" => "CostOfSales",
6263            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
6264                "OperatingExpenses"
6265            }
6266            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
6267            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
6268            _ => "OperatingExpenses",
6269        }
6270        .to_string()
6271    }
6272
6273    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
6274    fn phase_hr_data(
6275        &mut self,
6276        stats: &mut EnhancedGenerationStatistics,
6277    ) -> SynthResult<HrSnapshot> {
6278        if !self.phase_config.generate_hr {
6279            debug!("Phase 16: Skipped (HR generation disabled)");
6280            return Ok(HrSnapshot::default());
6281        }
6282
6283        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6284
6285        let seed = self.seed;
6286        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6287            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6288        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6289        let company_code = self
6290            .config
6291            .companies
6292            .first()
6293            .map(|c| c.code.as_str())
6294            .unwrap_or("1000");
6295        let currency = self
6296            .config
6297            .companies
6298            .first()
6299            .map(|c| c.currency.as_str())
6300            .unwrap_or("USD");
6301
6302        let employee_ids: Vec<String> = self
6303            .master_data
6304            .employees
6305            .iter()
6306            .map(|e| e.employee_id.clone())
6307            .collect();
6308
6309        if employee_ids.is_empty() {
6310            debug!("Phase 16: Skipped (no employees available)");
6311            return Ok(HrSnapshot::default());
6312        }
6313
6314        // Extract cost-center pool from master data employees for cross-reference
6315        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
6316        let cost_center_ids: Vec<String> = self
6317            .master_data
6318            .employees
6319            .iter()
6320            .filter_map(|e| e.cost_center.clone())
6321            .collect::<std::collections::HashSet<_>>()
6322            .into_iter()
6323            .collect();
6324
6325        let mut snapshot = HrSnapshot::default();
6326
6327        // Generate payroll runs (one per month)
6328        if self.config.hr.payroll.enabled {
6329            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6330                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6331
6332            // Look up country pack for payroll deductions and labels
6333            let payroll_pack = self.primary_pack();
6334
6335            // Store the pack on the generator so generate() resolves
6336            // localized deduction rates and labels from it.
6337            payroll_gen.set_country_pack(payroll_pack.clone());
6338
6339            let employees_with_salary: Vec<(
6340                String,
6341                rust_decimal::Decimal,
6342                Option<String>,
6343                Option<String>,
6344            )> = self
6345                .master_data
6346                .employees
6347                .iter()
6348                .map(|e| {
6349                    // Use the employee's actual annual base salary.
6350                    // Fall back to $60,000 / yr if somehow zero.
6351                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6352                        e.base_salary
6353                    } else {
6354                        rust_decimal::Decimal::from(60_000)
6355                    };
6356                    (
6357                        e.employee_id.clone(),
6358                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
6359                        e.cost_center.clone(),
6360                        e.department_id.clone(),
6361                    )
6362                })
6363                .collect();
6364
6365            // Use generate_with_changes when employee change history is available
6366            // so that salary adjustments, transfers, etc. are reflected in payroll.
6367            let change_history = &self.master_data.employee_change_history;
6368            let has_changes = !change_history.is_empty();
6369            if has_changes {
6370                debug!(
6371                    "Payroll will incorporate {} employee change events",
6372                    change_history.len()
6373                );
6374            }
6375
6376            for month in 0..self.config.global.period_months {
6377                let period_start = start_date + chrono::Months::new(month);
6378                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6379                let (run, items) = if has_changes {
6380                    payroll_gen.generate_with_changes(
6381                        company_code,
6382                        &employees_with_salary,
6383                        period_start,
6384                        period_end,
6385                        currency,
6386                        change_history,
6387                    )
6388                } else {
6389                    payroll_gen.generate(
6390                        company_code,
6391                        &employees_with_salary,
6392                        period_start,
6393                        period_end,
6394                        currency,
6395                    )
6396                };
6397                snapshot.payroll_runs.push(run);
6398                snapshot.payroll_run_count += 1;
6399                snapshot.payroll_line_item_count += items.len();
6400                snapshot.payroll_line_items.extend(items);
6401            }
6402        }
6403
6404        // Generate time entries
6405        if self.config.hr.time_attendance.enabled {
6406            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6407                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6408            let entries = time_gen.generate(
6409                &employee_ids,
6410                start_date,
6411                end_date,
6412                &self.config.hr.time_attendance,
6413            );
6414            snapshot.time_entry_count = entries.len();
6415            snapshot.time_entries = entries;
6416        }
6417
6418        // Generate expense reports
6419        if self.config.hr.expenses.enabled {
6420            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6421                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6422            expense_gen.set_country_pack(self.primary_pack().clone());
6423            let company_currency = self
6424                .config
6425                .companies
6426                .first()
6427                .map(|c| c.currency.as_str())
6428                .unwrap_or("USD");
6429            let reports = expense_gen.generate_with_currency(
6430                &employee_ids,
6431                start_date,
6432                end_date,
6433                &self.config.hr.expenses,
6434                company_currency,
6435            );
6436            snapshot.expense_report_count = reports.len();
6437            snapshot.expense_reports = reports;
6438        }
6439
6440        // Generate benefit enrollments (gated on payroll, since benefits require employees)
6441        if self.config.hr.payroll.enabled {
6442            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6443            let employee_pairs: Vec<(String, String)> = self
6444                .master_data
6445                .employees
6446                .iter()
6447                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6448                .collect();
6449            let enrollments =
6450                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6451            snapshot.benefit_enrollment_count = enrollments.len();
6452            snapshot.benefit_enrollments = enrollments;
6453        }
6454
6455        // Generate defined benefit pension plans (IAS 19 / ASC 715)
6456        if self.phase_config.generate_hr {
6457            let entity_name = self
6458                .config
6459                .companies
6460                .first()
6461                .map(|c| c.name.as_str())
6462                .unwrap_or("Entity");
6463            let period_months = self.config.global.period_months;
6464            let period_label = {
6465                let y = start_date.year();
6466                let m = start_date.month();
6467                if period_months >= 12 {
6468                    format!("FY{y}")
6469                } else {
6470                    format!("{y}-{m:02}")
6471                }
6472            };
6473            let reporting_date =
6474                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6475
6476            // Compute average annual salary from actual payroll data when available.
6477            // PayrollRun.total_gross covers all employees for one pay period; we sum
6478            // across all runs and divide by employee_count to get per-employee total,
6479            // then annualise for sub-annual periods.
6480            let avg_salary: Option<rust_decimal::Decimal> = {
6481                let employee_count = employee_ids.len();
6482                if self.config.hr.payroll.enabled
6483                    && employee_count > 0
6484                    && !snapshot.payroll_runs.is_empty()
6485                {
6486                    // Sum total gross pay across all payroll runs for this company
6487                    let total_gross: rust_decimal::Decimal = snapshot
6488                        .payroll_runs
6489                        .iter()
6490                        .filter(|r| r.company_code == company_code)
6491                        .map(|r| r.total_gross)
6492                        .sum();
6493                    if total_gross > rust_decimal::Decimal::ZERO {
6494                        // Annualise: total_gross covers `period_months` months of pay
6495                        let annual_total = if period_months > 0 && period_months < 12 {
6496                            total_gross * rust_decimal::Decimal::from(12u32)
6497                                / rust_decimal::Decimal::from(period_months)
6498                        } else {
6499                            total_gross
6500                        };
6501                        Some(
6502                            (annual_total / rust_decimal::Decimal::from(employee_count))
6503                                .round_dp(2),
6504                        )
6505                    } else {
6506                        None
6507                    }
6508                } else {
6509                    None
6510                }
6511            };
6512
6513            let mut pension_gen =
6514                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6515            let pension_snap = pension_gen.generate(
6516                company_code,
6517                entity_name,
6518                &period_label,
6519                reporting_date,
6520                employee_ids.len(),
6521                currency,
6522                avg_salary,
6523                period_months,
6524            );
6525            snapshot.pension_plan_count = pension_snap.plans.len();
6526            snapshot.pension_plans = pension_snap.plans;
6527            snapshot.pension_obligations = pension_snap.obligations;
6528            snapshot.pension_plan_assets = pension_snap.plan_assets;
6529            snapshot.pension_disclosures = pension_snap.disclosures;
6530            // Pension JEs are returned here so they can be added to entries
6531            // in the caller (stored temporarily on snapshot for transfer).
6532            // We embed them in the hr snapshot for simplicity; the orchestrator
6533            // will extract and extend `entries`.
6534            snapshot.pension_journal_entries = pension_snap.journal_entries;
6535        }
6536
6537        // Generate stock-based compensation (ASC 718 / IFRS 2)
6538        if self.phase_config.generate_hr && !employee_ids.is_empty() {
6539            let period_months = self.config.global.period_months;
6540            let period_label = {
6541                let y = start_date.year();
6542                let m = start_date.month();
6543                if period_months >= 12 {
6544                    format!("FY{y}")
6545                } else {
6546                    format!("{y}-{m:02}")
6547                }
6548            };
6549            let reporting_date =
6550                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6551
6552            let mut stock_comp_gen =
6553                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6554            let stock_snap = stock_comp_gen.generate(
6555                company_code,
6556                &employee_ids,
6557                start_date,
6558                &period_label,
6559                reporting_date,
6560                currency,
6561            );
6562            snapshot.stock_grant_count = stock_snap.grants.len();
6563            snapshot.stock_grants = stock_snap.grants;
6564            snapshot.stock_comp_expenses = stock_snap.expenses;
6565            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6566        }
6567
6568        stats.payroll_run_count = snapshot.payroll_run_count;
6569        stats.time_entry_count = snapshot.time_entry_count;
6570        stats.expense_report_count = snapshot.expense_report_count;
6571        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6572        stats.pension_plan_count = snapshot.pension_plan_count;
6573        stats.stock_grant_count = snapshot.stock_grant_count;
6574
6575        info!(
6576            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6577            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6578            snapshot.time_entry_count, snapshot.expense_report_count,
6579            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6580            snapshot.stock_grant_count
6581        );
6582        self.check_resources_with_log("post-hr")?;
6583
6584        Ok(snapshot)
6585    }
6586
6587    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
6588    fn phase_accounting_standards(
6589        &mut self,
6590        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6591        journal_entries: &[JournalEntry],
6592        stats: &mut EnhancedGenerationStatistics,
6593    ) -> SynthResult<AccountingStandardsSnapshot> {
6594        if !self.phase_config.generate_accounting_standards {
6595            debug!("Phase 17: Skipped (accounting standards generation disabled)");
6596            return Ok(AccountingStandardsSnapshot::default());
6597        }
6598        info!("Phase 17: Generating Accounting Standards Data");
6599
6600        let seed = self.seed;
6601        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6602            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6603        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6604        let company_code = self
6605            .config
6606            .companies
6607            .first()
6608            .map(|c| c.code.as_str())
6609            .unwrap_or("1000");
6610        let currency = self
6611            .config
6612            .companies
6613            .first()
6614            .map(|c| c.currency.as_str())
6615            .unwrap_or("USD");
6616
6617        // Convert config framework to standards framework.
6618        // If the user explicitly set a framework in the YAML config, use that.
6619        // Otherwise, fall back to the country pack's accounting.framework field,
6620        // and if that is also absent or unrecognised, default to US GAAP.
6621        let framework = match self.config.accounting_standards.framework {
6622            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6623                datasynth_standards::framework::AccountingFramework::UsGaap
6624            }
6625            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6626                datasynth_standards::framework::AccountingFramework::Ifrs
6627            }
6628            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6629                datasynth_standards::framework::AccountingFramework::DualReporting
6630            }
6631            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6632                datasynth_standards::framework::AccountingFramework::FrenchGaap
6633            }
6634            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6635                datasynth_standards::framework::AccountingFramework::GermanGaap
6636            }
6637            None => {
6638                // Derive framework from the primary company's country pack
6639                let pack = self.primary_pack();
6640                let pack_fw = pack.accounting.framework.as_str();
6641                match pack_fw {
6642                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6643                    "dual_reporting" => {
6644                        datasynth_standards::framework::AccountingFramework::DualReporting
6645                    }
6646                    "french_gaap" => {
6647                        datasynth_standards::framework::AccountingFramework::FrenchGaap
6648                    }
6649                    "german_gaap" | "hgb" => {
6650                        datasynth_standards::framework::AccountingFramework::GermanGaap
6651                    }
6652                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
6653                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6654                }
6655            }
6656        };
6657
6658        let mut snapshot = AccountingStandardsSnapshot::default();
6659
6660        // Revenue recognition
6661        if self.config.accounting_standards.revenue_recognition.enabled {
6662            let customer_ids: Vec<String> = self
6663                .master_data
6664                .customers
6665                .iter()
6666                .map(|c| c.customer_id.clone())
6667                .collect();
6668
6669            if !customer_ids.is_empty() {
6670                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6671                let contracts = rev_gen.generate(
6672                    company_code,
6673                    &customer_ids,
6674                    start_date,
6675                    end_date,
6676                    currency,
6677                    &self.config.accounting_standards.revenue_recognition,
6678                    framework,
6679                );
6680                snapshot.revenue_contract_count = contracts.len();
6681                snapshot.contracts = contracts;
6682            }
6683        }
6684
6685        // Impairment testing
6686        if self.config.accounting_standards.impairment.enabled {
6687            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6688                .master_data
6689                .assets
6690                .iter()
6691                .map(|a| {
6692                    (
6693                        a.asset_id.clone(),
6694                        a.description.clone(),
6695                        a.acquisition_cost,
6696                    )
6697                })
6698                .collect();
6699
6700            if !asset_data.is_empty() {
6701                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6702                let tests = imp_gen.generate(
6703                    company_code,
6704                    &asset_data,
6705                    end_date,
6706                    &self.config.accounting_standards.impairment,
6707                    framework,
6708                );
6709                snapshot.impairment_test_count = tests.len();
6710                snapshot.impairment_tests = tests;
6711            }
6712        }
6713
6714        // Business combinations (IFRS 3 / ASC 805)
6715        if self
6716            .config
6717            .accounting_standards
6718            .business_combinations
6719            .enabled
6720        {
6721            let bc_config = &self.config.accounting_standards.business_combinations;
6722            let framework_str = match framework {
6723                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6724                _ => "US_GAAP",
6725            };
6726            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6727            let bc_snap = bc_gen.generate(
6728                company_code,
6729                currency,
6730                start_date,
6731                end_date,
6732                bc_config.acquisition_count,
6733                framework_str,
6734            );
6735            snapshot.business_combination_count = bc_snap.combinations.len();
6736            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6737            snapshot.business_combinations = bc_snap.combinations;
6738        }
6739
6740        // Expected Credit Loss (IFRS 9 / ASC 326)
6741        if self
6742            .config
6743            .accounting_standards
6744            .expected_credit_loss
6745            .enabled
6746        {
6747            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6748            let framework_str = match framework {
6749                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6750                _ => "ASC_326",
6751            };
6752
6753            // Use AR aging data from the subledger snapshot if available;
6754            // otherwise generate synthetic bucket exposures.
6755            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6756
6757            let mut ecl_gen = EclGenerator::new(seed + 43);
6758
6759            // Collect combined bucket totals across all company AR aging reports.
6760            let bucket_exposures: Vec<(
6761                datasynth_core::models::subledger::ar::AgingBucket,
6762                rust_decimal::Decimal,
6763            )> = if ar_aging_reports.is_empty() {
6764                // No AR aging data — synthesise plausible bucket exposures.
6765                use datasynth_core::models::subledger::ar::AgingBucket;
6766                vec![
6767                    (
6768                        AgingBucket::Current,
6769                        rust_decimal::Decimal::from(500_000_u32),
6770                    ),
6771                    (
6772                        AgingBucket::Days1To30,
6773                        rust_decimal::Decimal::from(120_000_u32),
6774                    ),
6775                    (
6776                        AgingBucket::Days31To60,
6777                        rust_decimal::Decimal::from(45_000_u32),
6778                    ),
6779                    (
6780                        AgingBucket::Days61To90,
6781                        rust_decimal::Decimal::from(15_000_u32),
6782                    ),
6783                    (
6784                        AgingBucket::Over90Days,
6785                        rust_decimal::Decimal::from(8_000_u32),
6786                    ),
6787                ]
6788            } else {
6789                use datasynth_core::models::subledger::ar::AgingBucket;
6790                // Sum bucket totals from all reports.
6791                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6792                    std::collections::HashMap::new();
6793                for report in ar_aging_reports {
6794                    for (bucket, amount) in &report.bucket_totals {
6795                        *totals.entry(*bucket).or_default() += amount;
6796                    }
6797                }
6798                AgingBucket::all()
6799                    .into_iter()
6800                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6801                    .collect()
6802            };
6803
6804            let ecl_snap = ecl_gen.generate(
6805                company_code,
6806                end_date,
6807                &bucket_exposures,
6808                ecl_config,
6809                &period_label,
6810                framework_str,
6811            );
6812
6813            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6814            snapshot.ecl_models = ecl_snap.ecl_models;
6815            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6816            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6817        }
6818
6819        // Provisions and contingencies (IAS 37 / ASC 450)
6820        {
6821            let framework_str = match framework {
6822                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6823                _ => "US_GAAP",
6824            };
6825
6826            // Compute actual revenue from the journal entries generated so far.
6827            // The `journal_entries` slice passed to this phase contains all GL entries
6828            // up to and including Period Close. Fall back to a minimum of 100_000 to
6829            // avoid degenerate zero-based provision amounts on first-period datasets.
6830            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6831                .max(rust_decimal::Decimal::from(100_000_u32));
6832
6833            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6834
6835            let mut prov_gen = ProvisionGenerator::new(seed + 44);
6836            let prov_snap = prov_gen.generate(
6837                company_code,
6838                currency,
6839                revenue_proxy,
6840                end_date,
6841                &period_label,
6842                framework_str,
6843                None, // prior_opening: no carry-forward data in single-period runs
6844            );
6845
6846            snapshot.provision_count = prov_snap.provisions.len();
6847            snapshot.provisions = prov_snap.provisions;
6848            snapshot.provision_movements = prov_snap.movements;
6849            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6850            snapshot.provision_journal_entries = prov_snap.journal_entries;
6851        }
6852
6853        // IAS 21 Functional Currency Translation
6854        // For each company whose functional currency differs from the presentation
6855        // currency, generate a CurrencyTranslationResult with CTA (OCI).
6856        {
6857            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6858
6859            let presentation_currency = self
6860                .config
6861                .global
6862                .presentation_currency
6863                .clone()
6864                .unwrap_or_else(|| self.config.global.group_currency.clone());
6865
6866            // Build a minimal rate table populated with approximate rates from
6867            // the FX model base rates (USD-based) so we can do the translation.
6868            let mut rate_table = FxRateTable::new(&presentation_currency);
6869
6870            // Populate with base rates against USD; if presentation_currency is
6871            // not USD we do a best-effort two-step conversion using the table's
6872            // triangulation support.
6873            let base_rates = base_rates_usd();
6874            for (ccy, rate) in &base_rates {
6875                rate_table.add_rate(FxRate::new(
6876                    ccy,
6877                    "USD",
6878                    RateType::Closing,
6879                    end_date,
6880                    *rate,
6881                    "SYNTHETIC",
6882                ));
6883                // Average rate = 98% of closing (approximation).
6884                // 0.98 = 98/100 = Decimal::new(98, 2)
6885                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6886                rate_table.add_rate(FxRate::new(
6887                    ccy,
6888                    "USD",
6889                    RateType::Average,
6890                    end_date,
6891                    avg,
6892                    "SYNTHETIC",
6893                ));
6894            }
6895
6896            let mut translation_results = Vec::new();
6897            for company in &self.config.companies {
6898                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
6899                // to ensure the translation produces non-trivial CTA amounts.
6900                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6901                    .max(rust_decimal::Decimal::from(100_000_u32));
6902
6903                let func_ccy = company
6904                    .functional_currency
6905                    .clone()
6906                    .unwrap_or_else(|| company.currency.clone());
6907
6908                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6909                    &company.code,
6910                    &func_ccy,
6911                    &presentation_currency,
6912                    &ias21_period_label,
6913                    end_date,
6914                    company_revenue,
6915                    &rate_table,
6916                );
6917                translation_results.push(result);
6918            }
6919
6920            snapshot.currency_translation_count = translation_results.len();
6921            snapshot.currency_translation_results = translation_results;
6922        }
6923
6924        stats.revenue_contract_count = snapshot.revenue_contract_count;
6925        stats.impairment_test_count = snapshot.impairment_test_count;
6926        stats.business_combination_count = snapshot.business_combination_count;
6927        stats.ecl_model_count = snapshot.ecl_model_count;
6928        stats.provision_count = snapshot.provision_count;
6929
6930        info!(
6931            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6932            snapshot.revenue_contract_count,
6933            snapshot.impairment_test_count,
6934            snapshot.business_combination_count,
6935            snapshot.ecl_model_count,
6936            snapshot.provision_count,
6937            snapshot.currency_translation_count
6938        );
6939        self.check_resources_with_log("post-accounting-standards")?;
6940
6941        Ok(snapshot)
6942    }
6943
6944    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
6945    fn phase_manufacturing(
6946        &mut self,
6947        stats: &mut EnhancedGenerationStatistics,
6948    ) -> SynthResult<ManufacturingSnapshot> {
6949        if !self.phase_config.generate_manufacturing {
6950            debug!("Phase 18: Skipped (manufacturing generation disabled)");
6951            return Ok(ManufacturingSnapshot::default());
6952        }
6953        info!("Phase 18: Generating Manufacturing Data");
6954
6955        let seed = self.seed;
6956        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6957            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6958        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6959        let company_code = self
6960            .config
6961            .companies
6962            .first()
6963            .map(|c| c.code.as_str())
6964            .unwrap_or("1000");
6965
6966        let material_data: Vec<(String, String)> = self
6967            .master_data
6968            .materials
6969            .iter()
6970            .map(|m| (m.material_id.clone(), m.description.clone()))
6971            .collect();
6972
6973        if material_data.is_empty() {
6974            debug!("Phase 18: Skipped (no materials available)");
6975            return Ok(ManufacturingSnapshot::default());
6976        }
6977
6978        let mut snapshot = ManufacturingSnapshot::default();
6979
6980        // Generate production orders
6981        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
6982        let production_orders = prod_gen.generate(
6983            company_code,
6984            &material_data,
6985            start_date,
6986            end_date,
6987            &self.config.manufacturing.production_orders,
6988            &self.config.manufacturing.costing,
6989            &self.config.manufacturing.routing,
6990        );
6991        snapshot.production_order_count = production_orders.len();
6992
6993        // Generate quality inspections from production orders
6994        let inspection_data: Vec<(String, String, String)> = production_orders
6995            .iter()
6996            .map(|po| {
6997                (
6998                    po.order_id.clone(),
6999                    po.material_id.clone(),
7000                    po.material_description.clone(),
7001                )
7002            })
7003            .collect();
7004
7005        snapshot.production_orders = production_orders;
7006
7007        if !inspection_data.is_empty() {
7008            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
7009            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
7010            snapshot.quality_inspection_count = inspections.len();
7011            snapshot.quality_inspections = inspections;
7012        }
7013
7014        // Generate cycle counts (one per month)
7015        let storage_locations: Vec<(String, String)> = material_data
7016            .iter()
7017            .enumerate()
7018            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
7019            .collect();
7020
7021        let employee_ids: Vec<String> = self
7022            .master_data
7023            .employees
7024            .iter()
7025            .map(|e| e.employee_id.clone())
7026            .collect();
7027        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
7028            .with_employee_pool(employee_ids);
7029        let mut cycle_count_total = 0usize;
7030        for month in 0..self.config.global.period_months {
7031            let count_date = start_date + chrono::Months::new(month);
7032            let items_per_count = storage_locations.len().clamp(10, 50);
7033            let cc = cc_gen.generate(
7034                company_code,
7035                &storage_locations,
7036                count_date,
7037                items_per_count,
7038            );
7039            snapshot.cycle_counts.push(cc);
7040            cycle_count_total += 1;
7041        }
7042        snapshot.cycle_count_count = cycle_count_total;
7043
7044        // Generate BOM components
7045        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
7046        let bom_components = bom_gen.generate(company_code, &material_data);
7047        snapshot.bom_component_count = bom_components.len();
7048        snapshot.bom_components = bom_components;
7049
7050        // Generate inventory movements — link GoodsIssue movements to real production order IDs
7051        let currency = self
7052            .config
7053            .companies
7054            .first()
7055            .map(|c| c.currency.as_str())
7056            .unwrap_or("USD");
7057        let production_order_ids: Vec<String> = snapshot
7058            .production_orders
7059            .iter()
7060            .map(|po| po.order_id.clone())
7061            .collect();
7062        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
7063        let inventory_movements = inv_mov_gen.generate_with_production_orders(
7064            company_code,
7065            &material_data,
7066            start_date,
7067            end_date,
7068            2,
7069            currency,
7070            &production_order_ids,
7071        );
7072        snapshot.inventory_movement_count = inventory_movements.len();
7073        snapshot.inventory_movements = inventory_movements;
7074
7075        stats.production_order_count = snapshot.production_order_count;
7076        stats.quality_inspection_count = snapshot.quality_inspection_count;
7077        stats.cycle_count_count = snapshot.cycle_count_count;
7078        stats.bom_component_count = snapshot.bom_component_count;
7079        stats.inventory_movement_count = snapshot.inventory_movement_count;
7080
7081        info!(
7082            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
7083            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
7084            snapshot.bom_component_count, snapshot.inventory_movement_count
7085        );
7086        self.check_resources_with_log("post-manufacturing")?;
7087
7088        Ok(snapshot)
7089    }
7090
7091    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
7092    fn phase_sales_kpi_budgets(
7093        &mut self,
7094        coa: &Arc<ChartOfAccounts>,
7095        financial_reporting: &FinancialReportingSnapshot,
7096        stats: &mut EnhancedGenerationStatistics,
7097    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
7098        if !self.phase_config.generate_sales_kpi_budgets {
7099            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
7100            return Ok(SalesKpiBudgetsSnapshot::default());
7101        }
7102        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
7103
7104        let seed = self.seed;
7105        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7106            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7107        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7108        let company_code = self
7109            .config
7110            .companies
7111            .first()
7112            .map(|c| c.code.as_str())
7113            .unwrap_or("1000");
7114
7115        let mut snapshot = SalesKpiBudgetsSnapshot::default();
7116
7117        // Sales Quotes
7118        if self.config.sales_quotes.enabled {
7119            let customer_data: Vec<(String, String)> = self
7120                .master_data
7121                .customers
7122                .iter()
7123                .map(|c| (c.customer_id.clone(), c.name.clone()))
7124                .collect();
7125            let material_data: Vec<(String, String)> = self
7126                .master_data
7127                .materials
7128                .iter()
7129                .map(|m| (m.material_id.clone(), m.description.clone()))
7130                .collect();
7131
7132            if !customer_data.is_empty() && !material_data.is_empty() {
7133                let employee_ids: Vec<String> = self
7134                    .master_data
7135                    .employees
7136                    .iter()
7137                    .map(|e| e.employee_id.clone())
7138                    .collect();
7139                let customer_ids: Vec<String> = self
7140                    .master_data
7141                    .customers
7142                    .iter()
7143                    .map(|c| c.customer_id.clone())
7144                    .collect();
7145                let company_currency = self
7146                    .config
7147                    .companies
7148                    .first()
7149                    .map(|c| c.currency.as_str())
7150                    .unwrap_or("USD");
7151
7152                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
7153                    .with_pools(employee_ids, customer_ids);
7154                let quotes = quote_gen.generate_with_currency(
7155                    company_code,
7156                    &customer_data,
7157                    &material_data,
7158                    start_date,
7159                    end_date,
7160                    &self.config.sales_quotes,
7161                    company_currency,
7162                );
7163                snapshot.sales_quote_count = quotes.len();
7164                snapshot.sales_quotes = quotes;
7165            }
7166        }
7167
7168        // Management KPIs
7169        if self.config.financial_reporting.management_kpis.enabled {
7170            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
7171            let mut kpis = kpi_gen.generate(
7172                company_code,
7173                start_date,
7174                end_date,
7175                &self.config.financial_reporting.management_kpis,
7176            );
7177
7178            // Override financial KPIs with actual data from financial statements
7179            {
7180                use rust_decimal::Decimal;
7181
7182                if let Some(income_stmt) =
7183                    financial_reporting.financial_statements.iter().find(|fs| {
7184                        fs.statement_type == StatementType::IncomeStatement
7185                            && fs.company_code == company_code
7186                    })
7187                {
7188                    // Extract revenue and COGS from income statement line items
7189                    let total_revenue: Decimal = income_stmt
7190                        .line_items
7191                        .iter()
7192                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
7193                        .map(|li| li.amount)
7194                        .sum();
7195                    let total_cogs: Decimal = income_stmt
7196                        .line_items
7197                        .iter()
7198                        .filter(|li| {
7199                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7200                                && !li.is_total
7201                        })
7202                        .map(|li| li.amount.abs())
7203                        .sum();
7204                    let total_opex: Decimal = income_stmt
7205                        .line_items
7206                        .iter()
7207                        .filter(|li| {
7208                            li.section.contains("Expense")
7209                                && !li.is_total
7210                                && !li.section.contains("Cost")
7211                        })
7212                        .map(|li| li.amount.abs())
7213                        .sum();
7214
7215                    if total_revenue > Decimal::ZERO {
7216                        let hundred = Decimal::from(100);
7217                        let gross_margin_pct =
7218                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7219                        let operating_income = total_revenue - total_cogs - total_opex;
7220                        let op_margin_pct =
7221                            (operating_income * hundred / total_revenue).round_dp(2);
7222
7223                        // Override gross margin and operating margin KPIs
7224                        for kpi in &mut kpis {
7225                            if kpi.name == "Gross Margin" {
7226                                kpi.value = gross_margin_pct;
7227                            } else if kpi.name == "Operating Margin" {
7228                                kpi.value = op_margin_pct;
7229                            }
7230                        }
7231                    }
7232                }
7233
7234                // Override Current Ratio from balance sheet
7235                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7236                    fs.statement_type == StatementType::BalanceSheet
7237                        && fs.company_code == company_code
7238                }) {
7239                    let current_assets: Decimal = bs
7240                        .line_items
7241                        .iter()
7242                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7243                        .map(|li| li.amount)
7244                        .sum();
7245                    let current_liabilities: Decimal = bs
7246                        .line_items
7247                        .iter()
7248                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7249                        .map(|li| li.amount.abs())
7250                        .sum();
7251
7252                    if current_liabilities > Decimal::ZERO {
7253                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
7254                        for kpi in &mut kpis {
7255                            if kpi.name == "Current Ratio" {
7256                                kpi.value = current_ratio;
7257                            }
7258                        }
7259                    }
7260                }
7261            }
7262
7263            snapshot.kpi_count = kpis.len();
7264            snapshot.kpis = kpis;
7265        }
7266
7267        // Budgets
7268        if self.config.financial_reporting.budgets.enabled {
7269            let account_data: Vec<(String, String)> = coa
7270                .accounts
7271                .iter()
7272                .map(|a| (a.account_number.clone(), a.short_description.clone()))
7273                .collect();
7274
7275            if !account_data.is_empty() {
7276                let fiscal_year = start_date.year() as u32;
7277                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7278                let budget = budget_gen.generate(
7279                    company_code,
7280                    fiscal_year,
7281                    &account_data,
7282                    &self.config.financial_reporting.budgets,
7283                );
7284                snapshot.budget_line_count = budget.line_items.len();
7285                snapshot.budgets.push(budget);
7286            }
7287        }
7288
7289        stats.sales_quote_count = snapshot.sales_quote_count;
7290        stats.kpi_count = snapshot.kpi_count;
7291        stats.budget_line_count = snapshot.budget_line_count;
7292
7293        info!(
7294            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7295            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7296        );
7297        self.check_resources_with_log("post-sales-kpi-budgets")?;
7298
7299        Ok(snapshot)
7300    }
7301
7302    /// Compute pre-tax income for a single company from actual journal entries.
7303    ///
7304    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
7305    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
7306    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
7307    /// and the period-close engine so that all three use a consistent definition.
7308    fn compute_pre_tax_income(
7309        company_code: &str,
7310        journal_entries: &[JournalEntry],
7311    ) -> rust_decimal::Decimal {
7312        use datasynth_core::accounts::AccountCategory;
7313        use rust_decimal::Decimal;
7314
7315        let mut total_revenue = Decimal::ZERO;
7316        let mut total_expenses = Decimal::ZERO;
7317
7318        for je in journal_entries {
7319            if je.header.company_code != company_code {
7320                continue;
7321            }
7322            for line in &je.lines {
7323                let cat = AccountCategory::from_account(&line.gl_account);
7324                match cat {
7325                    AccountCategory::Revenue => {
7326                        total_revenue += line.credit_amount - line.debit_amount;
7327                    }
7328                    AccountCategory::Cogs
7329                    | AccountCategory::OperatingExpense
7330                    | AccountCategory::OtherIncomeExpense => {
7331                        total_expenses += line.debit_amount - line.credit_amount;
7332                    }
7333                    _ => {}
7334                }
7335            }
7336        }
7337
7338        let pti = (total_revenue - total_expenses).round_dp(2);
7339        if pti == rust_decimal::Decimal::ZERO {
7340            // No income statement activity yet — fall back to a synthetic value so the
7341            // tax provision generator can still produce meaningful output.
7342            rust_decimal::Decimal::from(1_000_000u32)
7343        } else {
7344            pti
7345        }
7346    }
7347
7348    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
7349    fn phase_tax_generation(
7350        &mut self,
7351        document_flows: &DocumentFlowSnapshot,
7352        journal_entries: &[JournalEntry],
7353        stats: &mut EnhancedGenerationStatistics,
7354    ) -> SynthResult<TaxSnapshot> {
7355        if !self.phase_config.generate_tax {
7356            debug!("Phase 20: Skipped (tax generation disabled)");
7357            return Ok(TaxSnapshot::default());
7358        }
7359        info!("Phase 20: Generating Tax Data");
7360
7361        let seed = self.seed;
7362        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7363            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7364        let fiscal_year = start_date.year();
7365        let company_code = self
7366            .config
7367            .companies
7368            .first()
7369            .map(|c| c.code.as_str())
7370            .unwrap_or("1000");
7371
7372        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7373            seed + 370,
7374            self.config.tax.clone(),
7375        );
7376
7377        let pack = self.primary_pack().clone();
7378        let (jurisdictions, codes) =
7379            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7380
7381        // Generate tax provisions for each company
7382        let mut provisions = Vec::new();
7383        if self.config.tax.provisions.enabled {
7384            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7385            for company in &self.config.companies {
7386                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7387                let statutory_rate = rust_decimal::Decimal::new(
7388                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7389                    2,
7390                );
7391                let provision = provision_gen.generate(
7392                    &company.code,
7393                    start_date,
7394                    pre_tax_income,
7395                    statutory_rate,
7396                );
7397                provisions.push(provision);
7398            }
7399        }
7400
7401        // Generate tax lines from document invoices
7402        let mut tax_lines = Vec::new();
7403        if !codes.is_empty() {
7404            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7405                datasynth_generators::TaxLineGeneratorConfig::default(),
7406                codes.clone(),
7407                seed + 372,
7408            );
7409
7410            // Tax lines from vendor invoices (input tax)
7411            // Use the first company's country as buyer country
7412            let buyer_country = self
7413                .config
7414                .companies
7415                .first()
7416                .map(|c| c.country.as_str())
7417                .unwrap_or("US");
7418            for vi in &document_flows.vendor_invoices {
7419                let lines = tax_line_gen.generate_for_document(
7420                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
7421                    &vi.header.document_id,
7422                    buyer_country, // seller approx same country
7423                    buyer_country,
7424                    vi.payable_amount,
7425                    vi.header.document_date,
7426                    None,
7427                );
7428                tax_lines.extend(lines);
7429            }
7430
7431            // Tax lines from customer invoices (output tax)
7432            for ci in &document_flows.customer_invoices {
7433                let lines = tax_line_gen.generate_for_document(
7434                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7435                    &ci.header.document_id,
7436                    buyer_country, // seller is the company
7437                    buyer_country,
7438                    ci.total_gross_amount,
7439                    ci.header.document_date,
7440                    None,
7441                );
7442                tax_lines.extend(lines);
7443            }
7444        }
7445
7446        // Generate deferred tax data (IAS 12 / ASC 740) for each company
7447        let deferred_tax = {
7448            let companies: Vec<(&str, &str)> = self
7449                .config
7450                .companies
7451                .iter()
7452                .map(|c| (c.code.as_str(), c.country.as_str()))
7453                .collect();
7454            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7455            deferred_gen.generate(&companies, start_date, journal_entries)
7456        };
7457
7458        // Build a document_id → posting_date map so each tax JE uses its
7459        // source document's date rather than a blanket period-end date.
7460        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7461            std::collections::HashMap::new();
7462        for vi in &document_flows.vendor_invoices {
7463            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7464        }
7465        for ci in &document_flows.customer_invoices {
7466            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7467        }
7468
7469        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
7470        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7471        let tax_posting_journal_entries = if !tax_lines.is_empty() {
7472            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7473                &tax_lines,
7474                company_code,
7475                &doc_dates,
7476                end_date,
7477            );
7478            debug!("Generated {} tax posting JEs", jes.len());
7479            jes
7480        } else {
7481            Vec::new()
7482        };
7483
7484        let snapshot = TaxSnapshot {
7485            jurisdiction_count: jurisdictions.len(),
7486            code_count: codes.len(),
7487            jurisdictions,
7488            codes,
7489            tax_provisions: provisions,
7490            tax_lines,
7491            tax_returns: Vec::new(),
7492            withholding_records: Vec::new(),
7493            tax_anomaly_labels: Vec::new(),
7494            deferred_tax,
7495            tax_posting_journal_entries,
7496        };
7497
7498        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7499        stats.tax_code_count = snapshot.code_count;
7500        stats.tax_provision_count = snapshot.tax_provisions.len();
7501        stats.tax_line_count = snapshot.tax_lines.len();
7502
7503        info!(
7504            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7505            snapshot.jurisdiction_count,
7506            snapshot.code_count,
7507            snapshot.tax_provisions.len(),
7508            snapshot.deferred_tax.temporary_differences.len(),
7509            snapshot.deferred_tax.journal_entries.len(),
7510            snapshot.tax_posting_journal_entries.len(),
7511        );
7512        self.check_resources_with_log("post-tax")?;
7513
7514        Ok(snapshot)
7515    }
7516
7517    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
7518    fn phase_esg_generation(
7519        &mut self,
7520        document_flows: &DocumentFlowSnapshot,
7521        manufacturing: &ManufacturingSnapshot,
7522        stats: &mut EnhancedGenerationStatistics,
7523    ) -> SynthResult<EsgSnapshot> {
7524        if !self.phase_config.generate_esg {
7525            debug!("Phase 21: Skipped (ESG generation disabled)");
7526            return Ok(EsgSnapshot::default());
7527        }
7528        let degradation = self.check_resources()?;
7529        if degradation >= DegradationLevel::Reduced {
7530            debug!(
7531                "Phase skipped due to resource pressure (degradation: {:?})",
7532                degradation
7533            );
7534            return Ok(EsgSnapshot::default());
7535        }
7536        info!("Phase 21: Generating ESG Data");
7537
7538        let seed = self.seed;
7539        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7540            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7541        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7542        let entity_id = self
7543            .config
7544            .companies
7545            .first()
7546            .map(|c| c.code.as_str())
7547            .unwrap_or("1000");
7548
7549        let esg_cfg = &self.config.esg;
7550        let mut snapshot = EsgSnapshot::default();
7551
7552        // Energy consumption (feeds into scope 1 & 2 emissions)
7553        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7554            esg_cfg.environmental.energy.clone(),
7555            seed + 80,
7556        );
7557        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7558
7559        // Water usage
7560        let facility_count = esg_cfg.environmental.energy.facility_count;
7561        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7562        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7563
7564        // Waste
7565        let mut waste_gen = datasynth_generators::WasteGenerator::new(
7566            seed + 82,
7567            esg_cfg.environmental.waste.diversion_target,
7568            facility_count,
7569        );
7570        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7571
7572        // Emissions (scope 1, 2, 3)
7573        let mut emission_gen =
7574            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7575
7576        // Build EnergyInput from energy_records
7577        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7578            .iter()
7579            .map(|e| datasynth_generators::EnergyInput {
7580                facility_id: e.facility_id.clone(),
7581                energy_type: match e.energy_source {
7582                    EnergySourceType::NaturalGas => {
7583                        datasynth_generators::EnergyInputType::NaturalGas
7584                    }
7585                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7586                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7587                    _ => datasynth_generators::EnergyInputType::Electricity,
7588                },
7589                consumption_kwh: e.consumption_kwh,
7590                period: e.period,
7591            })
7592            .collect();
7593
7594        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
7595        if !manufacturing.production_orders.is_empty() {
7596            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7597                &manufacturing.production_orders,
7598                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
7599                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
7600            );
7601            if !mfg_energy.is_empty() {
7602                info!(
7603                    "ESG: {} energy inputs derived from {} production orders",
7604                    mfg_energy.len(),
7605                    manufacturing.production_orders.len(),
7606                );
7607                energy_inputs.extend(mfg_energy);
7608            }
7609        }
7610
7611        let mut emissions = Vec::new();
7612        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7613        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7614
7615        // Scope 3: use vendor spend data from actual payments
7616        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7617            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7618            for payment in &document_flows.payments {
7619                if payment.is_vendor {
7620                    *totals
7621                        .entry(payment.business_partner_id.clone())
7622                        .or_default() += payment.amount;
7623                }
7624            }
7625            totals
7626        };
7627        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7628            .master_data
7629            .vendors
7630            .iter()
7631            .map(|v| {
7632                let spend = vendor_payment_totals
7633                    .get(&v.vendor_id)
7634                    .copied()
7635                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7636                datasynth_generators::VendorSpendInput {
7637                    vendor_id: v.vendor_id.clone(),
7638                    category: format!("{:?}", v.vendor_type).to_lowercase(),
7639                    spend,
7640                    country: v.country.clone(),
7641                }
7642            })
7643            .collect();
7644        if !vendor_spend.is_empty() {
7645            emissions.extend(emission_gen.generate_scope3_purchased_goods(
7646                entity_id,
7647                &vendor_spend,
7648                start_date,
7649                end_date,
7650            ));
7651        }
7652
7653        // Business travel & commuting (scope 3)
7654        let headcount = self.master_data.employees.len() as u32;
7655        if headcount > 0 {
7656            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7657            emissions.extend(emission_gen.generate_scope3_business_travel(
7658                entity_id,
7659                travel_spend,
7660                start_date,
7661            ));
7662            emissions
7663                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7664        }
7665
7666        snapshot.emission_count = emissions.len();
7667        snapshot.emissions = emissions;
7668        snapshot.energy = energy_records;
7669
7670        // Social: Workforce diversity, pay equity, safety
7671        let mut workforce_gen =
7672            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7673        let total_headcount = headcount.max(100);
7674        snapshot.diversity =
7675            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7676        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7677
7678        // v2.4: Derive additional workforce diversity metrics from actual employee data
7679        if !self.master_data.employees.is_empty() {
7680            let hr_diversity = workforce_gen.generate_diversity_from_employees(
7681                entity_id,
7682                &self.master_data.employees,
7683                end_date,
7684            );
7685            if !hr_diversity.is_empty() {
7686                info!(
7687                    "ESG: {} diversity metrics derived from {} actual employees",
7688                    hr_diversity.len(),
7689                    self.master_data.employees.len(),
7690                );
7691                snapshot.diversity.extend(hr_diversity);
7692            }
7693        }
7694
7695        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7696            entity_id,
7697            facility_count,
7698            start_date,
7699            end_date,
7700        );
7701
7702        // Compute safety metrics
7703        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
7704        let safety_metric = workforce_gen.compute_safety_metrics(
7705            entity_id,
7706            &snapshot.safety_incidents,
7707            total_hours,
7708            start_date,
7709        );
7710        snapshot.safety_metrics = vec![safety_metric];
7711
7712        // Governance
7713        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7714            seed + 85,
7715            esg_cfg.governance.board_size,
7716            esg_cfg.governance.independence_target,
7717        );
7718        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7719
7720        // Supplier ESG assessments
7721        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7722            esg_cfg.supply_chain_esg.clone(),
7723            seed + 86,
7724        );
7725        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7726            .master_data
7727            .vendors
7728            .iter()
7729            .map(|v| datasynth_generators::VendorInput {
7730                vendor_id: v.vendor_id.clone(),
7731                country: v.country.clone(),
7732                industry: format!("{:?}", v.vendor_type).to_lowercase(),
7733                quality_score: None,
7734            })
7735            .collect();
7736        snapshot.supplier_assessments =
7737            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7738
7739        // Disclosures
7740        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7741            seed + 87,
7742            esg_cfg.reporting.clone(),
7743            esg_cfg.climate_scenarios.clone(),
7744        );
7745        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7746        snapshot.disclosures = disclosure_gen.generate_disclosures(
7747            entity_id,
7748            &snapshot.materiality,
7749            start_date,
7750            end_date,
7751        );
7752        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7753        snapshot.disclosure_count = snapshot.disclosures.len();
7754
7755        // Anomaly injection
7756        if esg_cfg.anomaly_rate > 0.0 {
7757            let mut anomaly_injector =
7758                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7759            let mut labels = Vec::new();
7760            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7761            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7762            labels.extend(
7763                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7764            );
7765            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7766            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7767            snapshot.anomaly_labels = labels;
7768        }
7769
7770        stats.esg_emission_count = snapshot.emission_count;
7771        stats.esg_disclosure_count = snapshot.disclosure_count;
7772
7773        info!(
7774            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
7775            snapshot.emission_count,
7776            snapshot.disclosure_count,
7777            snapshot.supplier_assessments.len()
7778        );
7779        self.check_resources_with_log("post-esg")?;
7780
7781        Ok(snapshot)
7782    }
7783
7784    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
7785    fn phase_treasury_data(
7786        &mut self,
7787        document_flows: &DocumentFlowSnapshot,
7788        subledger: &SubledgerSnapshot,
7789        intercompany: &IntercompanySnapshot,
7790        stats: &mut EnhancedGenerationStatistics,
7791    ) -> SynthResult<TreasurySnapshot> {
7792        if !self.phase_config.generate_treasury {
7793            debug!("Phase 22: Skipped (treasury generation disabled)");
7794            return Ok(TreasurySnapshot::default());
7795        }
7796        let degradation = self.check_resources()?;
7797        if degradation >= DegradationLevel::Reduced {
7798            debug!(
7799                "Phase skipped due to resource pressure (degradation: {:?})",
7800                degradation
7801            );
7802            return Ok(TreasurySnapshot::default());
7803        }
7804        info!("Phase 22: Generating Treasury Data");
7805
7806        let seed = self.seed;
7807        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7808            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7809        let currency = self
7810            .config
7811            .companies
7812            .first()
7813            .map(|c| c.currency.as_str())
7814            .unwrap_or("USD");
7815        let entity_id = self
7816            .config
7817            .companies
7818            .first()
7819            .map(|c| c.code.as_str())
7820            .unwrap_or("1000");
7821
7822        let mut snapshot = TreasurySnapshot::default();
7823
7824        // Generate debt instruments
7825        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
7826            self.config.treasury.debt.clone(),
7827            seed + 90,
7828        );
7829        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
7830
7831        // Generate hedging instruments (IR swaps for floating-rate debt)
7832        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
7833            self.config.treasury.hedging.clone(),
7834            seed + 91,
7835        );
7836        for debt in &snapshot.debt_instruments {
7837            if debt.rate_type == InterestRateType::Variable {
7838                let swap = hedge_gen.generate_ir_swap(
7839                    currency,
7840                    debt.principal,
7841                    debt.origination_date,
7842                    debt.maturity_date,
7843                );
7844                snapshot.hedging_instruments.push(swap);
7845            }
7846        }
7847
7848        // Build FX exposures from foreign-currency payments and generate
7849        // FX forwards + hedge relationship designations via generate() API.
7850        {
7851            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7852            for payment in &document_flows.payments {
7853                if payment.currency != currency {
7854                    let entry = fx_map
7855                        .entry(payment.currency.clone())
7856                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7857                    entry.0 += payment.amount;
7858                    // Use the latest settlement date among grouped payments
7859                    if payment.header.document_date > entry.1 {
7860                        entry.1 = payment.header.document_date;
7861                    }
7862                }
7863            }
7864            if !fx_map.is_empty() {
7865                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7866                    .into_iter()
7867                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
7868                        datasynth_generators::treasury::FxExposure {
7869                            currency_pair: format!("{foreign_ccy}/{currency}"),
7870                            foreign_currency: foreign_ccy,
7871                            net_amount,
7872                            settlement_date,
7873                            description: "AP payment FX exposure".to_string(),
7874                        }
7875                    })
7876                    .collect();
7877                let (fx_instruments, fx_relationships) =
7878                    hedge_gen.generate(start_date, &fx_exposures);
7879                snapshot.hedging_instruments.extend(fx_instruments);
7880                snapshot.hedge_relationships.extend(fx_relationships);
7881            }
7882        }
7883
7884        // Inject anomalies if configured
7885        if self.config.treasury.anomaly_rate > 0.0 {
7886            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7887                seed + 92,
7888                self.config.treasury.anomaly_rate,
7889            );
7890            let mut labels = Vec::new();
7891            labels.extend(
7892                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7893            );
7894            snapshot.treasury_anomaly_labels = labels;
7895        }
7896
7897        // Generate cash positions from payment flows
7898        if self.config.treasury.cash_positioning.enabled {
7899            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7900
7901            // AP payments as outflows
7902            for payment in &document_flows.payments {
7903                cash_flows.push(datasynth_generators::treasury::CashFlow {
7904                    date: payment.header.document_date,
7905                    account_id: format!("{entity_id}-MAIN"),
7906                    amount: payment.amount,
7907                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7908                });
7909            }
7910
7911            // Customer receipts (from O2C chains) as inflows
7912            for chain in &document_flows.o2c_chains {
7913                if let Some(ref receipt) = chain.customer_receipt {
7914                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7915                        date: receipt.header.document_date,
7916                        account_id: format!("{entity_id}-MAIN"),
7917                        amount: receipt.amount,
7918                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7919                    });
7920                }
7921                // Remainder receipts (follow-up to partial payments)
7922                for receipt in &chain.remainder_receipts {
7923                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7924                        date: receipt.header.document_date,
7925                        account_id: format!("{entity_id}-MAIN"),
7926                        amount: receipt.amount,
7927                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7928                    });
7929                }
7930            }
7931
7932            if !cash_flows.is_empty() {
7933                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7934                    self.config.treasury.cash_positioning.clone(),
7935                    seed + 93,
7936                );
7937                let account_id = format!("{entity_id}-MAIN");
7938                snapshot.cash_positions = cash_gen.generate(
7939                    entity_id,
7940                    &account_id,
7941                    currency,
7942                    &cash_flows,
7943                    start_date,
7944                    start_date + chrono::Months::new(self.config.global.period_months),
7945                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
7946                );
7947            }
7948        }
7949
7950        // Generate cash forecasts from AR/AP aging
7951        if self.config.treasury.cash_forecasting.enabled {
7952            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7953
7954            // Build AR aging items from subledger AR invoices
7955            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7956                .ar_invoices
7957                .iter()
7958                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7959                .map(|inv| {
7960                    let days_past_due = if inv.due_date < end_date {
7961                        (end_date - inv.due_date).num_days().max(0) as u32
7962                    } else {
7963                        0
7964                    };
7965                    datasynth_generators::treasury::ArAgingItem {
7966                        expected_date: inv.due_date,
7967                        amount: inv.amount_remaining,
7968                        days_past_due,
7969                        document_id: inv.invoice_number.clone(),
7970                    }
7971                })
7972                .collect();
7973
7974            // Build AP aging items from subledger AP invoices
7975            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7976                .ap_invoices
7977                .iter()
7978                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7979                .map(|inv| datasynth_generators::treasury::ApAgingItem {
7980                    payment_date: inv.due_date,
7981                    amount: inv.amount_remaining,
7982                    document_id: inv.invoice_number.clone(),
7983                })
7984                .collect();
7985
7986            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7987                self.config.treasury.cash_forecasting.clone(),
7988                seed + 94,
7989            );
7990            let forecast = forecast_gen.generate(
7991                entity_id,
7992                currency,
7993                end_date,
7994                &ar_items,
7995                &ap_items,
7996                &[], // scheduled disbursements - empty for now
7997            );
7998            snapshot.cash_forecasts.push(forecast);
7999        }
8000
8001        // Generate cash pools and sweeps
8002        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
8003            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8004            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
8005                self.config.treasury.cash_pooling.clone(),
8006                seed + 95,
8007            );
8008
8009            // Create a pool from available accounts
8010            let account_ids: Vec<String> = snapshot
8011                .cash_positions
8012                .iter()
8013                .map(|cp| cp.bank_account_id.clone())
8014                .collect::<std::collections::HashSet<_>>()
8015                .into_iter()
8016                .collect();
8017
8018            if let Some(pool) =
8019                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
8020            {
8021                // Generate sweeps - build participant balances from last cash position per account
8022                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8023                for cp in &snapshot.cash_positions {
8024                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
8025                }
8026
8027                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
8028                    latest_balances
8029                        .into_iter()
8030                        .filter(|(id, _)| pool.participant_accounts.contains(id))
8031                        .map(
8032                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
8033                                account_id: id,
8034                                balance,
8035                            },
8036                        )
8037                        .collect();
8038
8039                let sweeps =
8040                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
8041                snapshot.cash_pool_sweeps = sweeps;
8042                snapshot.cash_pools.push(pool);
8043            }
8044        }
8045
8046        // Generate bank guarantees
8047        if self.config.treasury.bank_guarantees.enabled {
8048            let vendor_names: Vec<String> = self
8049                .master_data
8050                .vendors
8051                .iter()
8052                .map(|v| v.name.clone())
8053                .collect();
8054            if !vendor_names.is_empty() {
8055                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
8056                    self.config.treasury.bank_guarantees.clone(),
8057                    seed + 96,
8058                );
8059                snapshot.bank_guarantees =
8060                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
8061            }
8062        }
8063
8064        // Generate netting runs from intercompany matched pairs
8065        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
8066            let entity_ids: Vec<String> = self
8067                .config
8068                .companies
8069                .iter()
8070                .map(|c| c.code.clone())
8071                .collect();
8072            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
8073                .matched_pairs
8074                .iter()
8075                .map(|mp| {
8076                    (
8077                        mp.seller_company.clone(),
8078                        mp.buyer_company.clone(),
8079                        mp.amount,
8080                    )
8081                })
8082                .collect();
8083            if entity_ids.len() >= 2 {
8084                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
8085                    self.config.treasury.netting.clone(),
8086                    seed + 97,
8087                );
8088                snapshot.netting_runs = netting_gen.generate(
8089                    &entity_ids,
8090                    currency,
8091                    start_date,
8092                    self.config.global.period_months,
8093                    &ic_amounts,
8094                );
8095            }
8096        }
8097
8098        // Generate treasury journal entries from the instruments we just created.
8099        {
8100            use datasynth_generators::treasury::TreasuryAccounting;
8101
8102            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8103            let mut treasury_jes = Vec::new();
8104
8105            // Debt interest accrual JEs
8106            if !snapshot.debt_instruments.is_empty() {
8107                let debt_jes =
8108                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
8109                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
8110                treasury_jes.extend(debt_jes);
8111            }
8112
8113            // Hedge mark-to-market JEs
8114            if !snapshot.hedging_instruments.is_empty() {
8115                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
8116                    &snapshot.hedging_instruments,
8117                    &snapshot.hedge_relationships,
8118                    end_date,
8119                    entity_id,
8120                );
8121                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
8122                treasury_jes.extend(hedge_jes);
8123            }
8124
8125            // Cash pool sweep JEs
8126            if !snapshot.cash_pool_sweeps.is_empty() {
8127                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
8128                    &snapshot.cash_pool_sweeps,
8129                    entity_id,
8130                );
8131                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
8132                treasury_jes.extend(sweep_jes);
8133            }
8134
8135            if !treasury_jes.is_empty() {
8136                debug!("Total treasury journal entries: {}", treasury_jes.len());
8137            }
8138            snapshot.journal_entries = treasury_jes;
8139        }
8140
8141        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
8142        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
8143        stats.cash_position_count = snapshot.cash_positions.len();
8144        stats.cash_forecast_count = snapshot.cash_forecasts.len();
8145        stats.cash_pool_count = snapshot.cash_pools.len();
8146
8147        info!(
8148            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
8149            snapshot.debt_instruments.len(),
8150            snapshot.hedging_instruments.len(),
8151            snapshot.cash_positions.len(),
8152            snapshot.cash_forecasts.len(),
8153            snapshot.cash_pools.len(),
8154            snapshot.bank_guarantees.len(),
8155            snapshot.netting_runs.len(),
8156            snapshot.journal_entries.len(),
8157        );
8158        self.check_resources_with_log("post-treasury")?;
8159
8160        Ok(snapshot)
8161    }
8162
8163    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
8164    fn phase_project_accounting(
8165        &mut self,
8166        document_flows: &DocumentFlowSnapshot,
8167        hr: &HrSnapshot,
8168        stats: &mut EnhancedGenerationStatistics,
8169    ) -> SynthResult<ProjectAccountingSnapshot> {
8170        if !self.phase_config.generate_project_accounting {
8171            debug!("Phase 23: Skipped (project accounting disabled)");
8172            return Ok(ProjectAccountingSnapshot::default());
8173        }
8174        let degradation = self.check_resources()?;
8175        if degradation >= DegradationLevel::Reduced {
8176            debug!(
8177                "Phase skipped due to resource pressure (degradation: {:?})",
8178                degradation
8179            );
8180            return Ok(ProjectAccountingSnapshot::default());
8181        }
8182        info!("Phase 23: Generating Project Accounting Data");
8183
8184        let seed = self.seed;
8185        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8186            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8187        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8188        let company_code = self
8189            .config
8190            .companies
8191            .first()
8192            .map(|c| c.code.as_str())
8193            .unwrap_or("1000");
8194
8195        let mut snapshot = ProjectAccountingSnapshot::default();
8196
8197        // Generate projects with WBS hierarchies
8198        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8199            self.config.project_accounting.clone(),
8200            seed + 95,
8201        );
8202        let pool = project_gen.generate(company_code, start_date, end_date);
8203        snapshot.projects = pool.projects.clone();
8204
8205        // Link source documents to projects for cost allocation
8206        {
8207            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8208                Vec::new();
8209
8210            // Time entries
8211            for te in &hr.time_entries {
8212                let total_hours = te.hours_regular + te.hours_overtime;
8213                if total_hours > 0.0 {
8214                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8215                        id: te.entry_id.clone(),
8216                        entity_id: company_code.to_string(),
8217                        date: te.date,
8218                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8219                            .unwrap_or(rust_decimal::Decimal::ZERO),
8220                        source_type: CostSourceType::TimeEntry,
8221                        hours: Some(
8222                            rust_decimal::Decimal::from_f64_retain(total_hours)
8223                                .unwrap_or(rust_decimal::Decimal::ZERO),
8224                        ),
8225                    });
8226                }
8227            }
8228
8229            // Expense reports
8230            for er in &hr.expense_reports {
8231                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8232                    id: er.report_id.clone(),
8233                    entity_id: company_code.to_string(),
8234                    date: er.submission_date,
8235                    amount: er.total_amount,
8236                    source_type: CostSourceType::ExpenseReport,
8237                    hours: None,
8238                });
8239            }
8240
8241            // Purchase orders
8242            for po in &document_flows.purchase_orders {
8243                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8244                    id: po.header.document_id.clone(),
8245                    entity_id: company_code.to_string(),
8246                    date: po.header.document_date,
8247                    amount: po.total_net_amount,
8248                    source_type: CostSourceType::PurchaseOrder,
8249                    hours: None,
8250                });
8251            }
8252
8253            // Vendor invoices
8254            for vi in &document_flows.vendor_invoices {
8255                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8256                    id: vi.header.document_id.clone(),
8257                    entity_id: company_code.to_string(),
8258                    date: vi.header.document_date,
8259                    amount: vi.payable_amount,
8260                    source_type: CostSourceType::VendorInvoice,
8261                    hours: None,
8262                });
8263            }
8264
8265            if !source_docs.is_empty() && !pool.projects.is_empty() {
8266                let mut cost_gen =
8267                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
8268                        self.config.project_accounting.cost_allocation.clone(),
8269                        seed + 99,
8270                    );
8271                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8272            }
8273        }
8274
8275        // Generate change orders
8276        if self.config.project_accounting.change_orders.enabled {
8277            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8278                self.config.project_accounting.change_orders.clone(),
8279                seed + 96,
8280            );
8281            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8282        }
8283
8284        // Generate milestones
8285        if self.config.project_accounting.milestones.enabled {
8286            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8287                self.config.project_accounting.milestones.clone(),
8288                seed + 97,
8289            );
8290            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8291        }
8292
8293        // Generate earned value metrics (needs cost lines, so only if we have projects)
8294        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8295            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8296                self.config.project_accounting.earned_value.clone(),
8297                seed + 98,
8298            );
8299            snapshot.earned_value_metrics =
8300                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8301        }
8302
8303        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
8304        if self.config.project_accounting.revenue_recognition.enabled
8305            && !snapshot.projects.is_empty()
8306            && !snapshot.cost_lines.is_empty()
8307        {
8308            use datasynth_generators::project_accounting::RevenueGenerator;
8309            let rev_config = self.config.project_accounting.revenue_recognition.clone();
8310            let avg_contract_value =
8311                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8312                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8313
8314            // Build contract value tuples: only customer-type projects get revenue recognition.
8315            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
8316            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8317                snapshot
8318                    .projects
8319                    .iter()
8320                    .filter(|p| {
8321                        matches!(
8322                            p.project_type,
8323                            datasynth_core::models::ProjectType::Customer
8324                        )
8325                    })
8326                    .map(|p| {
8327                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
8328                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8329                        // budget × 1.25 → contract value
8330                        } else {
8331                            avg_contract_value
8332                        };
8333                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
8334                        (p.project_id.clone(), cv, etc)
8335                    })
8336                    .collect();
8337
8338            if !contract_values.is_empty() {
8339                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8340                snapshot.revenue_records = rev_gen.generate(
8341                    &snapshot.projects,
8342                    &snapshot.cost_lines,
8343                    &contract_values,
8344                    start_date,
8345                    end_date,
8346                );
8347                debug!(
8348                    "Generated {} revenue recognition records for {} customer projects",
8349                    snapshot.revenue_records.len(),
8350                    contract_values.len()
8351                );
8352            }
8353        }
8354
8355        stats.project_count = snapshot.projects.len();
8356        stats.project_change_order_count = snapshot.change_orders.len();
8357        stats.project_cost_line_count = snapshot.cost_lines.len();
8358
8359        info!(
8360            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8361            snapshot.projects.len(),
8362            snapshot.change_orders.len(),
8363            snapshot.milestones.len(),
8364            snapshot.earned_value_metrics.len()
8365        );
8366        self.check_resources_with_log("post-project-accounting")?;
8367
8368        Ok(snapshot)
8369    }
8370
8371    /// Phase 24: Generate process evolution and organizational events.
8372    fn phase_evolution_events(
8373        &mut self,
8374        stats: &mut EnhancedGenerationStatistics,
8375    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8376        if !self.phase_config.generate_evolution_events {
8377            debug!("Phase 24: Skipped (evolution events disabled)");
8378            return Ok((Vec::new(), Vec::new()));
8379        }
8380        info!("Phase 24: Generating Process Evolution + Organizational Events");
8381
8382        let seed = self.seed;
8383        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8384            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8385        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8386
8387        // Process evolution events
8388        let mut proc_gen =
8389            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8390                seed + 100,
8391            );
8392        let process_events = proc_gen.generate_events(start_date, end_date);
8393
8394        // Organizational events
8395        let company_codes: Vec<String> = self
8396            .config
8397            .companies
8398            .iter()
8399            .map(|c| c.code.clone())
8400            .collect();
8401        let mut org_gen =
8402            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8403                seed + 101,
8404            );
8405        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8406
8407        stats.process_evolution_event_count = process_events.len();
8408        stats.organizational_event_count = org_events.len();
8409
8410        info!(
8411            "Evolution events generated: {} process evolution, {} organizational",
8412            process_events.len(),
8413            org_events.len()
8414        );
8415        self.check_resources_with_log("post-evolution-events")?;
8416
8417        Ok((process_events, org_events))
8418    }
8419
8420    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
8421    /// data recovery, and regulatory changes).
8422    fn phase_disruption_events(
8423        &self,
8424        stats: &mut EnhancedGenerationStatistics,
8425    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8426        if !self.config.organizational_events.enabled {
8427            debug!("Phase 24b: Skipped (organizational events disabled)");
8428            return Ok(Vec::new());
8429        }
8430        info!("Phase 24b: Generating Disruption Events");
8431
8432        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8433            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8434        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8435
8436        let company_codes: Vec<String> = self
8437            .config
8438            .companies
8439            .iter()
8440            .map(|c| c.code.clone())
8441            .collect();
8442
8443        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8444        let events = gen.generate(start_date, end_date, &company_codes);
8445
8446        stats.disruption_event_count = events.len();
8447        info!("Disruption events generated: {} events", events.len());
8448        self.check_resources_with_log("post-disruption-events")?;
8449
8450        Ok(events)
8451    }
8452
8453    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
8454    ///
8455    /// Produces paired examples where each pair contains the original clean JE
8456    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
8457    /// split transaction). Useful for training anomaly detection models with
8458    /// known ground truth.
8459    fn phase_counterfactuals(
8460        &self,
8461        journal_entries: &[JournalEntry],
8462        stats: &mut EnhancedGenerationStatistics,
8463    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8464        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8465            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8466            return Ok(Vec::new());
8467        }
8468        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8469
8470        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8471
8472        let mut gen = CounterfactualGenerator::new(self.seed + 110);
8473
8474        // Rotating set of specs to produce diverse mutation types
8475        let specs = [
8476            CounterfactualSpec::ScaleAmount { factor: 2.5 },
8477            CounterfactualSpec::ShiftDate { days: -14 },
8478            CounterfactualSpec::SelfApprove,
8479            CounterfactualSpec::SplitTransaction { split_count: 3 },
8480        ];
8481
8482        let pairs: Vec<_> = journal_entries
8483            .iter()
8484            .enumerate()
8485            .map(|(i, je)| {
8486                let spec = &specs[i % specs.len()];
8487                gen.generate(je, spec)
8488            })
8489            .collect();
8490
8491        stats.counterfactual_pair_count = pairs.len();
8492        info!(
8493            "Counterfactual pairs generated: {} pairs from {} journal entries",
8494            pairs.len(),
8495            journal_entries.len()
8496        );
8497        self.check_resources_with_log("post-counterfactuals")?;
8498
8499        Ok(pairs)
8500    }
8501
8502    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
8503    ///
8504    /// Uses the anomaly labels (from Phase 8) to determine which documents are
8505    /// fraudulent, then generates probabilistic red flags on all chain documents.
8506    /// Non-fraud documents also receive red flags at a lower rate (false positives)
8507    /// to produce realistic ML training data.
8508    fn phase_red_flags(
8509        &self,
8510        anomaly_labels: &AnomalyLabels,
8511        document_flows: &DocumentFlowSnapshot,
8512        stats: &mut EnhancedGenerationStatistics,
8513    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8514        if !self.config.fraud.enabled {
8515            debug!("Phase 26: Skipped (fraud generation disabled)");
8516            return Ok(Vec::new());
8517        }
8518        info!("Phase 26: Generating Fraud Red-Flag Indicators");
8519
8520        use datasynth_generators::fraud::RedFlagGenerator;
8521
8522        let generator = RedFlagGenerator::new();
8523        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8524
8525        // Build a set of document IDs that are known-fraudulent from anomaly labels.
8526        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8527            .labels
8528            .iter()
8529            .filter(|label| label.anomaly_type.is_intentional())
8530            .map(|label| label.document_id.as_str())
8531            .collect();
8532
8533        let mut flags = Vec::new();
8534
8535        // Iterate P2P chains: use the purchase order document ID as the chain key.
8536        for chain in &document_flows.p2p_chains {
8537            let doc_id = &chain.purchase_order.header.document_id;
8538            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8539            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8540        }
8541
8542        // Iterate O2C chains: use the sales order document ID as the chain key.
8543        for chain in &document_flows.o2c_chains {
8544            let doc_id = &chain.sales_order.header.document_id;
8545            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8546            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8547        }
8548
8549        stats.red_flag_count = flags.len();
8550        info!(
8551            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8552            flags.len(),
8553            document_flows.p2p_chains.len(),
8554            document_flows.o2c_chains.len(),
8555            fraud_doc_ids.len()
8556        );
8557        self.check_resources_with_log("post-red-flags")?;
8558
8559        Ok(flags)
8560    }
8561
8562    /// Phase 26b: Generate collusion rings from employee/vendor pools.
8563    ///
8564    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
8565    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
8566    /// advance them over the simulation period.
8567    fn phase_collusion_rings(
8568        &mut self,
8569        stats: &mut EnhancedGenerationStatistics,
8570    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8571        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8572            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8573            return Ok(Vec::new());
8574        }
8575        info!("Phase 26b: Generating Collusion Rings");
8576
8577        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8578            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8579        let months = self.config.global.period_months;
8580
8581        let employee_ids: Vec<String> = self
8582            .master_data
8583            .employees
8584            .iter()
8585            .map(|e| e.employee_id.clone())
8586            .collect();
8587        let vendor_ids: Vec<String> = self
8588            .master_data
8589            .vendors
8590            .iter()
8591            .map(|v| v.vendor_id.clone())
8592            .collect();
8593
8594        let mut generator =
8595            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8596        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8597
8598        stats.collusion_ring_count = rings.len();
8599        info!(
8600            "Collusion rings generated: {} rings, total members: {}",
8601            rings.len(),
8602            rings
8603                .iter()
8604                .map(datasynth_generators::fraud::CollusionRing::size)
8605                .sum::<usize>()
8606        );
8607        self.check_resources_with_log("post-collusion-rings")?;
8608
8609        Ok(rings)
8610    }
8611
8612    /// Phase 27: Generate bi-temporal version chains for vendor entities.
8613    ///
8614    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
8615    /// master data changes over time, supporting bi-temporal audit queries.
8616    fn phase_temporal_attributes(
8617        &mut self,
8618        stats: &mut EnhancedGenerationStatistics,
8619    ) -> SynthResult<
8620        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8621    > {
8622        if !self.config.temporal_attributes.enabled {
8623            debug!("Phase 27: Skipped (temporal attributes disabled)");
8624            return Ok(Vec::new());
8625        }
8626        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8627
8628        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8629            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8630
8631        // Build a TemporalAttributeConfig from the user's config.
8632        // Since Phase 27 is already gated on temporal_attributes.enabled,
8633        // default to enabling version chains so users get actual mutations.
8634        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8635            || self.config.temporal_attributes.enabled;
8636        let temporal_config = {
8637            let ta = &self.config.temporal_attributes;
8638            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8639                .enabled(ta.enabled)
8640                .closed_probability(ta.valid_time.closed_probability)
8641                .avg_validity_days(ta.valid_time.avg_validity_days)
8642                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8643                .with_version_chains(if generate_version_chains {
8644                    ta.avg_versions_per_entity
8645                } else {
8646                    1.0
8647                })
8648                .build()
8649        };
8650        // Apply backdating settings if configured
8651        let temporal_config = if self
8652            .config
8653            .temporal_attributes
8654            .transaction_time
8655            .allow_backdating
8656        {
8657            let mut c = temporal_config;
8658            c.transaction_time.allow_backdating = true;
8659            c.transaction_time.backdating_probability = self
8660                .config
8661                .temporal_attributes
8662                .transaction_time
8663                .backdating_probability;
8664            c.transaction_time.max_backdate_days = self
8665                .config
8666                .temporal_attributes
8667                .transaction_time
8668                .max_backdate_days;
8669            c
8670        } else {
8671            temporal_config
8672        };
8673        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8674            temporal_config,
8675            self.seed + 130,
8676            start_date,
8677        );
8678
8679        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8680            self.seed + 130,
8681            datasynth_core::GeneratorType::Vendor,
8682        );
8683
8684        let chains: Vec<_> = self
8685            .master_data
8686            .vendors
8687            .iter()
8688            .map(|vendor| {
8689                let id = uuid_factory.next();
8690                gen.generate_version_chain(vendor.clone(), id)
8691            })
8692            .collect();
8693
8694        stats.temporal_version_chain_count = chains.len();
8695        info!("Temporal version chains generated: {} chains", chains.len());
8696        self.check_resources_with_log("post-temporal-attributes")?;
8697
8698        Ok(chains)
8699    }
8700
8701    /// Phase 28: Build entity relationship graph and cross-process links.
8702    ///
8703    /// Part 1 (gated on `relationship_strength.enabled`): builds an
8704    /// `EntityGraph` from master-data vendor/customer entities and
8705    /// journal-entry-derived transaction summaries.
8706    ///
8707    /// Part 2 (gated on `cross_process_links.enabled`): extracts
8708    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
8709    /// generates inventory-movement cross-process links.
8710    fn phase_entity_relationships(
8711        &self,
8712        journal_entries: &[JournalEntry],
8713        document_flows: &DocumentFlowSnapshot,
8714        stats: &mut EnhancedGenerationStatistics,
8715    ) -> SynthResult<(
8716        Option<datasynth_core::models::EntityGraph>,
8717        Vec<datasynth_core::models::CrossProcessLink>,
8718    )> {
8719        use datasynth_generators::relationships::{
8720            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8721            TransactionSummary,
8722        };
8723
8724        let rs_enabled = self.config.relationship_strength.enabled;
8725        let cpl_enabled = self.config.cross_process_links.enabled
8726            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8727
8728        if !rs_enabled && !cpl_enabled {
8729            debug!(
8730                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8731            );
8732            return Ok((None, Vec::new()));
8733        }
8734
8735        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8736
8737        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8738            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8739
8740        let company_code = self
8741            .config
8742            .companies
8743            .first()
8744            .map(|c| c.code.as_str())
8745            .unwrap_or("1000");
8746
8747        // Build the generator with matching config flags
8748        let gen_config = EntityGraphConfig {
8749            enabled: rs_enabled,
8750            cross_process: datasynth_generators::relationships::CrossProcessConfig {
8751                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8752                enable_return_flows: false,
8753                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8754                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8755                // Use higher link rate for small datasets to avoid probabilistic empty results
8756                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8757                    1.0
8758                } else {
8759                    0.30
8760                },
8761                ..Default::default()
8762            },
8763            strength_config: datasynth_generators::relationships::StrengthConfig {
8764                transaction_volume_weight: self
8765                    .config
8766                    .relationship_strength
8767                    .calculation
8768                    .transaction_volume_weight,
8769                transaction_count_weight: self
8770                    .config
8771                    .relationship_strength
8772                    .calculation
8773                    .transaction_count_weight,
8774                duration_weight: self
8775                    .config
8776                    .relationship_strength
8777                    .calculation
8778                    .relationship_duration_weight,
8779                recency_weight: self.config.relationship_strength.calculation.recency_weight,
8780                mutual_connections_weight: self
8781                    .config
8782                    .relationship_strength
8783                    .calculation
8784                    .mutual_connections_weight,
8785                recency_half_life_days: self
8786                    .config
8787                    .relationship_strength
8788                    .calculation
8789                    .recency_half_life_days,
8790            },
8791            ..Default::default()
8792        };
8793
8794        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
8795
8796        // --- Part 1: Entity Relationship Graph ---
8797        let entity_graph = if rs_enabled {
8798            // Build EntitySummary lists from master data
8799            let vendor_summaries: Vec<EntitySummary> = self
8800                .master_data
8801                .vendors
8802                .iter()
8803                .map(|v| {
8804                    EntitySummary::new(
8805                        &v.vendor_id,
8806                        &v.name,
8807                        datasynth_core::models::GraphEntityType::Vendor,
8808                        start_date,
8809                    )
8810                })
8811                .collect();
8812
8813            let customer_summaries: Vec<EntitySummary> = self
8814                .master_data
8815                .customers
8816                .iter()
8817                .map(|c| {
8818                    EntitySummary::new(
8819                        &c.customer_id,
8820                        &c.name,
8821                        datasynth_core::models::GraphEntityType::Customer,
8822                        start_date,
8823                    )
8824                })
8825                .collect();
8826
8827            // Build transaction summaries from journal entries.
8828            // Key = (company_code, trading_partner) for entries that have a
8829            // trading partner.  This captures intercompany flows and any JE
8830            // whose line items carry a trading_partner reference.
8831            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
8832                std::collections::HashMap::new();
8833
8834            for je in journal_entries {
8835                let cc = je.header.company_code.clone();
8836                let posting_date = je.header.posting_date;
8837                for line in &je.lines {
8838                    if let Some(ref tp) = line.trading_partner {
8839                        let amount = if line.debit_amount > line.credit_amount {
8840                            line.debit_amount
8841                        } else {
8842                            line.credit_amount
8843                        };
8844                        let entry = txn_summaries
8845                            .entry((cc.clone(), tp.clone()))
8846                            .or_insert_with(|| TransactionSummary {
8847                                total_volume: rust_decimal::Decimal::ZERO,
8848                                transaction_count: 0,
8849                                first_transaction_date: posting_date,
8850                                last_transaction_date: posting_date,
8851                                related_entities: std::collections::HashSet::new(),
8852                            });
8853                        entry.total_volume += amount;
8854                        entry.transaction_count += 1;
8855                        if posting_date < entry.first_transaction_date {
8856                            entry.first_transaction_date = posting_date;
8857                        }
8858                        if posting_date > entry.last_transaction_date {
8859                            entry.last_transaction_date = posting_date;
8860                        }
8861                        entry.related_entities.insert(cc.clone());
8862                    }
8863                }
8864            }
8865
8866            // Also extract transaction relationships from document flow chains.
8867            // P2P chains: Company → Vendor relationships
8868            for chain in &document_flows.p2p_chains {
8869                let cc = chain.purchase_order.header.company_code.clone();
8870                let vendor_id = chain.purchase_order.vendor_id.clone();
8871                let po_date = chain.purchase_order.header.document_date;
8872                let amount = chain.purchase_order.total_net_amount;
8873
8874                let entry = txn_summaries
8875                    .entry((cc.clone(), vendor_id))
8876                    .or_insert_with(|| TransactionSummary {
8877                        total_volume: rust_decimal::Decimal::ZERO,
8878                        transaction_count: 0,
8879                        first_transaction_date: po_date,
8880                        last_transaction_date: po_date,
8881                        related_entities: std::collections::HashSet::new(),
8882                    });
8883                entry.total_volume += amount;
8884                entry.transaction_count += 1;
8885                if po_date < entry.first_transaction_date {
8886                    entry.first_transaction_date = po_date;
8887                }
8888                if po_date > entry.last_transaction_date {
8889                    entry.last_transaction_date = po_date;
8890                }
8891                entry.related_entities.insert(cc);
8892            }
8893
8894            // O2C chains: Company → Customer relationships
8895            for chain in &document_flows.o2c_chains {
8896                let cc = chain.sales_order.header.company_code.clone();
8897                let customer_id = chain.sales_order.customer_id.clone();
8898                let so_date = chain.sales_order.header.document_date;
8899                let amount = chain.sales_order.total_net_amount;
8900
8901                let entry = txn_summaries
8902                    .entry((cc.clone(), customer_id))
8903                    .or_insert_with(|| TransactionSummary {
8904                        total_volume: rust_decimal::Decimal::ZERO,
8905                        transaction_count: 0,
8906                        first_transaction_date: so_date,
8907                        last_transaction_date: so_date,
8908                        related_entities: std::collections::HashSet::new(),
8909                    });
8910                entry.total_volume += amount;
8911                entry.transaction_count += 1;
8912                if so_date < entry.first_transaction_date {
8913                    entry.first_transaction_date = so_date;
8914                }
8915                if so_date > entry.last_transaction_date {
8916                    entry.last_transaction_date = so_date;
8917                }
8918                entry.related_entities.insert(cc);
8919            }
8920
8921            let as_of_date = journal_entries
8922                .last()
8923                .map(|je| je.header.posting_date)
8924                .unwrap_or(start_date);
8925
8926            let graph = gen.generate_entity_graph(
8927                company_code,
8928                as_of_date,
8929                &vendor_summaries,
8930                &customer_summaries,
8931                &txn_summaries,
8932            );
8933
8934            info!(
8935                "Entity relationship graph: {} nodes, {} edges",
8936                graph.nodes.len(),
8937                graph.edges.len()
8938            );
8939            stats.entity_relationship_node_count = graph.nodes.len();
8940            stats.entity_relationship_edge_count = graph.edges.len();
8941            Some(graph)
8942        } else {
8943            None
8944        };
8945
8946        // --- Part 2: Cross-Process Links ---
8947        let cross_process_links = if cpl_enabled {
8948            // Build GoodsReceiptRef from P2P chains
8949            let gr_refs: Vec<GoodsReceiptRef> = document_flows
8950                .p2p_chains
8951                .iter()
8952                .flat_map(|chain| {
8953                    let vendor_id = chain.purchase_order.vendor_id.clone();
8954                    let cc = chain.purchase_order.header.company_code.clone();
8955                    chain.goods_receipts.iter().flat_map(move |gr| {
8956                        gr.items.iter().filter_map({
8957                            let doc_id = gr.header.document_id.clone();
8958                            let v_id = vendor_id.clone();
8959                            let company = cc.clone();
8960                            let receipt_date = gr.header.document_date;
8961                            move |item| {
8962                                item.base
8963                                    .material_id
8964                                    .as_ref()
8965                                    .map(|mat_id| GoodsReceiptRef {
8966                                        document_id: doc_id.clone(),
8967                                        material_id: mat_id.clone(),
8968                                        quantity: item.base.quantity,
8969                                        receipt_date,
8970                                        vendor_id: v_id.clone(),
8971                                        company_code: company.clone(),
8972                                    })
8973                            }
8974                        })
8975                    })
8976                })
8977                .collect();
8978
8979            // Build DeliveryRef from O2C chains
8980            let del_refs: Vec<DeliveryRef> = document_flows
8981                .o2c_chains
8982                .iter()
8983                .flat_map(|chain| {
8984                    let customer_id = chain.sales_order.customer_id.clone();
8985                    let cc = chain.sales_order.header.company_code.clone();
8986                    chain.deliveries.iter().flat_map(move |del| {
8987                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8988                        del.items.iter().filter_map({
8989                            let doc_id = del.header.document_id.clone();
8990                            let c_id = customer_id.clone();
8991                            let company = cc.clone();
8992                            move |item| {
8993                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8994                                    document_id: doc_id.clone(),
8995                                    material_id: mat_id.clone(),
8996                                    quantity: item.base.quantity,
8997                                    delivery_date,
8998                                    customer_id: c_id.clone(),
8999                                    company_code: company.clone(),
9000                                })
9001                            }
9002                        })
9003                    })
9004                })
9005                .collect();
9006
9007            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
9008            info!("Cross-process links generated: {} links", links.len());
9009            stats.cross_process_link_count = links.len();
9010            links
9011        } else {
9012            Vec::new()
9013        };
9014
9015        self.check_resources_with_log("post-entity-relationships")?;
9016        Ok((entity_graph, cross_process_links))
9017    }
9018
9019    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
9020    fn phase_industry_data(
9021        &self,
9022        stats: &mut EnhancedGenerationStatistics,
9023    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
9024        if !self.config.industry_specific.enabled {
9025            return None;
9026        }
9027        info!("Phase 29: Generating industry-specific data");
9028        let output = datasynth_generators::industry::factory::generate_industry_output(
9029            self.config.global.industry,
9030        );
9031        stats.industry_gl_account_count = output.gl_accounts.len();
9032        info!(
9033            "Industry data generated: {} GL accounts for {:?}",
9034            output.gl_accounts.len(),
9035            self.config.global.industry
9036        );
9037        Some(output)
9038    }
9039
9040    /// Phase 3b: Generate opening balances for each company.
9041    fn phase_opening_balances(
9042        &mut self,
9043        coa: &Arc<ChartOfAccounts>,
9044        stats: &mut EnhancedGenerationStatistics,
9045    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
9046        if !self.config.balance.generate_opening_balances {
9047            debug!("Phase 3b: Skipped (opening balance generation disabled)");
9048            return Ok(Vec::new());
9049        }
9050        info!("Phase 3b: Generating Opening Balances");
9051
9052        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9053            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9054        let fiscal_year = start_date.year();
9055
9056        let industry = match self.config.global.industry {
9057            IndustrySector::Manufacturing => IndustryType::Manufacturing,
9058            IndustrySector::Retail => IndustryType::Retail,
9059            IndustrySector::FinancialServices => IndustryType::Financial,
9060            IndustrySector::Healthcare => IndustryType::Healthcare,
9061            IndustrySector::Technology => IndustryType::Technology,
9062            _ => IndustryType::Manufacturing,
9063        };
9064
9065        let config = datasynth_generators::OpeningBalanceConfig {
9066            industry,
9067            ..Default::default()
9068        };
9069        let mut gen =
9070            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
9071
9072        let mut results = Vec::new();
9073        for company in &self.config.companies {
9074            let spec = OpeningBalanceSpec::new(
9075                company.code.clone(),
9076                start_date,
9077                fiscal_year,
9078                company.currency.clone(),
9079                rust_decimal::Decimal::new(10_000_000, 0),
9080                industry,
9081            );
9082            let ob = gen.generate(&spec, coa, start_date, &company.code);
9083            results.push(ob);
9084        }
9085
9086        stats.opening_balance_count = results.len();
9087        info!("Opening balances generated: {} companies", results.len());
9088        self.check_resources_with_log("post-opening-balances")?;
9089
9090        Ok(results)
9091    }
9092
9093    /// Phase 9b: Reconcile GL control accounts to subledger balances.
9094    fn phase_subledger_reconciliation(
9095        &mut self,
9096        subledger: &SubledgerSnapshot,
9097        entries: &[JournalEntry],
9098        stats: &mut EnhancedGenerationStatistics,
9099    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
9100        if !self.config.balance.reconcile_subledgers {
9101            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
9102            return Ok(Vec::new());
9103        }
9104        info!("Phase 9b: Reconciling GL to subledger balances");
9105
9106        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9107            .map(|d| d + chrono::Months::new(self.config.global.period_months))
9108            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9109
9110        // Build GL balance map from journal entries using a balance tracker
9111        let tracker_config = BalanceTrackerConfig {
9112            validate_on_each_entry: false,
9113            track_history: false,
9114            fail_on_validation_error: false,
9115            ..Default::default()
9116        };
9117        let recon_currency = self
9118            .config
9119            .companies
9120            .first()
9121            .map(|c| c.currency.clone())
9122            .unwrap_or_else(|| "USD".to_string());
9123        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
9124        let validation_errors = tracker.apply_entries(entries);
9125        if !validation_errors.is_empty() {
9126            warn!(
9127                error_count = validation_errors.len(),
9128                "Balance tracker encountered validation errors during subledger reconciliation"
9129            );
9130            for err in &validation_errors {
9131                debug!("Balance validation error: {:?}", err);
9132            }
9133        }
9134
9135        let mut engine = datasynth_generators::ReconciliationEngine::new(
9136            datasynth_generators::ReconciliationConfig::default(),
9137        );
9138
9139        let mut results = Vec::new();
9140        let company_code = self
9141            .config
9142            .companies
9143            .first()
9144            .map(|c| c.code.as_str())
9145            .unwrap_or("1000");
9146
9147        // Reconcile AR
9148        if !subledger.ar_invoices.is_empty() {
9149            let gl_balance = tracker
9150                .get_account_balance(
9151                    company_code,
9152                    datasynth_core::accounts::control_accounts::AR_CONTROL,
9153                )
9154                .map(|b| b.closing_balance)
9155                .unwrap_or_default();
9156            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
9157            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
9158        }
9159
9160        // Reconcile AP
9161        if !subledger.ap_invoices.is_empty() {
9162            let gl_balance = tracker
9163                .get_account_balance(
9164                    company_code,
9165                    datasynth_core::accounts::control_accounts::AP_CONTROL,
9166                )
9167                .map(|b| b.closing_balance)
9168                .unwrap_or_default();
9169            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
9170            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
9171        }
9172
9173        // Reconcile FA
9174        if !subledger.fa_records.is_empty() {
9175            let gl_asset_balance = tracker
9176                .get_account_balance(
9177                    company_code,
9178                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9179                )
9180                .map(|b| b.closing_balance)
9181                .unwrap_or_default();
9182            let gl_accum_depr_balance = tracker
9183                .get_account_balance(
9184                    company_code,
9185                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
9186                )
9187                .map(|b| b.closing_balance)
9188                .unwrap_or_default();
9189            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
9190                subledger.fa_records.iter().collect();
9191            let (asset_recon, depr_recon) = engine.reconcile_fa(
9192                company_code,
9193                end_date,
9194                gl_asset_balance,
9195                gl_accum_depr_balance,
9196                &fa_refs,
9197            );
9198            results.push(asset_recon);
9199            results.push(depr_recon);
9200        }
9201
9202        // Reconcile Inventory
9203        if !subledger.inventory_positions.is_empty() {
9204            let gl_balance = tracker
9205                .get_account_balance(
9206                    company_code,
9207                    datasynth_core::accounts::control_accounts::INVENTORY,
9208                )
9209                .map(|b| b.closing_balance)
9210                .unwrap_or_default();
9211            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9212                subledger.inventory_positions.iter().collect();
9213            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9214        }
9215
9216        stats.subledger_reconciliation_count = results.len();
9217        let passed = results.iter().filter(|r| r.is_balanced()).count();
9218        let failed = results.len() - passed;
9219        info!(
9220            "Subledger reconciliation: {} checks, {} passed, {} failed",
9221            results.len(),
9222            passed,
9223            failed
9224        );
9225        self.check_resources_with_log("post-subledger-reconciliation")?;
9226
9227        Ok(results)
9228    }
9229
9230    /// Generate the chart of accounts.
9231    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9232        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9233
9234        let coa_framework = self.resolve_coa_framework();
9235
9236        let mut gen = ChartOfAccountsGenerator::new(
9237            self.config.chart_of_accounts.complexity,
9238            self.config.global.industry,
9239            self.seed,
9240        )
9241        .with_coa_framework(coa_framework);
9242
9243        let coa = Arc::new(gen.generate());
9244        self.coa = Some(Arc::clone(&coa));
9245
9246        if let Some(pb) = pb {
9247            pb.finish_with_message("Chart of Accounts complete");
9248        }
9249
9250        Ok(coa)
9251    }
9252
9253    /// Generate master data entities.
9254    fn generate_master_data(&mut self) -> SynthResult<()> {
9255        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9256            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9257        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9258
9259        let total = self.config.companies.len() as u64 * 5; // 5 entity types
9260        let pb = self.create_progress_bar(total, "Generating Master Data");
9261
9262        // Resolve country pack once for all companies (uses primary company's country)
9263        let pack = self.primary_pack().clone();
9264
9265        // Capture config values needed inside the parallel closure
9266        let vendors_per_company = self.phase_config.vendors_per_company;
9267        let customers_per_company = self.phase_config.customers_per_company;
9268        let materials_per_company = self.phase_config.materials_per_company;
9269        let assets_per_company = self.phase_config.assets_per_company;
9270        let coa_framework = self.resolve_coa_framework();
9271
9272        // Generate all master data in parallel across companies.
9273        // Each company's data is independent, making this embarrassingly parallel.
9274        let per_company_results: Vec<_> = self
9275            .config
9276            .companies
9277            .par_iter()
9278            .enumerate()
9279            .map(|(i, company)| {
9280                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9281                let pack = pack.clone();
9282
9283                // Generate vendors (offset counter so IDs are globally unique across companies)
9284                let mut vendor_gen = VendorGenerator::new(company_seed);
9285                vendor_gen.set_country_pack(pack.clone());
9286                vendor_gen.set_coa_framework(coa_framework);
9287                vendor_gen.set_counter_offset(i * vendors_per_company);
9288                // Wire vendor network config when enabled
9289                if self.config.vendor_network.enabled {
9290                    let vn = &self.config.vendor_network;
9291                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9292                        enabled: true,
9293                        depth: vn.depth,
9294                        tier1_count: datasynth_generators::TierCountConfig::new(
9295                            vn.tier1.min,
9296                            vn.tier1.max,
9297                        ),
9298                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
9299                            vn.tier2_per_parent.min,
9300                            vn.tier2_per_parent.max,
9301                        ),
9302                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
9303                            vn.tier3_per_parent.min,
9304                            vn.tier3_per_parent.max,
9305                        ),
9306                        cluster_distribution: datasynth_generators::ClusterDistribution {
9307                            reliable_strategic: vn.clusters.reliable_strategic,
9308                            standard_operational: vn.clusters.standard_operational,
9309                            transactional: vn.clusters.transactional,
9310                            problematic: vn.clusters.problematic,
9311                        },
9312                        concentration_limits: datasynth_generators::ConcentrationLimits {
9313                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9314                            max_top5: vn.dependencies.top_5_concentration,
9315                        },
9316                        ..datasynth_generators::VendorNetworkConfig::default()
9317                    });
9318                }
9319                let vendor_pool =
9320                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9321
9322                // Generate customers (offset counter so IDs are globally unique across companies)
9323                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9324                customer_gen.set_country_pack(pack.clone());
9325                customer_gen.set_coa_framework(coa_framework);
9326                customer_gen.set_counter_offset(i * customers_per_company);
9327                // Wire customer segmentation config when enabled
9328                if self.config.customer_segmentation.enabled {
9329                    let cs = &self.config.customer_segmentation;
9330                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9331                        enabled: true,
9332                        segment_distribution: datasynth_generators::SegmentDistribution {
9333                            enterprise: cs.value_segments.enterprise.customer_share,
9334                            mid_market: cs.value_segments.mid_market.customer_share,
9335                            smb: cs.value_segments.smb.customer_share,
9336                            consumer: cs.value_segments.consumer.customer_share,
9337                        },
9338                        referral_config: datasynth_generators::ReferralConfig {
9339                            enabled: cs.networks.referrals.enabled,
9340                            referral_rate: cs.networks.referrals.referral_rate,
9341                            ..Default::default()
9342                        },
9343                        hierarchy_config: datasynth_generators::HierarchyConfig {
9344                            enabled: cs.networks.corporate_hierarchies.enabled,
9345                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9346                            ..Default::default()
9347                        },
9348                        ..Default::default()
9349                    };
9350                    customer_gen.set_segmentation_config(seg_cfg);
9351                }
9352                let customer_pool = customer_gen.generate_customer_pool(
9353                    customers_per_company,
9354                    &company.code,
9355                    start_date,
9356                );
9357
9358                // Generate materials (offset counter so IDs are globally unique across companies)
9359                let mut material_gen = MaterialGenerator::new(company_seed + 200);
9360                material_gen.set_country_pack(pack.clone());
9361                material_gen.set_counter_offset(i * materials_per_company);
9362                let material_pool = material_gen.generate_material_pool(
9363                    materials_per_company,
9364                    &company.code,
9365                    start_date,
9366                );
9367
9368                // Generate fixed assets
9369                let mut asset_gen = AssetGenerator::new(company_seed + 300);
9370                let asset_pool = asset_gen.generate_asset_pool(
9371                    assets_per_company,
9372                    &company.code,
9373                    (start_date, end_date),
9374                );
9375
9376                // Generate employees
9377                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9378                employee_gen.set_country_pack(pack);
9379                let employee_pool =
9380                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9381
9382                // Generate employee change history (2-5 events per employee)
9383                let employee_change_history =
9384                    employee_gen.generate_all_change_history(&employee_pool, end_date);
9385
9386                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
9387                let employee_ids: Vec<String> = employee_pool
9388                    .employees
9389                    .iter()
9390                    .map(|e| e.employee_id.clone())
9391                    .collect();
9392                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9393                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9394
9395                (
9396                    vendor_pool.vendors,
9397                    customer_pool.customers,
9398                    material_pool.materials,
9399                    asset_pool.assets,
9400                    employee_pool.employees,
9401                    employee_change_history,
9402                    cost_centers,
9403                )
9404            })
9405            .collect();
9406
9407        // Aggregate results from all companies
9408        for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9409            per_company_results
9410        {
9411            self.master_data.vendors.extend(vendors);
9412            self.master_data.customers.extend(customers);
9413            self.master_data.materials.extend(materials);
9414            self.master_data.assets.extend(assets);
9415            self.master_data.employees.extend(employees);
9416            self.master_data.cost_centers.extend(cost_centers);
9417            self.master_data
9418                .employee_change_history
9419                .extend(change_history);
9420        }
9421
9422        if let Some(pb) = &pb {
9423            pb.inc(total);
9424        }
9425        if let Some(pb) = pb {
9426            pb.finish_with_message("Master data generation complete");
9427        }
9428
9429        Ok(())
9430    }
9431
9432    /// Generate document flows (P2P and O2C).
9433    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9434        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9435            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9436
9437        // Generate P2P chains
9438        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
9439        let months = (self.config.global.period_months as usize).max(1);
9440        let p2p_count = self
9441            .phase_config
9442            .p2p_chains
9443            .min(self.master_data.vendors.len() * 2 * months);
9444        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9445
9446        // Convert P2P config from schema to generator config
9447        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9448        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9449        p2p_gen.set_country_pack(self.primary_pack().clone());
9450
9451        for i in 0..p2p_count {
9452            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9453            let materials: Vec<&Material> = self
9454                .master_data
9455                .materials
9456                .iter()
9457                .skip(i % self.master_data.materials.len().max(1))
9458                .take(2.min(self.master_data.materials.len()))
9459                .collect();
9460
9461            if materials.is_empty() {
9462                continue;
9463            }
9464
9465            let company = &self.config.companies[i % self.config.companies.len()];
9466            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9467            let fiscal_period = po_date.month() as u8;
9468            let created_by = if self.master_data.employees.is_empty() {
9469                "SYSTEM"
9470            } else {
9471                self.master_data.employees[i % self.master_data.employees.len()]
9472                    .user_id
9473                    .as_str()
9474            };
9475
9476            let chain = p2p_gen.generate_chain(
9477                &company.code,
9478                vendor,
9479                &materials,
9480                po_date,
9481                start_date.year() as u16,
9482                fiscal_period,
9483                created_by,
9484            );
9485
9486            // Flatten documents
9487            flows.purchase_orders.push(chain.purchase_order.clone());
9488            flows.goods_receipts.extend(chain.goods_receipts.clone());
9489            if let Some(vi) = &chain.vendor_invoice {
9490                flows.vendor_invoices.push(vi.clone());
9491            }
9492            if let Some(payment) = &chain.payment {
9493                flows.payments.push(payment.clone());
9494            }
9495            for remainder in &chain.remainder_payments {
9496                flows.payments.push(remainder.clone());
9497            }
9498            flows.p2p_chains.push(chain);
9499
9500            if let Some(pb) = &pb {
9501                pb.inc(1);
9502            }
9503        }
9504
9505        if let Some(pb) = pb {
9506            pb.finish_with_message("P2P document flows complete");
9507        }
9508
9509        // Generate O2C chains
9510        // Cap at ~2 SOs per customer per month to keep order volume realistic
9511        let o2c_count = self
9512            .phase_config
9513            .o2c_chains
9514            .min(self.master_data.customers.len() * 2 * months);
9515        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9516
9517        // Convert O2C config from schema to generator config
9518        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9519        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9520        o2c_gen.set_country_pack(self.primary_pack().clone());
9521
9522        for i in 0..o2c_count {
9523            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9524            let materials: Vec<&Material> = self
9525                .master_data
9526                .materials
9527                .iter()
9528                .skip(i % self.master_data.materials.len().max(1))
9529                .take(2.min(self.master_data.materials.len()))
9530                .collect();
9531
9532            if materials.is_empty() {
9533                continue;
9534            }
9535
9536            let company = &self.config.companies[i % self.config.companies.len()];
9537            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9538            let fiscal_period = so_date.month() as u8;
9539            let created_by = if self.master_data.employees.is_empty() {
9540                "SYSTEM"
9541            } else {
9542                self.master_data.employees[i % self.master_data.employees.len()]
9543                    .user_id
9544                    .as_str()
9545            };
9546
9547            let chain = o2c_gen.generate_chain(
9548                &company.code,
9549                customer,
9550                &materials,
9551                so_date,
9552                start_date.year() as u16,
9553                fiscal_period,
9554                created_by,
9555            );
9556
9557            // Flatten documents
9558            flows.sales_orders.push(chain.sales_order.clone());
9559            flows.deliveries.extend(chain.deliveries.clone());
9560            if let Some(ci) = &chain.customer_invoice {
9561                flows.customer_invoices.push(ci.clone());
9562            }
9563            if let Some(receipt) = &chain.customer_receipt {
9564                flows.payments.push(receipt.clone());
9565            }
9566            // Extract remainder receipts (follow-up to partial payments)
9567            for receipt in &chain.remainder_receipts {
9568                flows.payments.push(receipt.clone());
9569            }
9570            flows.o2c_chains.push(chain);
9571
9572            if let Some(pb) = &pb {
9573                pb.inc(1);
9574            }
9575        }
9576
9577        if let Some(pb) = pb {
9578            pb.finish_with_message("O2C document flows complete");
9579        }
9580
9581        // Collect all document cross-references from document headers.
9582        // Each document embeds references to its predecessor(s) via add_reference(); here we
9583        // denormalise them into a flat list for the document_references.json output file.
9584        {
9585            let mut refs = Vec::new();
9586            for doc in &flows.purchase_orders {
9587                refs.extend(doc.header.document_references.iter().cloned());
9588            }
9589            for doc in &flows.goods_receipts {
9590                refs.extend(doc.header.document_references.iter().cloned());
9591            }
9592            for doc in &flows.vendor_invoices {
9593                refs.extend(doc.header.document_references.iter().cloned());
9594            }
9595            for doc in &flows.sales_orders {
9596                refs.extend(doc.header.document_references.iter().cloned());
9597            }
9598            for doc in &flows.deliveries {
9599                refs.extend(doc.header.document_references.iter().cloned());
9600            }
9601            for doc in &flows.customer_invoices {
9602                refs.extend(doc.header.document_references.iter().cloned());
9603            }
9604            for doc in &flows.payments {
9605                refs.extend(doc.header.document_references.iter().cloned());
9606            }
9607            debug!(
9608                "Collected {} document cross-references from document headers",
9609                refs.len()
9610            );
9611            flows.document_references = refs;
9612        }
9613
9614        Ok(())
9615    }
9616
9617    /// Generate journal entries using parallel generation across multiple cores.
9618    fn generate_journal_entries(
9619        &mut self,
9620        coa: &Arc<ChartOfAccounts>,
9621    ) -> SynthResult<Vec<JournalEntry>> {
9622        use datasynth_core::traits::ParallelGenerator;
9623
9624        let total = self.calculate_total_transactions();
9625        let pb = self.create_progress_bar(total, "Generating Journal Entries");
9626
9627        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9628            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9629        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9630
9631        let company_codes: Vec<String> = self
9632            .config
9633            .companies
9634            .iter()
9635            .map(|c| c.code.clone())
9636            .collect();
9637
9638        let generator = JournalEntryGenerator::new_with_params(
9639            self.config.transactions.clone(),
9640            Arc::clone(coa),
9641            company_codes,
9642            start_date,
9643            end_date,
9644            self.seed,
9645        );
9646
9647        // Connect generated master data to ensure JEs reference real entities
9648        // Enable persona-based error injection for realistic human behavior
9649        // Pass fraud configuration for fraud injection
9650        let je_pack = self.primary_pack();
9651
9652        let mut generator = generator
9653            .with_master_data(
9654                &self.master_data.vendors,
9655                &self.master_data.customers,
9656                &self.master_data.materials,
9657            )
9658            .with_country_pack_names(je_pack)
9659            .with_country_pack_temporal(
9660                self.config.temporal_patterns.clone(),
9661                self.seed + 200,
9662                je_pack,
9663            )
9664            .with_persona_errors(true)
9665            .with_fraud_config(self.config.fraud.clone());
9666
9667        // Apply temporal drift if configured
9668        if self.config.temporal.enabled {
9669            let drift_config = self.config.temporal.to_core_config();
9670            generator = generator.with_drift_config(drift_config, self.seed + 100);
9671        }
9672
9673        // Check memory limit at start
9674        self.check_memory_limit()?;
9675
9676        // Determine parallelism: use available cores, but cap at total entries
9677        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9678
9679        // Use parallel generation for datasets with 10K+ entries.
9680        // Below this threshold, the statistical properties of a single-seeded
9681        // generator (e.g. Benford compliance) are better preserved.
9682        let entries = if total >= 10_000 && num_threads > 1 {
9683            // Parallel path: split the generator across cores and generate in parallel.
9684            // Each sub-generator gets a unique seed for deterministic, independent generation.
9685            let sub_generators = generator.split(num_threads);
9686            let entries_per_thread = total as usize / num_threads;
9687            let remainder = total as usize % num_threads;
9688
9689            let batches: Vec<Vec<JournalEntry>> = sub_generators
9690                .into_par_iter()
9691                .enumerate()
9692                .map(|(i, mut gen)| {
9693                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9694                    gen.generate_batch(count)
9695                })
9696                .collect();
9697
9698            // Merge all batches into a single Vec
9699            let entries = JournalEntryGenerator::merge_results(batches);
9700
9701            if let Some(pb) = &pb {
9702                pb.inc(total);
9703            }
9704            entries
9705        } else {
9706            // Sequential path for small datasets (< 1000 entries)
9707            let mut entries = Vec::with_capacity(total as usize);
9708            for _ in 0..total {
9709                let entry = generator.generate();
9710                entries.push(entry);
9711                if let Some(pb) = &pb {
9712                    pb.inc(1);
9713                }
9714            }
9715            entries
9716        };
9717
9718        if let Some(pb) = pb {
9719            pb.finish_with_message("Journal entries complete");
9720        }
9721
9722        Ok(entries)
9723    }
9724
9725    /// Generate journal entries from document flows.
9726    ///
9727    /// This creates proper GL entries for each document in the P2P and O2C flows,
9728    /// ensuring that document activity is reflected in the general ledger.
9729    fn generate_jes_from_document_flows(
9730        &mut self,
9731        flows: &DocumentFlowSnapshot,
9732    ) -> SynthResult<Vec<JournalEntry>> {
9733        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9734        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9735
9736        let je_config = match self.resolve_coa_framework() {
9737            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9738            CoAFramework::GermanSkr04 => {
9739                let fa = datasynth_core::FrameworkAccounts::german_gaap();
9740                DocumentFlowJeConfig::from(&fa)
9741            }
9742            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9743        };
9744
9745        let populate_fec = je_config.populate_fec_fields;
9746        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9747
9748        // Build auxiliary account lookup from vendor/customer master data so that
9749        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
9750        // PCG "4010001") instead of raw partner IDs.
9751        if populate_fec {
9752            let mut aux_lookup = std::collections::HashMap::new();
9753            for vendor in &self.master_data.vendors {
9754                if let Some(ref aux) = vendor.auxiliary_gl_account {
9755                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9756                }
9757            }
9758            for customer in &self.master_data.customers {
9759                if let Some(ref aux) = customer.auxiliary_gl_account {
9760                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
9761                }
9762            }
9763            if !aux_lookup.is_empty() {
9764                generator.set_auxiliary_account_lookup(aux_lookup);
9765            }
9766        }
9767
9768        let mut entries = Vec::new();
9769
9770        // Generate JEs from P2P chains
9771        for chain in &flows.p2p_chains {
9772            let chain_entries = generator.generate_from_p2p_chain(chain);
9773            entries.extend(chain_entries);
9774            if let Some(pb) = &pb {
9775                pb.inc(1);
9776            }
9777        }
9778
9779        // Generate JEs from O2C chains
9780        for chain in &flows.o2c_chains {
9781            let chain_entries = generator.generate_from_o2c_chain(chain);
9782            entries.extend(chain_entries);
9783            if let Some(pb) = &pb {
9784                pb.inc(1);
9785            }
9786        }
9787
9788        if let Some(pb) = pb {
9789            pb.finish_with_message(format!(
9790                "Generated {} JEs from document flows",
9791                entries.len()
9792            ));
9793        }
9794
9795        Ok(entries)
9796    }
9797
9798    /// Generate journal entries from payroll runs.
9799    ///
9800    /// Creates one JE per payroll run:
9801    /// - DR Salaries & Wages (6100) for gross pay
9802    /// - CR Payroll Clearing (9100) for gross pay
9803    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
9804        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
9805
9806        let mut jes = Vec::with_capacity(payroll_runs.len());
9807
9808        for run in payroll_runs {
9809            let mut je = JournalEntry::new_simple(
9810                format!("JE-PAYROLL-{}", run.payroll_id),
9811                run.company_code.clone(),
9812                run.run_date,
9813                format!("Payroll {}", run.payroll_id),
9814            );
9815
9816            // Debit Salaries & Wages for gross pay
9817            je.add_line(JournalEntryLine {
9818                line_number: 1,
9819                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
9820                debit_amount: run.total_gross,
9821                reference: Some(run.payroll_id.clone()),
9822                text: Some(format!(
9823                    "Payroll {} ({} employees)",
9824                    run.payroll_id, run.employee_count
9825                )),
9826                ..Default::default()
9827            });
9828
9829            // Credit Payroll Clearing for gross pay
9830            je.add_line(JournalEntryLine {
9831                line_number: 2,
9832                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
9833                credit_amount: run.total_gross,
9834                reference: Some(run.payroll_id.clone()),
9835                ..Default::default()
9836            });
9837
9838            jes.push(je);
9839        }
9840
9841        jes
9842    }
9843
9844    /// Link document flows to subledger records.
9845    ///
9846    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
9847    /// ensuring subledger data is coherent with document flow data.
9848    fn link_document_flows_to_subledgers(
9849        &mut self,
9850        flows: &DocumentFlowSnapshot,
9851    ) -> SynthResult<SubledgerSnapshot> {
9852        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9853        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9854
9855        // Build vendor/customer name maps from master data for realistic subledger names
9856        let vendor_names: std::collections::HashMap<String, String> = self
9857            .master_data
9858            .vendors
9859            .iter()
9860            .map(|v| (v.vendor_id.clone(), v.name.clone()))
9861            .collect();
9862        let customer_names: std::collections::HashMap<String, String> = self
9863            .master_data
9864            .customers
9865            .iter()
9866            .map(|c| (c.customer_id.clone(), c.name.clone()))
9867            .collect();
9868
9869        let mut linker = DocumentFlowLinker::new()
9870            .with_vendor_names(vendor_names)
9871            .with_customer_names(customer_names);
9872
9873        // Convert vendor invoices to AP invoices
9874        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9875        if let Some(pb) = &pb {
9876            pb.inc(flows.vendor_invoices.len() as u64);
9877        }
9878
9879        // Convert customer invoices to AR invoices
9880        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9881        if let Some(pb) = &pb {
9882            pb.inc(flows.customer_invoices.len() as u64);
9883        }
9884
9885        if let Some(pb) = pb {
9886            pb.finish_with_message(format!(
9887                "Linked {} AP and {} AR invoices",
9888                ap_invoices.len(),
9889                ar_invoices.len()
9890            ));
9891        }
9892
9893        Ok(SubledgerSnapshot {
9894            ap_invoices,
9895            ar_invoices,
9896            fa_records: Vec::new(),
9897            inventory_positions: Vec::new(),
9898            inventory_movements: Vec::new(),
9899            // Aging reports are computed after payment settlement in phase_document_flows.
9900            ar_aging_reports: Vec::new(),
9901            ap_aging_reports: Vec::new(),
9902            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
9903            depreciation_runs: Vec::new(),
9904            inventory_valuations: Vec::new(),
9905            // Dunning runs and letters are populated in phase_document_flows after AR aging.
9906            dunning_runs: Vec::new(),
9907            dunning_letters: Vec::new(),
9908        })
9909    }
9910
9911    /// Generate OCPM events from document flows.
9912    ///
9913    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
9914    /// capturing the object-centric process perspective.
9915    #[allow(clippy::too_many_arguments)]
9916    fn generate_ocpm_events(
9917        &mut self,
9918        flows: &DocumentFlowSnapshot,
9919        sourcing: &SourcingSnapshot,
9920        hr: &HrSnapshot,
9921        manufacturing: &ManufacturingSnapshot,
9922        banking: &BankingSnapshot,
9923        audit: &AuditSnapshot,
9924        financial_reporting: &FinancialReportingSnapshot,
9925    ) -> SynthResult<OcpmSnapshot> {
9926        let total_chains = flows.p2p_chains.len()
9927            + flows.o2c_chains.len()
9928            + sourcing.sourcing_projects.len()
9929            + hr.payroll_runs.len()
9930            + manufacturing.production_orders.len()
9931            + banking.customers.len()
9932            + audit.engagements.len()
9933            + financial_reporting.bank_reconciliations.len();
9934        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9935
9936        // Create OCPM event log with standard types
9937        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9938        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9939
9940        // Configure the OCPM generator
9941        let ocpm_config = OcpmGeneratorConfig {
9942            generate_p2p: true,
9943            generate_o2c: true,
9944            generate_s2c: !sourcing.sourcing_projects.is_empty(),
9945            generate_h2r: !hr.payroll_runs.is_empty(),
9946            generate_mfg: !manufacturing.production_orders.is_empty(),
9947            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9948            generate_bank: !banking.customers.is_empty(),
9949            generate_audit: !audit.engagements.is_empty(),
9950            happy_path_rate: 0.75,
9951            exception_path_rate: 0.20,
9952            error_path_rate: 0.05,
9953            add_duration_variability: true,
9954            duration_std_dev_factor: 0.3,
9955        };
9956        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9957        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9958
9959        // Get available users for resource assignment
9960        let available_users: Vec<String> = self
9961            .master_data
9962            .employees
9963            .iter()
9964            .take(20)
9965            .map(|e| e.user_id.clone())
9966            .collect();
9967
9968        // Deterministic base date from config (avoids Utc::now() non-determinism)
9969        let fallback_date =
9970            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9971        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9972            .unwrap_or(fallback_date);
9973        let base_midnight = base_date
9974            .and_hms_opt(0, 0, 0)
9975            .expect("midnight is always valid");
9976        let base_datetime =
9977            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9978
9979        // Helper closure to add case results to event log
9980        let add_result = |event_log: &mut OcpmEventLog,
9981                          result: datasynth_ocpm::CaseGenerationResult| {
9982            for event in result.events {
9983                event_log.add_event(event);
9984            }
9985            for object in result.objects {
9986                event_log.add_object(object);
9987            }
9988            for relationship in result.relationships {
9989                event_log.add_relationship(relationship);
9990            }
9991            for corr in result.correlation_events {
9992                event_log.add_correlation_event(corr);
9993            }
9994            event_log.add_case(result.case_trace);
9995        };
9996
9997        // Generate events from P2P chains
9998        for chain in &flows.p2p_chains {
9999            let po = &chain.purchase_order;
10000            let documents = P2pDocuments::new(
10001                &po.header.document_id,
10002                &po.vendor_id,
10003                &po.header.company_code,
10004                po.total_net_amount,
10005                &po.header.currency,
10006                &ocpm_uuid_factory,
10007            )
10008            .with_goods_receipt(
10009                chain
10010                    .goods_receipts
10011                    .first()
10012                    .map(|gr| gr.header.document_id.as_str())
10013                    .unwrap_or(""),
10014                &ocpm_uuid_factory,
10015            )
10016            .with_invoice(
10017                chain
10018                    .vendor_invoice
10019                    .as_ref()
10020                    .map(|vi| vi.header.document_id.as_str())
10021                    .unwrap_or(""),
10022                &ocpm_uuid_factory,
10023            )
10024            .with_payment(
10025                chain
10026                    .payment
10027                    .as_ref()
10028                    .map(|p| p.header.document_id.as_str())
10029                    .unwrap_or(""),
10030                &ocpm_uuid_factory,
10031            );
10032
10033            let start_time =
10034                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
10035            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
10036            add_result(&mut event_log, result);
10037
10038            if let Some(pb) = &pb {
10039                pb.inc(1);
10040            }
10041        }
10042
10043        // Generate events from O2C chains
10044        for chain in &flows.o2c_chains {
10045            let so = &chain.sales_order;
10046            let documents = O2cDocuments::new(
10047                &so.header.document_id,
10048                &so.customer_id,
10049                &so.header.company_code,
10050                so.total_net_amount,
10051                &so.header.currency,
10052                &ocpm_uuid_factory,
10053            )
10054            .with_delivery(
10055                chain
10056                    .deliveries
10057                    .first()
10058                    .map(|d| d.header.document_id.as_str())
10059                    .unwrap_or(""),
10060                &ocpm_uuid_factory,
10061            )
10062            .with_invoice(
10063                chain
10064                    .customer_invoice
10065                    .as_ref()
10066                    .map(|ci| ci.header.document_id.as_str())
10067                    .unwrap_or(""),
10068                &ocpm_uuid_factory,
10069            )
10070            .with_receipt(
10071                chain
10072                    .customer_receipt
10073                    .as_ref()
10074                    .map(|r| r.header.document_id.as_str())
10075                    .unwrap_or(""),
10076                &ocpm_uuid_factory,
10077            );
10078
10079            let start_time =
10080                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
10081            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
10082            add_result(&mut event_log, result);
10083
10084            if let Some(pb) = &pb {
10085                pb.inc(1);
10086            }
10087        }
10088
10089        // Generate events from S2C sourcing projects
10090        for project in &sourcing.sourcing_projects {
10091            // Find vendor from contracts or qualifications
10092            let vendor_id = sourcing
10093                .contracts
10094                .iter()
10095                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10096                .map(|c| c.vendor_id.clone())
10097                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
10098                .or_else(|| {
10099                    self.master_data
10100                        .vendors
10101                        .first()
10102                        .map(|v| v.vendor_id.clone())
10103                })
10104                .unwrap_or_else(|| "V000".to_string());
10105            let mut docs = S2cDocuments::new(
10106                &project.project_id,
10107                &vendor_id,
10108                &project.company_code,
10109                project.estimated_annual_spend,
10110                &ocpm_uuid_factory,
10111            );
10112            // Link RFx if available
10113            if let Some(rfx) = sourcing
10114                .rfx_events
10115                .iter()
10116                .find(|r| r.sourcing_project_id == project.project_id)
10117            {
10118                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
10119                // Link winning bid (status == Accepted)
10120                if let Some(bid) = sourcing.bids.iter().find(|b| {
10121                    b.rfx_id == rfx.rfx_id
10122                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
10123                }) {
10124                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
10125                }
10126            }
10127            // Link contract
10128            if let Some(contract) = sourcing
10129                .contracts
10130                .iter()
10131                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10132            {
10133                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
10134            }
10135            let start_time = base_datetime - chrono::Duration::days(90);
10136            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
10137            add_result(&mut event_log, result);
10138
10139            if let Some(pb) = &pb {
10140                pb.inc(1);
10141            }
10142        }
10143
10144        // Generate events from H2R payroll runs
10145        for run in &hr.payroll_runs {
10146            // Use first matching payroll line item's employee, or fallback
10147            let employee_id = hr
10148                .payroll_line_items
10149                .iter()
10150                .find(|li| li.payroll_id == run.payroll_id)
10151                .map(|li| li.employee_id.as_str())
10152                .unwrap_or("EMP000");
10153            let docs = H2rDocuments::new(
10154                &run.payroll_id,
10155                employee_id,
10156                &run.company_code,
10157                run.total_gross,
10158                &ocpm_uuid_factory,
10159            )
10160            .with_time_entries(
10161                hr.time_entries
10162                    .iter()
10163                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
10164                    .take(5)
10165                    .map(|t| t.entry_id.as_str())
10166                    .collect(),
10167            );
10168            let start_time = base_datetime - chrono::Duration::days(30);
10169            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
10170            add_result(&mut event_log, result);
10171
10172            if let Some(pb) = &pb {
10173                pb.inc(1);
10174            }
10175        }
10176
10177        // Generate events from MFG production orders
10178        for order in &manufacturing.production_orders {
10179            let mut docs = MfgDocuments::new(
10180                &order.order_id,
10181                &order.material_id,
10182                &order.company_code,
10183                order.planned_quantity,
10184                &ocpm_uuid_factory,
10185            )
10186            .with_operations(
10187                order
10188                    .operations
10189                    .iter()
10190                    .map(|o| format!("OP-{:04}", o.operation_number))
10191                    .collect::<Vec<_>>()
10192                    .iter()
10193                    .map(std::string::String::as_str)
10194                    .collect(),
10195            );
10196            // Link quality inspection if available (via reference_id matching order_id)
10197            if let Some(insp) = manufacturing
10198                .quality_inspections
10199                .iter()
10200                .find(|i| i.reference_id == order.order_id)
10201            {
10202                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10203            }
10204            // Link cycle count if available (match by material_id in items)
10205            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10206                cc.items
10207                    .iter()
10208                    .any(|item| item.material_id == order.material_id)
10209            }) {
10210                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10211            }
10212            let start_time = base_datetime - chrono::Duration::days(60);
10213            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10214            add_result(&mut event_log, result);
10215
10216            if let Some(pb) = &pb {
10217                pb.inc(1);
10218            }
10219        }
10220
10221        // Generate events from Banking customers
10222        for customer in &banking.customers {
10223            let customer_id_str = customer.customer_id.to_string();
10224            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10225            // Link accounts (primary_owner_id matches customer_id)
10226            if let Some(account) = banking
10227                .accounts
10228                .iter()
10229                .find(|a| a.primary_owner_id == customer.customer_id)
10230            {
10231                let account_id_str = account.account_id.to_string();
10232                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10233                // Link transactions for this account
10234                let txn_strs: Vec<String> = banking
10235                    .transactions
10236                    .iter()
10237                    .filter(|t| t.account_id == account.account_id)
10238                    .take(10)
10239                    .map(|t| t.transaction_id.to_string())
10240                    .collect();
10241                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10242                let txn_amounts: Vec<rust_decimal::Decimal> = banking
10243                    .transactions
10244                    .iter()
10245                    .filter(|t| t.account_id == account.account_id)
10246                    .take(10)
10247                    .map(|t| t.amount)
10248                    .collect();
10249                if !txn_ids.is_empty() {
10250                    docs = docs.with_transactions(txn_ids, txn_amounts);
10251                }
10252            }
10253            let start_time = base_datetime - chrono::Duration::days(180);
10254            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10255            add_result(&mut event_log, result);
10256
10257            if let Some(pb) = &pb {
10258                pb.inc(1);
10259            }
10260        }
10261
10262        // Generate events from Audit engagements
10263        for engagement in &audit.engagements {
10264            let engagement_id_str = engagement.engagement_id.to_string();
10265            let docs = AuditDocuments::new(
10266                &engagement_id_str,
10267                &engagement.client_entity_id,
10268                &ocpm_uuid_factory,
10269            )
10270            .with_workpapers(
10271                audit
10272                    .workpapers
10273                    .iter()
10274                    .filter(|w| w.engagement_id == engagement.engagement_id)
10275                    .take(10)
10276                    .map(|w| w.workpaper_id.to_string())
10277                    .collect::<Vec<_>>()
10278                    .iter()
10279                    .map(std::string::String::as_str)
10280                    .collect(),
10281            )
10282            .with_evidence(
10283                audit
10284                    .evidence
10285                    .iter()
10286                    .filter(|e| e.engagement_id == engagement.engagement_id)
10287                    .take(10)
10288                    .map(|e| e.evidence_id.to_string())
10289                    .collect::<Vec<_>>()
10290                    .iter()
10291                    .map(std::string::String::as_str)
10292                    .collect(),
10293            )
10294            .with_risks(
10295                audit
10296                    .risk_assessments
10297                    .iter()
10298                    .filter(|r| r.engagement_id == engagement.engagement_id)
10299                    .take(5)
10300                    .map(|r| r.risk_id.to_string())
10301                    .collect::<Vec<_>>()
10302                    .iter()
10303                    .map(std::string::String::as_str)
10304                    .collect(),
10305            )
10306            .with_findings(
10307                audit
10308                    .findings
10309                    .iter()
10310                    .filter(|f| f.engagement_id == engagement.engagement_id)
10311                    .take(5)
10312                    .map(|f| f.finding_id.to_string())
10313                    .collect::<Vec<_>>()
10314                    .iter()
10315                    .map(std::string::String::as_str)
10316                    .collect(),
10317            )
10318            .with_judgments(
10319                audit
10320                    .judgments
10321                    .iter()
10322                    .filter(|j| j.engagement_id == engagement.engagement_id)
10323                    .take(5)
10324                    .map(|j| j.judgment_id.to_string())
10325                    .collect::<Vec<_>>()
10326                    .iter()
10327                    .map(std::string::String::as_str)
10328                    .collect(),
10329            );
10330            let start_time = base_datetime - chrono::Duration::days(120);
10331            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10332            add_result(&mut event_log, result);
10333
10334            if let Some(pb) = &pb {
10335                pb.inc(1);
10336            }
10337        }
10338
10339        // Generate events from Bank Reconciliations
10340        for recon in &financial_reporting.bank_reconciliations {
10341            let docs = BankReconDocuments::new(
10342                &recon.reconciliation_id,
10343                &recon.bank_account_id,
10344                &recon.company_code,
10345                recon.bank_ending_balance,
10346                &ocpm_uuid_factory,
10347            )
10348            .with_statement_lines(
10349                recon
10350                    .statement_lines
10351                    .iter()
10352                    .take(20)
10353                    .map(|l| l.line_id.as_str())
10354                    .collect(),
10355            )
10356            .with_reconciling_items(
10357                recon
10358                    .reconciling_items
10359                    .iter()
10360                    .take(10)
10361                    .map(|i| i.item_id.as_str())
10362                    .collect(),
10363            );
10364            let start_time = base_datetime - chrono::Duration::days(30);
10365            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10366            add_result(&mut event_log, result);
10367
10368            if let Some(pb) = &pb {
10369                pb.inc(1);
10370            }
10371        }
10372
10373        // Compute process variants
10374        event_log.compute_variants();
10375
10376        let summary = event_log.summary();
10377
10378        if let Some(pb) = pb {
10379            pb.finish_with_message(format!(
10380                "Generated {} OCPM events, {} objects",
10381                summary.event_count, summary.object_count
10382            ));
10383        }
10384
10385        Ok(OcpmSnapshot {
10386            event_count: summary.event_count,
10387            object_count: summary.object_count,
10388            case_count: summary.case_count,
10389            event_log: Some(event_log),
10390        })
10391    }
10392
10393    /// Inject anomalies into journal entries.
10394    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10395        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10396
10397        // Read anomaly rates from config instead of using hardcoded values.
10398        // Priority: anomaly_injection config > fraud config > default 0.02
10399        let total_rate = if self.config.anomaly_injection.enabled {
10400            self.config.anomaly_injection.rates.total_rate
10401        } else if self.config.fraud.enabled {
10402            self.config.fraud.fraud_rate
10403        } else {
10404            0.02
10405        };
10406
10407        let fraud_rate = if self.config.anomaly_injection.enabled {
10408            self.config.anomaly_injection.rates.fraud_rate
10409        } else {
10410            AnomalyRateConfig::default().fraud_rate
10411        };
10412
10413        let error_rate = if self.config.anomaly_injection.enabled {
10414            self.config.anomaly_injection.rates.error_rate
10415        } else {
10416            AnomalyRateConfig::default().error_rate
10417        };
10418
10419        let process_issue_rate = if self.config.anomaly_injection.enabled {
10420            self.config.anomaly_injection.rates.process_rate
10421        } else {
10422            AnomalyRateConfig::default().process_issue_rate
10423        };
10424
10425        let anomaly_config = AnomalyInjectorConfig {
10426            rates: AnomalyRateConfig {
10427                total_rate,
10428                fraud_rate,
10429                error_rate,
10430                process_issue_rate,
10431                ..Default::default()
10432            },
10433            seed: self.seed + 5000,
10434            ..Default::default()
10435        };
10436
10437        let mut injector = AnomalyInjector::new(anomaly_config);
10438        let result = injector.process_entries(entries);
10439
10440        if let Some(pb) = &pb {
10441            pb.inc(entries.len() as u64);
10442            pb.finish_with_message("Anomaly injection complete");
10443        }
10444
10445        let mut by_type = HashMap::new();
10446        for label in &result.labels {
10447            *by_type
10448                .entry(format!("{:?}", label.anomaly_type))
10449                .or_insert(0) += 1;
10450        }
10451
10452        Ok(AnomalyLabels {
10453            labels: result.labels,
10454            summary: Some(result.summary),
10455            by_type,
10456        })
10457    }
10458
10459    /// Validate journal entries using running balance tracker.
10460    ///
10461    /// Applies all entries to the balance tracker and validates:
10462    /// - Each entry is internally balanced (debits = credits)
10463    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
10464    ///
10465    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
10466    /// excluded from balance validation as they may be intentionally unbalanced.
10467    fn validate_journal_entries(
10468        &mut self,
10469        entries: &[JournalEntry],
10470    ) -> SynthResult<BalanceValidationResult> {
10471        // Filter out entries with human errors as they may be intentionally unbalanced
10472        let clean_entries: Vec<&JournalEntry> = entries
10473            .iter()
10474            .filter(|e| {
10475                e.header
10476                    .header_text
10477                    .as_ref()
10478                    .map(|t| !t.contains("[HUMAN_ERROR:"))
10479                    .unwrap_or(true)
10480            })
10481            .collect();
10482
10483        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10484
10485        // Configure tracker to not fail on errors (collect them instead)
10486        let config = BalanceTrackerConfig {
10487            validate_on_each_entry: false,   // We'll validate at the end
10488            track_history: false,            // Skip history for performance
10489            fail_on_validation_error: false, // Collect errors, don't fail
10490            ..Default::default()
10491        };
10492        let validation_currency = self
10493            .config
10494            .companies
10495            .first()
10496            .map(|c| c.currency.clone())
10497            .unwrap_or_else(|| "USD".to_string());
10498
10499        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10500
10501        // Apply clean entries (without human errors)
10502        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10503        let errors = tracker.apply_entries(&clean_refs);
10504
10505        if let Some(pb) = &pb {
10506            pb.inc(entries.len() as u64);
10507        }
10508
10509        // Check if any entries were unbalanced
10510        // Note: When fail_on_validation_error is false, errors are stored in tracker
10511        let has_unbalanced = tracker
10512            .get_validation_errors()
10513            .iter()
10514            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10515
10516        // Validate balance sheet for each company
10517        // Include both returned errors and collected validation errors
10518        let mut all_errors = errors;
10519        all_errors.extend(tracker.get_validation_errors().iter().cloned());
10520        let company_codes: Vec<String> = self
10521            .config
10522            .companies
10523            .iter()
10524            .map(|c| c.code.clone())
10525            .collect();
10526
10527        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10528            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10529            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10530
10531        for company_code in &company_codes {
10532            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10533                all_errors.push(e);
10534            }
10535        }
10536
10537        // Get statistics after all mutable operations are done
10538        let stats = tracker.get_statistics();
10539
10540        // Determine if balanced overall
10541        let is_balanced = all_errors.is_empty();
10542
10543        if let Some(pb) = pb {
10544            let msg = if is_balanced {
10545                "Balance validation passed"
10546            } else {
10547                "Balance validation completed with errors"
10548            };
10549            pb.finish_with_message(msg);
10550        }
10551
10552        Ok(BalanceValidationResult {
10553            validated: true,
10554            is_balanced,
10555            entries_processed: stats.entries_processed,
10556            total_debits: stats.total_debits,
10557            total_credits: stats.total_credits,
10558            accounts_tracked: stats.accounts_tracked,
10559            companies_tracked: stats.companies_tracked,
10560            validation_errors: all_errors,
10561            has_unbalanced_entries: has_unbalanced,
10562        })
10563    }
10564
10565    /// Inject data quality variations into journal entries.
10566    ///
10567    /// Applies typos, missing values, and format variations to make
10568    /// the synthetic data more realistic for testing data cleaning pipelines.
10569    fn inject_data_quality(
10570        &mut self,
10571        entries: &mut [JournalEntry],
10572    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10573        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10574
10575        // Build config from user-specified schema settings when data_quality is enabled;
10576        // otherwise fall back to the low-rate minimal() preset.
10577        let config = if self.config.data_quality.enabled {
10578            let dq = &self.config.data_quality;
10579            DataQualityConfig {
10580                enable_missing_values: dq.missing_values.enabled,
10581                missing_values: datasynth_generators::MissingValueConfig {
10582                    global_rate: dq.effective_missing_rate(),
10583                    ..Default::default()
10584                },
10585                enable_format_variations: dq.format_variations.enabled,
10586                format_variations: datasynth_generators::FormatVariationConfig {
10587                    date_variation_rate: dq.format_variations.dates.rate,
10588                    amount_variation_rate: dq.format_variations.amounts.rate,
10589                    identifier_variation_rate: dq.format_variations.identifiers.rate,
10590                    ..Default::default()
10591                },
10592                enable_duplicates: dq.duplicates.enabled,
10593                duplicates: datasynth_generators::DuplicateConfig {
10594                    duplicate_rate: dq.effective_duplicate_rate(),
10595                    ..Default::default()
10596                },
10597                enable_typos: dq.typos.enabled,
10598                typos: datasynth_generators::TypoConfig {
10599                    char_error_rate: dq.effective_typo_rate(),
10600                    ..Default::default()
10601                },
10602                enable_encoding_issues: dq.encoding_issues.enabled,
10603                encoding_issue_rate: dq.encoding_issues.rate,
10604                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
10605                track_statistics: true,
10606            }
10607        } else {
10608            DataQualityConfig::minimal()
10609        };
10610        let mut injector = DataQualityInjector::new(config);
10611
10612        // Wire country pack for locale-aware format baselines
10613        injector.set_country_pack(self.primary_pack().clone());
10614
10615        // Build context for missing value decisions
10616        let context = HashMap::new();
10617
10618        for entry in entries.iter_mut() {
10619            // Process header_text field (common target for typos)
10620            if let Some(text) = &entry.header.header_text {
10621                let processed = injector.process_text_field(
10622                    "header_text",
10623                    text,
10624                    &entry.header.document_id.to_string(),
10625                    &context,
10626                );
10627                match processed {
10628                    Some(new_text) if new_text != *text => {
10629                        entry.header.header_text = Some(new_text);
10630                    }
10631                    None => {
10632                        entry.header.header_text = None; // Missing value
10633                    }
10634                    _ => {}
10635                }
10636            }
10637
10638            // Process reference field
10639            if let Some(ref_text) = &entry.header.reference {
10640                let processed = injector.process_text_field(
10641                    "reference",
10642                    ref_text,
10643                    &entry.header.document_id.to_string(),
10644                    &context,
10645                );
10646                match processed {
10647                    Some(new_text) if new_text != *ref_text => {
10648                        entry.header.reference = Some(new_text);
10649                    }
10650                    None => {
10651                        entry.header.reference = None;
10652                    }
10653                    _ => {}
10654                }
10655            }
10656
10657            // Process user_persona field (potential for typos in user IDs)
10658            let user_persona = entry.header.user_persona.clone();
10659            if let Some(processed) = injector.process_text_field(
10660                "user_persona",
10661                &user_persona,
10662                &entry.header.document_id.to_string(),
10663                &context,
10664            ) {
10665                if processed != user_persona {
10666                    entry.header.user_persona = processed;
10667                }
10668            }
10669
10670            // Process line items
10671            for line in &mut entry.lines {
10672                // Process line description if present
10673                if let Some(ref text) = line.line_text {
10674                    let processed = injector.process_text_field(
10675                        "line_text",
10676                        text,
10677                        &entry.header.document_id.to_string(),
10678                        &context,
10679                    );
10680                    match processed {
10681                        Some(new_text) if new_text != *text => {
10682                            line.line_text = Some(new_text);
10683                        }
10684                        None => {
10685                            line.line_text = None;
10686                        }
10687                        _ => {}
10688                    }
10689                }
10690
10691                // Process cost_center if present
10692                if let Some(cc) = &line.cost_center {
10693                    let processed = injector.process_text_field(
10694                        "cost_center",
10695                        cc,
10696                        &entry.header.document_id.to_string(),
10697                        &context,
10698                    );
10699                    match processed {
10700                        Some(new_cc) if new_cc != *cc => {
10701                            line.cost_center = Some(new_cc);
10702                        }
10703                        None => {
10704                            line.cost_center = None;
10705                        }
10706                        _ => {}
10707                    }
10708                }
10709            }
10710
10711            if let Some(pb) = &pb {
10712                pb.inc(1);
10713            }
10714        }
10715
10716        if let Some(pb) = pb {
10717            pb.finish_with_message("Data quality injection complete");
10718        }
10719
10720        let quality_issues = injector.issues().to_vec();
10721        Ok((injector.stats().clone(), quality_issues))
10722    }
10723
10724    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
10725    ///
10726    /// Creates complete audit documentation for each company in the configuration,
10727    /// following ISA standards:
10728    /// - ISA 210/220: Engagement acceptance and terms
10729    /// - ISA 230: Audit documentation (workpapers)
10730    /// - ISA 265: Control deficiencies (findings)
10731    /// - ISA 315/330: Risk assessment and response
10732    /// - ISA 500: Audit evidence
10733    /// - ISA 200: Professional judgment
10734    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10735        // Check if FSM-driven audit generation is enabled
10736        let use_fsm = self
10737            .config
10738            .audit
10739            .fsm
10740            .as_ref()
10741            .map(|f| f.enabled)
10742            .unwrap_or(false);
10743
10744        if use_fsm {
10745            return self.generate_audit_data_with_fsm(entries);
10746        }
10747
10748        // --- Legacy (non-FSM) audit generation follows ---
10749        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10750            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10751        let fiscal_year = start_date.year() as u16;
10752        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10753
10754        // Calculate rough total revenue from entries for materiality
10755        let total_revenue: rust_decimal::Decimal = entries
10756            .iter()
10757            .flat_map(|e| e.lines.iter())
10758            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
10759            .map(|l| l.credit_amount)
10760            .sum();
10761
10762        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
10763        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
10764
10765        let mut snapshot = AuditSnapshot::default();
10766
10767        // Initialize generators
10768        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
10769        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
10770        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
10771        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
10772        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
10773        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
10774        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
10775        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
10776        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
10777        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
10778        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
10779        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
10780
10781        // Get list of accounts from CoA for risk assessment
10782        let accounts: Vec<String> = self
10783            .coa
10784            .as_ref()
10785            .map(|coa| {
10786                coa.get_postable_accounts()
10787                    .iter()
10788                    .map(|acc| acc.account_code().to_string())
10789                    .collect()
10790            })
10791            .unwrap_or_default();
10792
10793        // Generate engagements for each company
10794        for (i, company) in self.config.companies.iter().enumerate() {
10795            // Calculate company-specific revenue (proportional to volume weight)
10796            let company_revenue = total_revenue
10797                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
10798
10799            // Generate engagements for this company
10800            let engagements_for_company =
10801                self.phase_config.audit_engagements / self.config.companies.len().max(1);
10802            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
10803                1
10804            } else {
10805                0
10806            };
10807
10808            for _eng_idx in 0..(engagements_for_company + extra) {
10809                // Generate the engagement
10810                let mut engagement = engagement_gen.generate_engagement(
10811                    &company.code,
10812                    &company.name,
10813                    fiscal_year,
10814                    period_end,
10815                    company_revenue,
10816                    None, // Use default engagement type
10817                );
10818
10819                // Replace synthetic team IDs with real employee IDs from master data
10820                if !self.master_data.employees.is_empty() {
10821                    let emp_count = self.master_data.employees.len();
10822                    // Use employee IDs deterministically based on engagement index
10823                    let base = (i * 10 + _eng_idx) % emp_count;
10824                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
10825                        .employee_id
10826                        .clone();
10827                    engagement.engagement_manager_id = self.master_data.employees
10828                        [(base + 1) % emp_count]
10829                        .employee_id
10830                        .clone();
10831                    let real_team: Vec<String> = engagement
10832                        .team_member_ids
10833                        .iter()
10834                        .enumerate()
10835                        .map(|(j, _)| {
10836                            self.master_data.employees[(base + 2 + j) % emp_count]
10837                                .employee_id
10838                                .clone()
10839                        })
10840                        .collect();
10841                    engagement.team_member_ids = real_team;
10842                }
10843
10844                if let Some(pb) = &pb {
10845                    pb.inc(1);
10846                }
10847
10848                // Get team members from the engagement
10849                let team_members: Vec<String> = engagement.team_member_ids.clone();
10850
10851                // Generate workpapers for the engagement
10852                let workpapers =
10853                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10854
10855                for wp in &workpapers {
10856                    if let Some(pb) = &pb {
10857                        pb.inc(1);
10858                    }
10859
10860                    // Generate evidence for each workpaper
10861                    let evidence = evidence_gen.generate_evidence_for_workpaper(
10862                        wp,
10863                        &team_members,
10864                        wp.preparer_date,
10865                    );
10866
10867                    for _ in &evidence {
10868                        if let Some(pb) = &pb {
10869                            pb.inc(1);
10870                        }
10871                    }
10872
10873                    snapshot.evidence.extend(evidence);
10874                }
10875
10876                // Generate risk assessments for the engagement
10877                let risks =
10878                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10879
10880                for _ in &risks {
10881                    if let Some(pb) = &pb {
10882                        pb.inc(1);
10883                    }
10884                }
10885                snapshot.risk_assessments.extend(risks);
10886
10887                // Generate findings for the engagement
10888                let findings = finding_gen.generate_findings_for_engagement(
10889                    &engagement,
10890                    &workpapers,
10891                    &team_members,
10892                );
10893
10894                for _ in &findings {
10895                    if let Some(pb) = &pb {
10896                        pb.inc(1);
10897                    }
10898                }
10899                snapshot.findings.extend(findings);
10900
10901                // Generate professional judgments for the engagement
10902                let judgments =
10903                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10904
10905                for _ in &judgments {
10906                    if let Some(pb) = &pb {
10907                        pb.inc(1);
10908                    }
10909                }
10910                snapshot.judgments.extend(judgments);
10911
10912                // ISA 505: External confirmations and responses
10913                let (confs, resps) =
10914                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10915                snapshot.confirmations.extend(confs);
10916                snapshot.confirmation_responses.extend(resps);
10917
10918                // ISA 330: Procedure steps per workpaper
10919                let team_pairs: Vec<(String, String)> = team_members
10920                    .iter()
10921                    .map(|id| {
10922                        let name = self
10923                            .master_data
10924                            .employees
10925                            .iter()
10926                            .find(|e| e.employee_id == *id)
10927                            .map(|e| e.display_name.clone())
10928                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10929                        (id.clone(), name)
10930                    })
10931                    .collect();
10932                for wp in &workpapers {
10933                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10934                    snapshot.procedure_steps.extend(steps);
10935                }
10936
10937                // ISA 530: Samples per workpaper
10938                for wp in &workpapers {
10939                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10940                        snapshot.samples.push(sample);
10941                    }
10942                }
10943
10944                // ISA 520: Analytical procedures
10945                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10946                snapshot.analytical_results.extend(analytical);
10947
10948                // ISA 610: Internal audit function and reports
10949                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10950                snapshot.ia_functions.push(ia_func);
10951                snapshot.ia_reports.extend(ia_reports);
10952
10953                // ISA 550: Related parties and transactions
10954                let vendor_names: Vec<String> = self
10955                    .master_data
10956                    .vendors
10957                    .iter()
10958                    .map(|v| v.name.clone())
10959                    .collect();
10960                let customer_names: Vec<String> = self
10961                    .master_data
10962                    .customers
10963                    .iter()
10964                    .map(|c| c.name.clone())
10965                    .collect();
10966                let (parties, rp_txns) =
10967                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10968                snapshot.related_parties.extend(parties);
10969                snapshot.related_party_transactions.extend(rp_txns);
10970
10971                // Add workpapers after findings since findings need them
10972                snapshot.workpapers.extend(workpapers);
10973
10974                // Generate audit scope record for this engagement (one per engagement)
10975                {
10976                    let scope_id = format!(
10977                        "SCOPE-{}-{}",
10978                        engagement.engagement_id.simple(),
10979                        &engagement.client_entity_id
10980                    );
10981                    let scope = datasynth_core::models::audit::AuditScope::new(
10982                        scope_id.clone(),
10983                        engagement.engagement_id.to_string(),
10984                        engagement.client_entity_id.clone(),
10985                        engagement.materiality,
10986                    );
10987                    // Wire scope_id back to engagement
10988                    let mut eng = engagement;
10989                    eng.scope_id = Some(scope_id);
10990                    snapshot.audit_scopes.push(scope);
10991                    snapshot.engagements.push(eng);
10992                }
10993            }
10994        }
10995
10996        // ----------------------------------------------------------------
10997        // ISA 600: Group audit — component auditors, plan, instructions, reports
10998        // ----------------------------------------------------------------
10999        if self.config.companies.len() > 1 {
11000            // Use materiality from the first engagement if available, otherwise
11001            // derive a reasonable figure from total revenue.
11002            let group_materiality = snapshot
11003                .engagements
11004                .first()
11005                .map(|e| e.materiality)
11006                .unwrap_or_else(|| {
11007                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
11008                    total_revenue * pct
11009                });
11010
11011            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
11012            let group_engagement_id = snapshot
11013                .engagements
11014                .first()
11015                .map(|e| e.engagement_id.to_string())
11016                .unwrap_or_else(|| "GROUP-ENG".to_string());
11017
11018            let component_snapshot = component_gen.generate(
11019                &self.config.companies,
11020                group_materiality,
11021                &group_engagement_id,
11022                period_end,
11023            );
11024
11025            snapshot.component_auditors = component_snapshot.component_auditors;
11026            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
11027            snapshot.component_instructions = component_snapshot.component_instructions;
11028            snapshot.component_reports = component_snapshot.component_reports;
11029
11030            info!(
11031                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
11032                snapshot.component_auditors.len(),
11033                snapshot.component_instructions.len(),
11034                snapshot.component_reports.len(),
11035            );
11036        }
11037
11038        // ----------------------------------------------------------------
11039        // ISA 210: Engagement letters — one per engagement
11040        // ----------------------------------------------------------------
11041        {
11042            let applicable_framework = self
11043                .config
11044                .accounting_standards
11045                .framework
11046                .as_ref()
11047                .map(|f| format!("{f:?}"))
11048                .unwrap_or_else(|| "IFRS".to_string());
11049
11050            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
11051            let entity_count = self.config.companies.len();
11052
11053            for engagement in &snapshot.engagements {
11054                let company = self
11055                    .config
11056                    .companies
11057                    .iter()
11058                    .find(|c| c.code == engagement.client_entity_id);
11059                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
11060                let letter_date = engagement.planning_start;
11061                let letter = letter_gen.generate(
11062                    &engagement.engagement_id.to_string(),
11063                    &engagement.client_name,
11064                    entity_count,
11065                    engagement.period_end_date,
11066                    currency,
11067                    &applicable_framework,
11068                    letter_date,
11069                );
11070                snapshot.engagement_letters.push(letter);
11071            }
11072
11073            info!(
11074                "ISA 210 engagement letters: {} generated",
11075                snapshot.engagement_letters.len()
11076            );
11077        }
11078
11079        // ----------------------------------------------------------------
11080        // ISA 560 / IAS 10: Subsequent events
11081        // ----------------------------------------------------------------
11082        {
11083            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
11084            let entity_codes: Vec<String> = self
11085                .config
11086                .companies
11087                .iter()
11088                .map(|c| c.code.clone())
11089                .collect();
11090            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
11091            info!(
11092                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
11093                subsequent.len(),
11094                subsequent
11095                    .iter()
11096                    .filter(|e| matches!(
11097                        e.classification,
11098                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
11099                    ))
11100                    .count(),
11101                subsequent
11102                    .iter()
11103                    .filter(|e| matches!(
11104                        e.classification,
11105                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
11106                    ))
11107                    .count(),
11108            );
11109            snapshot.subsequent_events = subsequent;
11110        }
11111
11112        // ----------------------------------------------------------------
11113        // ISA 402: Service organization controls
11114        // ----------------------------------------------------------------
11115        {
11116            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
11117            let entity_codes: Vec<String> = self
11118                .config
11119                .companies
11120                .iter()
11121                .map(|c| c.code.clone())
11122                .collect();
11123            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
11124            info!(
11125                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
11126                soc_snapshot.service_organizations.len(),
11127                soc_snapshot.soc_reports.len(),
11128                soc_snapshot.user_entity_controls.len(),
11129            );
11130            snapshot.service_organizations = soc_snapshot.service_organizations;
11131            snapshot.soc_reports = soc_snapshot.soc_reports;
11132            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
11133        }
11134
11135        // ----------------------------------------------------------------
11136        // ISA 570: Going concern assessments
11137        // ----------------------------------------------------------------
11138        {
11139            use datasynth_generators::audit::going_concern_generator::{
11140                GoingConcernGenerator, GoingConcernInput,
11141            };
11142            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
11143            let entity_codes: Vec<String> = self
11144                .config
11145                .companies
11146                .iter()
11147                .map(|c| c.code.clone())
11148                .collect();
11149            // Assessment date = period end + 75 days (typical sign-off window).
11150            let assessment_date = period_end + chrono::Duration::days(75);
11151            let period_label = format!("FY{}", period_end.year());
11152
11153            // Build financial inputs from actual journal entries.
11154            //
11155            // We derive approximate P&L, working capital, and operating cash flow
11156            // by aggregating GL account balances from the journal entry population.
11157            // Account ranges used (standard chart):
11158            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
11159            //   Expenses:        6xxx (debit-normal)
11160            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
11161            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
11162            //   Operating CF:    net income used as-is (rough proxy; no D&A adjustment is applied here)
11163            let gc_inputs: Vec<GoingConcernInput> = self
11164                .config
11165                .companies
11166                .iter()
11167                .map(|company| {
11168                    let code = &company.code;
11169                    let mut revenue = rust_decimal::Decimal::ZERO;
11170                    let mut expenses = rust_decimal::Decimal::ZERO;
11171                    let mut current_assets = rust_decimal::Decimal::ZERO;
11172                    let mut current_liabs = rust_decimal::Decimal::ZERO;
11173                    let mut total_debt = rust_decimal::Decimal::ZERO;
11174
11175                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
11176                        for line in &je.lines {
11177                            let acct = line.gl_account.as_str();
11178                            let net = line.debit_amount - line.credit_amount;
11179                            if acct.starts_with('4') {
11180                                // Revenue accounts: credit-normal, so negative net = revenue earned
11181                                revenue -= net;
11182                            } else if acct.starts_with('6') {
11183                                // Expense accounts: debit-normal
11184                                expenses += net;
11185                            }
11186                            // Balance sheet accounts for working capital
11187                            if acct.starts_with('1') {
11188                                // Current asset accounts (1000–1499)
11189                                if let Ok(n) = acct.parse::<u32>() {
11190                                    if (1000..=1499).contains(&n) {
11191                                        current_assets += net;
11192                                    }
11193                                }
11194                            } else if acct.starts_with('2') {
11195                                if let Ok(n) = acct.parse::<u32>() {
11196                                    if (2000..=2499).contains(&n) {
11197                                        // Current liabilities
11198                                        current_liabs -= net; // credit-normal
11199                                    } else if (2500..=2999).contains(&n) {
11200                                        // Long-term debt
11201                                        total_debt -= net;
11202                                    }
11203                                }
11204                            }
11205                        }
11206                    }
11207
11208                    let net_income = revenue - expenses;
11209                    let working_capital = current_assets - current_liabs;
11210                    // Rough operating CF proxy: net income (full accrual CF calculation
11211                    // is done separately in the cash flow statement generator)
11212                    let operating_cash_flow = net_income;
11213
11214                    GoingConcernInput {
11215                        entity_code: code.clone(),
11216                        net_income,
11217                        working_capital,
11218                        operating_cash_flow,
11219                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
11220                        assessment_date,
11221                    }
11222                })
11223                .collect();
11224
11225            let assessments = if gc_inputs.is_empty() {
11226                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
11227            } else {
11228                gc_gen.generate_for_entities_with_inputs(
11229                    &entity_codes,
11230                    &gc_inputs,
11231                    assessment_date,
11232                    &period_label,
11233                )
11234            };
11235            info!(
11236                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
11237                assessments.len(),
11238                assessments.iter().filter(|a| matches!(
11239                    a.auditor_conclusion,
11240                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
11241                )).count(),
11242                assessments.iter().filter(|a| matches!(
11243                    a.auditor_conclusion,
11244                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
11245                )).count(),
11246                assessments.iter().filter(|a| matches!(
11247                    a.auditor_conclusion,
11248                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
11249                )).count(),
11250            );
11251            snapshot.going_concern_assessments = assessments;
11252        }
11253
11254        // ----------------------------------------------------------------
11255        // ISA 540: Accounting estimates
11256        // ----------------------------------------------------------------
11257        {
11258            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
11259            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
11260            let entity_codes: Vec<String> = self
11261                .config
11262                .companies
11263                .iter()
11264                .map(|c| c.code.clone())
11265                .collect();
11266            let estimates = est_gen.generate_for_entities(&entity_codes);
11267            info!(
11268                "ISA 540 accounting estimates: {} estimates across {} entities \
11269                 ({} with retrospective reviews, {} with auditor point estimates)",
11270                estimates.len(),
11271                entity_codes.len(),
11272                estimates
11273                    .iter()
11274                    .filter(|e| e.retrospective_review.is_some())
11275                    .count(),
11276                estimates
11277                    .iter()
11278                    .filter(|e| e.auditor_point_estimate.is_some())
11279                    .count(),
11280            );
11281            snapshot.accounting_estimates = estimates;
11282        }
11283
11284        // ----------------------------------------------------------------
11285        // ISA 700/701/705/706: Audit opinions (one per engagement)
11286        // ----------------------------------------------------------------
11287        {
11288            use datasynth_generators::audit::audit_opinion_generator::{
11289                AuditOpinionGenerator, AuditOpinionInput,
11290            };
11291
11292            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
11293
11294            // Build inputs — one per engagement, linking findings and going concern.
11295            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
11296                .engagements
11297                .iter()
11298                .map(|eng| {
11299                    // Collect findings for this engagement.
11300                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11301                        .findings
11302                        .iter()
11303                        .filter(|f| f.engagement_id == eng.engagement_id)
11304                        .cloned()
11305                        .collect();
11306
11307                    // Going concern for this entity.
11308                    let gc = snapshot
11309                        .going_concern_assessments
11310                        .iter()
11311                        .find(|g| g.entity_code == eng.client_entity_id)
11312                        .cloned();
11313
11314                    // Component reports: the full set of group component reports is attached to every engagement (no per-engagement filtering).
11315                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11316                        snapshot.component_reports.clone();
11317
11318                    let auditor = self
11319                        .master_data
11320                        .employees
11321                        .first()
11322                        .map(|e| e.display_name.clone())
11323                        .unwrap_or_else(|| "Global Audit LLP".into());
11324
11325                    let partner = self
11326                        .master_data
11327                        .employees
11328                        .get(1)
11329                        .map(|e| e.display_name.clone())
11330                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
11331
11332                    AuditOpinionInput {
11333                        entity_code: eng.client_entity_id.clone(),
11334                        entity_name: eng.client_name.clone(),
11335                        engagement_id: eng.engagement_id,
11336                        period_end: eng.period_end_date,
11337                        findings: eng_findings,
11338                        going_concern: gc,
11339                        component_reports: comp_reports,
11340                        // Mark as US-listed when audit standards include PCAOB.
11341                        is_us_listed: {
11342                            let fw = &self.config.audit_standards.isa_compliance.framework;
11343                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11344                        },
11345                        auditor_name: auditor,
11346                        engagement_partner: partner,
11347                    }
11348                })
11349                .collect();
11350
11351            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11352
11353            for go in &generated_opinions {
11354                snapshot
11355                    .key_audit_matters
11356                    .extend(go.key_audit_matters.clone());
11357            }
11358            snapshot.audit_opinions = generated_opinions
11359                .into_iter()
11360                .map(|go| go.opinion)
11361                .collect();
11362
11363            info!(
11364                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11365                snapshot.audit_opinions.len(),
11366                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11367                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11368                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11369                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11370            );
11371        }
11372
11373        // ----------------------------------------------------------------
11374        // SOX 302 / 404 assessments
11375        // ----------------------------------------------------------------
11376        {
11377            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11378
11379            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11380
11381            for (i, company) in self.config.companies.iter().enumerate() {
11382                // Collect findings for this company's engagements.
11383                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11384                    .engagements
11385                    .iter()
11386                    .filter(|e| e.client_entity_id == company.code)
11387                    .map(|e| e.engagement_id)
11388                    .collect();
11389
11390                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11391                    .findings
11392                    .iter()
11393                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11394                    .cloned()
11395                    .collect();
11396
11397                // Derive executive names from employee list.
11398                let emp_count = self.master_data.employees.len();
11399                let ceo_name = if emp_count > 0 {
11400                    self.master_data.employees[i % emp_count]
11401                        .display_name
11402                        .clone()
11403                } else {
11404                    format!("CEO of {}", company.name)
11405                };
11406                let cfo_name = if emp_count > 1 {
11407                    self.master_data.employees[(i + 1) % emp_count]
11408                        .display_name
11409                        .clone()
11410                } else {
11411                    format!("CFO of {}", company.name)
11412                };
11413
11414                // Use engagement materiality if available.
11415                let materiality = snapshot
11416                    .engagements
11417                    .iter()
11418                    .find(|e| e.client_entity_id == company.code)
11419                    .map(|e| e.materiality)
11420                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11421
11422                let input = SoxGeneratorInput {
11423                    company_code: company.code.clone(),
11424                    company_name: company.name.clone(),
11425                    fiscal_year,
11426                    period_end,
11427                    findings: company_findings,
11428                    ceo_name,
11429                    cfo_name,
11430                    materiality_threshold: materiality,
11431                    revenue_percent: rust_decimal::Decimal::from(100),
11432                    assets_percent: rust_decimal::Decimal::from(100),
11433                    significant_accounts: vec![
11434                        "Revenue".into(),
11435                        "Accounts Receivable".into(),
11436                        "Inventory".into(),
11437                        "Fixed Assets".into(),
11438                        "Accounts Payable".into(),
11439                    ],
11440                };
11441
11442                let (certs, assessment) = sox_gen.generate(&input);
11443                snapshot.sox_302_certifications.extend(certs);
11444                snapshot.sox_404_assessments.push(assessment);
11445            }
11446
11447            info!(
11448                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11449                snapshot.sox_302_certifications.len(),
11450                snapshot.sox_404_assessments.len(),
11451                snapshot
11452                    .sox_404_assessments
11453                    .iter()
11454                    .filter(|a| a.icfr_effective)
11455                    .count(),
11456                snapshot
11457                    .sox_404_assessments
11458                    .iter()
11459                    .filter(|a| !a.icfr_effective)
11460                    .count(),
11461            );
11462        }
11463
11464        // ----------------------------------------------------------------
11465        // ISA 320: Materiality calculations (one per entity)
11466        // ----------------------------------------------------------------
11467        {
11468            use datasynth_generators::audit::materiality_generator::{
11469                MaterialityGenerator, MaterialityInput,
11470            };
11471
11472            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11473
11474            // Compute per-company financials from JEs.
11475            // Asset accounts start with '1', revenue with '4',
11476            // expense accounts with '5' or '6'.
11477            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11478
11479            for company in &self.config.companies {
11480                let company_code = company.code.clone();
11481
11482                // Revenue: credit-side entries on 4xxx accounts
11483                let company_revenue: rust_decimal::Decimal = entries
11484                    .iter()
11485                    .filter(|e| e.company_code() == company_code)
11486                    .flat_map(|e| e.lines.iter())
11487                    .filter(|l| l.account_code.starts_with('4'))
11488                    .map(|l| l.credit_amount)
11489                    .sum();
11490
11491                // Total assets: sum of debit-side amounts on 1xxx accounts (credits are not netted)
11492                let total_assets: rust_decimal::Decimal = entries
11493                    .iter()
11494                    .filter(|e| e.company_code() == company_code)
11495                    .flat_map(|e| e.lines.iter())
11496                    .filter(|l| l.account_code.starts_with('1'))
11497                    .map(|l| l.debit_amount)
11498                    .sum();
11499
11500                // Expenses: debit-side entries on 5xxx/6xxx accounts
11501                let total_expenses: rust_decimal::Decimal = entries
11502                    .iter()
11503                    .filter(|e| e.company_code() == company_code)
11504                    .flat_map(|e| e.lines.iter())
11505                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11506                    .map(|l| l.debit_amount)
11507                    .sum();
11508
11509                // Equity: sum of credit-side amounts on 3xxx accounts (debits are not netted)
11510                let equity: rust_decimal::Decimal = entries
11511                    .iter()
11512                    .filter(|e| e.company_code() == company_code)
11513                    .flat_map(|e| e.lines.iter())
11514                    .filter(|l| l.account_code.starts_with('3'))
11515                    .map(|l| l.credit_amount)
11516                    .sum();
11517
11518                let pretax_income = company_revenue - total_expenses;
11519
11520                // If no company-specific data, fall back to proportional share
11521                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11522                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
11523                        .unwrap_or(rust_decimal::Decimal::ONE);
11524                    (
11525                        total_revenue * w,
11526                        total_revenue * w * rust_decimal::Decimal::from(3),
11527                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
11528                        total_revenue * w * rust_decimal::Decimal::from(2),
11529                    )
11530                } else {
11531                    (company_revenue, total_assets, pretax_income, equity)
11532                };
11533
11534                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
11535
11536                materiality_inputs.push(MaterialityInput {
11537                    entity_code: company_code,
11538                    period: format!("FY{}", fiscal_year),
11539                    revenue: rev,
11540                    pretax_income: pti,
11541                    total_assets: assets,
11542                    equity: eq,
11543                    gross_profit,
11544                });
11545            }
11546
11547            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11548
11549            info!(
11550                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11551                 {} total assets, {} equity benchmarks)",
11552                snapshot.materiality_calculations.len(),
11553                snapshot
11554                    .materiality_calculations
11555                    .iter()
11556                    .filter(|m| matches!(
11557                        m.benchmark,
11558                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11559                    ))
11560                    .count(),
11561                snapshot
11562                    .materiality_calculations
11563                    .iter()
11564                    .filter(|m| matches!(
11565                        m.benchmark,
11566                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11567                    ))
11568                    .count(),
11569                snapshot
11570                    .materiality_calculations
11571                    .iter()
11572                    .filter(|m| matches!(
11573                        m.benchmark,
11574                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11575                    ))
11576                    .count(),
11577                snapshot
11578                    .materiality_calculations
11579                    .iter()
11580                    .filter(|m| matches!(
11581                        m.benchmark,
11582                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11583                    ))
11584                    .count(),
11585            );
11586        }
11587
11588        // ----------------------------------------------------------------
11589        // ISA 315: Combined Risk Assessments (per entity, per account area)
11590        // ----------------------------------------------------------------
11591        {
11592            use datasynth_generators::audit::cra_generator::CraGenerator;
11593
11594            let mut cra_gen = CraGenerator::new(self.seed + 8315);
11595
11596            // Build entity → scope_id map from already-generated scopes
11597            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11598                .audit_scopes
11599                .iter()
11600                .map(|s| (s.entity_code.clone(), s.id.clone()))
11601                .collect();
11602
11603            for company in &self.config.companies {
11604                let cras = cra_gen.generate_for_entity(&company.code, None);
11605                let scope_id = entity_scope_map.get(&company.code).cloned();
11606                let cras_with_scope: Vec<_> = cras
11607                    .into_iter()
11608                    .map(|mut cra| {
11609                        cra.scope_id = scope_id.clone();
11610                        cra
11611                    })
11612                    .collect();
11613                snapshot.combined_risk_assessments.extend(cras_with_scope);
11614            }
11615
11616            let significant_count = snapshot
11617                .combined_risk_assessments
11618                .iter()
11619                .filter(|c| c.significant_risk)
11620                .count();
11621            let high_cra_count = snapshot
11622                .combined_risk_assessments
11623                .iter()
11624                .filter(|c| {
11625                    matches!(
11626                        c.combined_risk,
11627                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11628                    )
11629                })
11630                .count();
11631
11632            info!(
11633                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11634                snapshot.combined_risk_assessments.len(),
11635                significant_count,
11636                high_cra_count,
11637            );
11638        }
11639
11640        // ----------------------------------------------------------------
11641        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
11642        // ----------------------------------------------------------------
11643        {
11644            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11645
11646            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11647
11648            // Group CRAs by entity and use per-entity tolerable error from materiality
11649            for company in &self.config.companies {
11650                let entity_code = company.code.clone();
11651
11652                // Find tolerable error for this entity (= performance materiality)
11653                let tolerable_error = snapshot
11654                    .materiality_calculations
11655                    .iter()
11656                    .find(|m| m.entity_code == entity_code)
11657                    .map(|m| m.tolerable_error);
11658
11659                // Collect CRAs for this entity
11660                let entity_cras: Vec<_> = snapshot
11661                    .combined_risk_assessments
11662                    .iter()
11663                    .filter(|c| c.entity_code == entity_code)
11664                    .cloned()
11665                    .collect();
11666
11667                if !entity_cras.is_empty() {
11668                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11669                    snapshot.sampling_plans.extend(plans);
11670                    snapshot.sampled_items.extend(items);
11671                }
11672            }
11673
11674            let misstatement_count = snapshot
11675                .sampled_items
11676                .iter()
11677                .filter(|i| i.misstatement_found)
11678                .count();
11679
11680            info!(
11681                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11682                snapshot.sampling_plans.len(),
11683                snapshot.sampled_items.len(),
11684                misstatement_count,
11685            );
11686        }
11687
11688        // ----------------------------------------------------------------
11689        // ISA 315: Significant Classes of Transactions (SCOTS)
11690        // ----------------------------------------------------------------
11691        {
11692            use datasynth_generators::audit::scots_generator::{
11693                ScotsGenerator, ScotsGeneratorConfig,
11694            };
11695
11696            let ic_enabled = self.config.intercompany.enabled;
11697
11698            let config = ScotsGeneratorConfig {
11699                intercompany_enabled: ic_enabled,
11700                ..ScotsGeneratorConfig::default()
11701            };
11702            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11703
11704            for company in &self.config.companies {
11705                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11706                snapshot
11707                    .significant_transaction_classes
11708                    .extend(entity_scots);
11709            }
11710
11711            let estimation_count = snapshot
11712                .significant_transaction_classes
11713                .iter()
11714                .filter(|s| {
11715                    matches!(
11716                        s.transaction_type,
11717                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11718                    )
11719                })
11720                .count();
11721
11722            info!(
11723                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11724                snapshot.significant_transaction_classes.len(),
11725                estimation_count,
11726            );
11727        }
11728
11729        // ----------------------------------------------------------------
11730        // ISA 520: Unusual Item Markers
11731        // ----------------------------------------------------------------
11732        {
11733            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11734
11735            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11736            let entity_codes: Vec<String> = self
11737                .config
11738                .companies
11739                .iter()
11740                .map(|c| c.code.clone())
11741                .collect();
11742            let unusual_flags =
11743                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11744            info!(
11745                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11746                unusual_flags.len(),
11747                unusual_flags
11748                    .iter()
11749                    .filter(|f| matches!(
11750                        f.severity,
11751                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11752                    ))
11753                    .count(),
11754                unusual_flags
11755                    .iter()
11756                    .filter(|f| matches!(
11757                        f.severity,
11758                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
11759                    ))
11760                    .count(),
11761                unusual_flags
11762                    .iter()
11763                    .filter(|f| matches!(
11764                        f.severity,
11765                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
11766                    ))
11767                    .count(),
11768            );
11769            snapshot.unusual_items = unusual_flags;
11770        }
11771
11772        // ----------------------------------------------------------------
11773        // ISA 520: Analytical Relationships
11774        // ----------------------------------------------------------------
11775        {
11776            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
11777
11778            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
11779            let entity_codes: Vec<String> = self
11780                .config
11781                .companies
11782                .iter()
11783                .map(|c| c.code.clone())
11784                .collect();
11785            let current_period_label = format!("FY{fiscal_year}");
11786            let prior_period_label = format!("FY{}", fiscal_year - 1);
11787            let analytical_rels = ar_gen.generate_for_entities(
11788                &entity_codes,
11789                entries,
11790                &current_period_label,
11791                &prior_period_label,
11792            );
11793            let out_of_range = analytical_rels
11794                .iter()
11795                .filter(|r| !r.within_expected_range)
11796                .count();
11797            info!(
11798                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
11799                analytical_rels.len(),
11800                out_of_range,
11801            );
11802            snapshot.analytical_relationships = analytical_rels;
11803        }
11804
11805        if let Some(pb) = pb {
11806            pb.finish_with_message(format!(
11807                "Audit data: {} engagements, {} workpapers, {} evidence, \
11808                 {} confirmations, {} procedure steps, {} samples, \
11809                 {} analytical, {} IA funcs, {} related parties, \
11810                 {} component auditors, {} letters, {} subsequent events, \
11811                 {} service orgs, {} going concern, {} accounting estimates, \
11812                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
11813                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
11814                 {} unusual items, {} analytical relationships",
11815                snapshot.engagements.len(),
11816                snapshot.workpapers.len(),
11817                snapshot.evidence.len(),
11818                snapshot.confirmations.len(),
11819                snapshot.procedure_steps.len(),
11820                snapshot.samples.len(),
11821                snapshot.analytical_results.len(),
11822                snapshot.ia_functions.len(),
11823                snapshot.related_parties.len(),
11824                snapshot.component_auditors.len(),
11825                snapshot.engagement_letters.len(),
11826                snapshot.subsequent_events.len(),
11827                snapshot.service_organizations.len(),
11828                snapshot.going_concern_assessments.len(),
11829                snapshot.accounting_estimates.len(),
11830                snapshot.audit_opinions.len(),
11831                snapshot.key_audit_matters.len(),
11832                snapshot.sox_302_certifications.len(),
11833                snapshot.sox_404_assessments.len(),
11834                snapshot.materiality_calculations.len(),
11835                snapshot.combined_risk_assessments.len(),
11836                snapshot.sampling_plans.len(),
11837                snapshot.significant_transaction_classes.len(),
11838                snapshot.unusual_items.len(),
11839                snapshot.analytical_relationships.len(),
11840            ));
11841        }
11842
11843        // ----------------------------------------------------------------
11844        // PCAOB-ISA cross-reference mappings
11845        // ----------------------------------------------------------------
11846        // Always include the standard PCAOB-ISA mappings when audit generation is
11847        // enabled. These are static reference data (no randomness required) so we
11848        // call standard_mappings() directly.
11849        {
11850            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11851            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11852            debug!(
11853                "PCAOB-ISA mappings generated: {} mappings",
11854                snapshot.isa_pcaob_mappings.len()
11855            );
11856        }
11857
11858        // ----------------------------------------------------------------
11859        // ISA standard reference entries
11860        // ----------------------------------------------------------------
11861        // Emit flat ISA standard reference data (number, title, series) so
11862        // consumers get a machine-readable listing of all 34 ISA standards in
11863        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
11864        {
11865            use datasynth_standards::audit::isa_reference::IsaStandard;
11866            snapshot.isa_mappings = IsaStandard::standard_entries();
11867            debug!(
11868                "ISA standard entries generated: {} standards",
11869                snapshot.isa_mappings.len()
11870            );
11871        }
11872
11873        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
11874        // For each RPT, find the chronologically closest JE for the engagement's entity.
11875        {
11876            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11877                .engagements
11878                .iter()
11879                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11880                .collect();
11881
11882            for rpt in &mut snapshot.related_party_transactions {
11883                if rpt.journal_entry_id.is_some() {
11884                    continue; // already set
11885                }
11886                let entity = engagement_by_id
11887                    .get(&rpt.engagement_id.to_string())
11888                    .copied()
11889                    .unwrap_or("");
11890
11891                // Find closest JE by date in the entity's company
11892                let best_je = entries
11893                    .iter()
11894                    .filter(|je| je.header.company_code == entity)
11895                    .min_by_key(|je| {
11896                        (je.header.posting_date - rpt.transaction_date)
11897                            .num_days()
11898                            .abs()
11899                    });
11900
11901                if let Some(je) = best_je {
11902                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
11903                }
11904            }
11905
11906            let linked = snapshot
11907                .related_party_transactions
11908                .iter()
11909                .filter(|t| t.journal_entry_id.is_some())
11910                .count();
11911            debug!(
11912                "Linked {}/{} related party transactions to journal entries",
11913                linked,
11914                snapshot.related_party_transactions.len()
11915            );
11916        }
11917
11918        Ok(snapshot)
11919    }
11920
11921    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
11922    ///
11923    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
11924    /// from the current orchestrator state, runs the FSM engine, and maps the
11925    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
11926    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
11927    fn generate_audit_data_with_fsm(
11928        &mut self,
11929        entries: &[JournalEntry],
11930    ) -> SynthResult<AuditSnapshot> {
11931        use datasynth_audit_fsm::{
11932            context::EngagementContext,
11933            engine::AuditFsmEngine,
11934            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
11935        };
11936        use rand::SeedableRng;
11937        use rand_chacha::ChaCha8Rng;
11938
11939        info!("Audit FSM: generating audit data via FSM engine");
11940
11941        let fsm_config = self
11942            .config
11943            .audit
11944            .fsm
11945            .as_ref()
11946            .expect("FSM config must be present when FSM is enabled");
11947
11948        // 1. Load blueprint from config string.
11949        let bwp = match fsm_config.blueprint.as_str() {
11950            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
11951            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
11952            _ => {
11953                warn!(
11954                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
11955                    fsm_config.blueprint
11956                );
11957                BlueprintWithPreconditions::load_builtin_fsa()
11958            }
11959        }
11960        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
11961
11962        // 2. Load overlay from config string.
11963        let overlay = match fsm_config.overlay.as_str() {
11964            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
11965            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
11966            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
11967            _ => {
11968                warn!(
11969                    "Unknown FSM overlay '{}', falling back to builtin:default",
11970                    fsm_config.overlay
11971                );
11972                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
11973            }
11974        }
11975        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
11976
11977        // 3. Build EngagementContext from orchestrator state.
11978        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11979            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11980        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11981
11982        // Determine the engagement entity early so we can filter JEs.
11983        let company = self.config.companies.first();
11984        let company_code = company
11985            .map(|c| c.code.clone())
11986            .unwrap_or_else(|| "UNKNOWN".to_string());
11987        let company_name = company
11988            .map(|c| c.name.clone())
11989            .unwrap_or_else(|| "Unknown Company".to_string());
11990        let currency = company
11991            .map(|c| c.currency.clone())
11992            .unwrap_or_else(|| "USD".to_string());
11993
11994        // Filter JEs to the engagement entity for single-company coherence.
11995        let entity_entries: Vec<_> = entries
11996            .iter()
11997            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
11998            .cloned()
11999            .collect();
12000        let entries = &entity_entries; // Shadow the parameter for remaining usage
12001
12002        // Financial aggregates from journal entries.
12003        let total_revenue: rust_decimal::Decimal = entries
12004            .iter()
12005            .flat_map(|e| e.lines.iter())
12006            .filter(|l| l.account_code.starts_with('4'))
12007            .map(|l| l.credit_amount - l.debit_amount)
12008            .sum();
12009
12010        let total_assets: rust_decimal::Decimal = entries
12011            .iter()
12012            .flat_map(|e| e.lines.iter())
12013            .filter(|l| l.account_code.starts_with('1'))
12014            .map(|l| l.debit_amount - l.credit_amount)
12015            .sum();
12016
12017        let total_expenses: rust_decimal::Decimal = entries
12018            .iter()
12019            .flat_map(|e| e.lines.iter())
12020            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12021            .map(|l| l.debit_amount)
12022            .sum();
12023
12024        let equity: rust_decimal::Decimal = entries
12025            .iter()
12026            .flat_map(|e| e.lines.iter())
12027            .filter(|l| l.account_code.starts_with('3'))
12028            .map(|l| l.credit_amount - l.debit_amount)
12029            .sum();
12030
12031        let total_debt: rust_decimal::Decimal = entries
12032            .iter()
12033            .flat_map(|e| e.lines.iter())
12034            .filter(|l| l.account_code.starts_with('2'))
12035            .map(|l| l.credit_amount - l.debit_amount)
12036            .sum();
12037
12038        let pretax_income = total_revenue - total_expenses;
12039
12040        let cogs: rust_decimal::Decimal = entries
12041            .iter()
12042            .flat_map(|e| e.lines.iter())
12043            .filter(|l| l.account_code.starts_with('5'))
12044            .map(|l| l.debit_amount)
12045            .sum();
12046        let gross_profit = total_revenue - cogs;
12047
12048        let current_assets: rust_decimal::Decimal = entries
12049            .iter()
12050            .flat_map(|e| e.lines.iter())
12051            .filter(|l| {
12052                l.account_code.starts_with("10")
12053                    || l.account_code.starts_with("11")
12054                    || l.account_code.starts_with("12")
12055                    || l.account_code.starts_with("13")
12056            })
12057            .map(|l| l.debit_amount - l.credit_amount)
12058            .sum();
12059        let current_liabilities: rust_decimal::Decimal = entries
12060            .iter()
12061            .flat_map(|e| e.lines.iter())
12062            .filter(|l| {
12063                l.account_code.starts_with("20")
12064                    || l.account_code.starts_with("21")
12065                    || l.account_code.starts_with("22")
12066            })
12067            .map(|l| l.credit_amount - l.debit_amount)
12068            .sum();
12069        let working_capital = current_assets - current_liabilities;
12070
12071        let depreciation: rust_decimal::Decimal = entries
12072            .iter()
12073            .flat_map(|e| e.lines.iter())
12074            .filter(|l| l.account_code.starts_with("60"))
12075            .map(|l| l.debit_amount)
12076            .sum();
12077        let operating_cash_flow = pretax_income + depreciation;
12078
12079        // GL accounts for reference data.
12080        let accounts: Vec<String> = self
12081            .coa
12082            .as_ref()
12083            .map(|coa| {
12084                coa.get_postable_accounts()
12085                    .iter()
12086                    .map(|acc| acc.account_code().to_string())
12087                    .collect()
12088            })
12089            .unwrap_or_default();
12090
12091        // Team member IDs and display names from master data.
12092        let team_member_ids: Vec<String> = self
12093            .master_data
12094            .employees
12095            .iter()
12096            .take(8) // Cap team size
12097            .map(|e| e.employee_id.clone())
12098            .collect();
12099        let team_member_pairs: Vec<(String, String)> = self
12100            .master_data
12101            .employees
12102            .iter()
12103            .take(8)
12104            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12105            .collect();
12106
12107        let vendor_names: Vec<String> = self
12108            .master_data
12109            .vendors
12110            .iter()
12111            .map(|v| v.name.clone())
12112            .collect();
12113        let customer_names: Vec<String> = self
12114            .master_data
12115            .customers
12116            .iter()
12117            .map(|c| c.name.clone())
12118            .collect();
12119
12120        let entity_codes: Vec<String> = self
12121            .config
12122            .companies
12123            .iter()
12124            .map(|c| c.code.clone())
12125            .collect();
12126
12127        // Journal entry IDs for evidence tracing (sample up to 50).
12128        let journal_entry_ids: Vec<String> = entries
12129            .iter()
12130            .take(50)
12131            .map(|e| e.header.document_id.to_string())
12132            .collect();
12133
12134        // Account balances for risk weighting (aggregate debit - credit per account).
12135        let mut account_balances = std::collections::HashMap::<String, f64>::new();
12136        for entry in entries {
12137            for line in &entry.lines {
12138                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
12139                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
12140                *account_balances
12141                    .entry(line.account_code.clone())
12142                    .or_insert(0.0) += debit_f64 - credit_f64;
12143            }
12144        }
12145
12146        // Internal control IDs and anomaly refs are populated by the
12147        // caller when available; here we default to empty because the
12148        // orchestrator state may not have generated controls/anomalies
12149        // yet at this point in the pipeline.
12150        let control_ids: Vec<String> = Vec::new();
12151        let anomaly_refs: Vec<String> = Vec::new();
12152
12153        let mut context = EngagementContext {
12154            company_code,
12155            company_name,
12156            fiscal_year: start_date.year(),
12157            currency,
12158            total_revenue,
12159            total_assets,
12160            engagement_start: start_date,
12161            report_date: period_end,
12162            pretax_income,
12163            equity,
12164            gross_profit,
12165            working_capital,
12166            operating_cash_flow,
12167            total_debt,
12168            team_member_ids,
12169            team_member_pairs,
12170            accounts,
12171            vendor_names,
12172            customer_names,
12173            journal_entry_ids,
12174            account_balances,
12175            control_ids,
12176            anomaly_refs,
12177            journal_entries: entries.to_vec(),
12178            is_us_listed: false,
12179            entity_codes,
12180            auditor_firm_name: "DataSynth Audit LLP".into(),
12181            accounting_framework: self
12182                .config
12183                .accounting_standards
12184                .framework
12185                .map(|f| match f {
12186                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
12187                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
12188                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
12189                        "French GAAP"
12190                    }
12191                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
12192                        "German GAAP"
12193                    }
12194                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
12195                        "Dual Reporting"
12196                    }
12197                })
12198                .unwrap_or("IFRS")
12199                .into(),
12200        };
12201
12202        // 4. Create and run the FSM engine.
12203        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
12204        let rng = ChaCha8Rng::seed_from_u64(seed);
12205        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
12206
12207        let mut result = engine
12208            .run_engagement(&context)
12209            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
12210
12211        info!(
12212            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
12213             {} phases completed, duration {:.1}h",
12214            result.event_log.len(),
12215            result.artifacts.total_artifacts(),
12216            result.anomalies.len(),
12217            result.phases_completed.len(),
12218            result.total_duration_hours,
12219        );
12220
12221        // 4b. Populate financial data in the artifact bag for downstream consumers.
12222        let tb_entity = context.company_code.clone();
12223        let tb_fy = context.fiscal_year;
12224        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
12225        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
12226            entries,
12227            &tb_entity,
12228            tb_fy,
12229            self.coa.as_ref().map(|c| c.as_ref()),
12230        );
12231
12232        // 5. Map ArtifactBag fields to AuditSnapshot.
12233        let bag = result.artifacts;
12234        let mut snapshot = AuditSnapshot {
12235            engagements: bag.engagements,
12236            engagement_letters: bag.engagement_letters,
12237            materiality_calculations: bag.materiality_calculations,
12238            risk_assessments: bag.risk_assessments,
12239            combined_risk_assessments: bag.combined_risk_assessments,
12240            workpapers: bag.workpapers,
12241            evidence: bag.evidence,
12242            findings: bag.findings,
12243            judgments: bag.judgments,
12244            sampling_plans: bag.sampling_plans,
12245            sampled_items: bag.sampled_items,
12246            analytical_results: bag.analytical_results,
12247            going_concern_assessments: bag.going_concern_assessments,
12248            subsequent_events: bag.subsequent_events,
12249            audit_opinions: bag.audit_opinions,
12250            key_audit_matters: bag.key_audit_matters,
12251            procedure_steps: bag.procedure_steps,
12252            samples: bag.samples,
12253            confirmations: bag.confirmations,
12254            confirmation_responses: bag.confirmation_responses,
12255            // Store the event trail for downstream export.
12256            fsm_event_trail: Some(result.event_log),
12257            // Fields not produced by the FSM engine remain at their defaults.
12258            ..Default::default()
12259        };
12260
12261        // 6. Add static reference data (same as legacy path).
12262        {
12263            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12264            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12265        }
12266        {
12267            use datasynth_standards::audit::isa_reference::IsaStandard;
12268            snapshot.isa_mappings = IsaStandard::standard_entries();
12269        }
12270
12271        info!(
12272            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
12273             {} risk assessments, {} findings, {} materiality calcs",
12274            snapshot.engagements.len(),
12275            snapshot.workpapers.len(),
12276            snapshot.evidence.len(),
12277            snapshot.risk_assessments.len(),
12278            snapshot.findings.len(),
12279            snapshot.materiality_calculations.len(),
12280        );
12281
12282        Ok(snapshot)
12283    }
12284
12285    /// Export journal entries as graph data for ML training and network reconstruction.
12286    ///
12287    /// Builds a transaction graph where:
12288    /// - Nodes are GL accounts
12289    /// - Edges are money flows from credit to debit accounts
12290    /// - Edge attributes include amount, date, business process, anomaly flags
12291    fn export_graphs(
12292        &mut self,
12293        entries: &[JournalEntry],
12294        _coa: &Arc<ChartOfAccounts>,
12295        stats: &mut EnhancedGenerationStatistics,
12296    ) -> SynthResult<GraphExportSnapshot> {
12297        let pb = self.create_progress_bar(100, "Exporting Graphs");
12298
12299        let mut snapshot = GraphExportSnapshot::default();
12300
12301        // Get output directory
12302        let output_dir = self
12303            .output_path
12304            .clone()
12305            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12306        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12307
12308        // Process each graph type configuration
12309        for graph_type in &self.config.graph_export.graph_types {
12310            if let Some(pb) = &pb {
12311                pb.inc(10);
12312            }
12313
12314            // Build transaction graph
12315            let graph_config = TransactionGraphConfig {
12316                include_vendors: false,
12317                include_customers: false,
12318                create_debit_credit_edges: true,
12319                include_document_nodes: graph_type.include_document_nodes,
12320                min_edge_weight: graph_type.min_edge_weight,
12321                aggregate_parallel_edges: graph_type.aggregate_edges,
12322                framework: None,
12323            };
12324
12325            let mut builder = TransactionGraphBuilder::new(graph_config);
12326            builder.add_journal_entries(entries);
12327            let graph = builder.build();
12328
12329            // Update stats
12330            stats.graph_node_count += graph.node_count();
12331            stats.graph_edge_count += graph.edge_count();
12332
12333            if let Some(pb) = &pb {
12334                pb.inc(40);
12335            }
12336
12337            // Export to each configured format
12338            for format in &self.config.graph_export.formats {
12339                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12340
12341                // Create output directory
12342                if let Err(e) = std::fs::create_dir_all(&format_dir) {
12343                    warn!("Failed to create graph output directory: {}", e);
12344                    continue;
12345                }
12346
12347                match format {
12348                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12349                        let pyg_config = PyGExportConfig {
12350                            common: datasynth_graph::CommonExportConfig {
12351                                export_node_features: true,
12352                                export_edge_features: true,
12353                                export_node_labels: true,
12354                                export_edge_labels: true,
12355                                export_masks: true,
12356                                train_ratio: self.config.graph_export.train_ratio,
12357                                val_ratio: self.config.graph_export.validation_ratio,
12358                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12359                            },
12360                            one_hot_categoricals: false,
12361                        };
12362
12363                        let exporter = PyGExporter::new(pyg_config);
12364                        match exporter.export(&graph, &format_dir) {
12365                            Ok(metadata) => {
12366                                snapshot.exports.insert(
12367                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
12368                                    GraphExportInfo {
12369                                        name: graph_type.name.clone(),
12370                                        format: "pytorch_geometric".to_string(),
12371                                        output_path: format_dir.clone(),
12372                                        node_count: metadata.num_nodes,
12373                                        edge_count: metadata.num_edges,
12374                                    },
12375                                );
12376                                snapshot.graph_count += 1;
12377                            }
12378                            Err(e) => {
12379                                warn!("Failed to export PyTorch Geometric graph: {}", e);
12380                            }
12381                        }
12382                    }
12383                    datasynth_config::schema::GraphExportFormat::Neo4j => {
12384                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12385
12386                        let neo4j_config = Neo4jExportConfig {
12387                            export_node_properties: true,
12388                            export_edge_properties: true,
12389                            export_features: true,
12390                            generate_cypher: true,
12391                            generate_admin_import: true,
12392                            database_name: "synth".to_string(),
12393                            cypher_batch_size: 1000,
12394                        };
12395
12396                        let exporter = Neo4jExporter::new(neo4j_config);
12397                        match exporter.export(&graph, &format_dir) {
12398                            Ok(metadata) => {
12399                                snapshot.exports.insert(
12400                                    format!("{}_{}", graph_type.name, "neo4j"),
12401                                    GraphExportInfo {
12402                                        name: graph_type.name.clone(),
12403                                        format: "neo4j".to_string(),
12404                                        output_path: format_dir.clone(),
12405                                        node_count: metadata.num_nodes,
12406                                        edge_count: metadata.num_edges,
12407                                    },
12408                                );
12409                                snapshot.graph_count += 1;
12410                            }
12411                            Err(e) => {
12412                                warn!("Failed to export Neo4j graph: {}", e);
12413                            }
12414                        }
12415                    }
12416                    datasynth_config::schema::GraphExportFormat::Dgl => {
12417                        use datasynth_graph::{DGLExportConfig, DGLExporter};
12418
12419                        let dgl_config = DGLExportConfig {
12420                            common: datasynth_graph::CommonExportConfig {
12421                                export_node_features: true,
12422                                export_edge_features: true,
12423                                export_node_labels: true,
12424                                export_edge_labels: true,
12425                                export_masks: true,
12426                                train_ratio: self.config.graph_export.train_ratio,
12427                                val_ratio: self.config.graph_export.validation_ratio,
12428                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12429                            },
12430                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
12431                            include_pickle_script: true, // DGL ecosystem standard helper
12432                        };
12433
12434                        let exporter = DGLExporter::new(dgl_config);
12435                        match exporter.export(&graph, &format_dir) {
12436                            Ok(metadata) => {
12437                                snapshot.exports.insert(
12438                                    format!("{}_{}", graph_type.name, "dgl"),
12439                                    GraphExportInfo {
12440                                        name: graph_type.name.clone(),
12441                                        format: "dgl".to_string(),
12442                                        output_path: format_dir.clone(),
12443                                        node_count: metadata.common.num_nodes,
12444                                        edge_count: metadata.common.num_edges,
12445                                    },
12446                                );
12447                                snapshot.graph_count += 1;
12448                            }
12449                            Err(e) => {
12450                                warn!("Failed to export DGL graph: {}", e);
12451                            }
12452                        }
12453                    }
12454                    datasynth_config::schema::GraphExportFormat::RustGraph => {
12455                        use datasynth_graph::{
12456                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12457                        };
12458
12459                        let rustgraph_config = RustGraphExportConfig {
12460                            include_features: true,
12461                            include_temporal: true,
12462                            include_labels: true,
12463                            source_name: "datasynth".to_string(),
12464                            batch_id: None,
12465                            output_format: RustGraphOutputFormat::JsonLines,
12466                            export_node_properties: true,
12467                            export_edge_properties: true,
12468                            pretty_print: false,
12469                        };
12470
12471                        let exporter = RustGraphExporter::new(rustgraph_config);
12472                        match exporter.export(&graph, &format_dir) {
12473                            Ok(metadata) => {
12474                                snapshot.exports.insert(
12475                                    format!("{}_{}", graph_type.name, "rustgraph"),
12476                                    GraphExportInfo {
12477                                        name: graph_type.name.clone(),
12478                                        format: "rustgraph".to_string(),
12479                                        output_path: format_dir.clone(),
12480                                        node_count: metadata.num_nodes,
12481                                        edge_count: metadata.num_edges,
12482                                    },
12483                                );
12484                                snapshot.graph_count += 1;
12485                            }
12486                            Err(e) => {
12487                                warn!("Failed to export RustGraph: {}", e);
12488                            }
12489                        }
12490                    }
12491                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12492                        // Hypergraph export is handled separately in Phase 10b
12493                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12494                    }
12495                }
12496            }
12497
12498            if let Some(pb) = &pb {
12499                pb.inc(40);
12500            }
12501        }
12502
12503        stats.graph_export_count = snapshot.graph_count;
12504        snapshot.exported = snapshot.graph_count > 0;
12505
12506        if let Some(pb) = pb {
12507            pb.finish_with_message(format!(
12508                "Graphs exported: {} graphs ({} nodes, {} edges)",
12509                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12510            ));
12511        }
12512
12513        Ok(snapshot)
12514    }
12515
12516    /// Build additional graph types (banking, approval, entity) when relevant data
12517    /// is available. These run as a late phase because the data they need (banking
12518    /// snapshot, intercompany snapshot) is only generated after the main graph
12519    /// export phase.
12520    fn build_additional_graphs(
12521        &self,
12522        banking: &BankingSnapshot,
12523        intercompany: &IntercompanySnapshot,
12524        entries: &[JournalEntry],
12525        stats: &mut EnhancedGenerationStatistics,
12526    ) {
12527        let output_dir = self
12528            .output_path
12529            .clone()
12530            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12531        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12532
12533        // Banking graph: build when banking customers and transactions exist
12534        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12535            info!("Phase 10c: Building banking network graph");
12536            let config = BankingGraphConfig::default();
12537            let mut builder = BankingGraphBuilder::new(config);
12538            builder.add_customers(&banking.customers);
12539            builder.add_accounts(&banking.accounts, &banking.customers);
12540            builder.add_transactions(&banking.transactions);
12541            let graph = builder.build();
12542
12543            let node_count = graph.node_count();
12544            let edge_count = graph.edge_count();
12545            stats.graph_node_count += node_count;
12546            stats.graph_edge_count += edge_count;
12547
12548            // Export as PyG if configured
12549            for format in &self.config.graph_export.formats {
12550                if matches!(
12551                    format,
12552                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12553                ) {
12554                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12555                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12556                        warn!("Failed to create banking graph output dir: {}", e);
12557                        continue;
12558                    }
12559                    let pyg_config = PyGExportConfig::default();
12560                    let exporter = PyGExporter::new(pyg_config);
12561                    if let Err(e) = exporter.export(&graph, &format_dir) {
12562                        warn!("Failed to export banking graph as PyG: {}", e);
12563                    } else {
12564                        info!(
12565                            "Banking network graph exported: {} nodes, {} edges",
12566                            node_count, edge_count
12567                        );
12568                    }
12569                }
12570            }
12571        }
12572
12573        // Approval graph: build from journal entry approval workflows
12574        let approval_entries: Vec<_> = entries
12575            .iter()
12576            .filter(|je| je.header.approval_workflow.is_some())
12577            .collect();
12578
12579        if !approval_entries.is_empty() {
12580            info!(
12581                "Phase 10c: Building approval network graph ({} entries with approvals)",
12582                approval_entries.len()
12583            );
12584            let config = ApprovalGraphConfig::default();
12585            let mut builder = ApprovalGraphBuilder::new(config);
12586
12587            for je in &approval_entries {
12588                if let Some(ref wf) = je.header.approval_workflow {
12589                    for action in &wf.actions {
12590                        let record = datasynth_core::models::ApprovalRecord {
12591                            approval_id: format!(
12592                                "APR-{}-{}",
12593                                je.header.document_id, action.approval_level
12594                            ),
12595                            document_number: je.header.document_id.to_string(),
12596                            document_type: "JE".to_string(),
12597                            company_code: je.company_code().to_string(),
12598                            requester_id: wf.preparer_id.clone(),
12599                            requester_name: Some(wf.preparer_name.clone()),
12600                            approver_id: action.actor_id.clone(),
12601                            approver_name: action.actor_name.clone(),
12602                            approval_date: je.posting_date(),
12603                            action: format!("{:?}", action.action),
12604                            amount: wf.amount,
12605                            approval_limit: None,
12606                            comments: action.comments.clone(),
12607                            delegation_from: None,
12608                            is_auto_approved: false,
12609                        };
12610                        builder.add_approval(&record);
12611                    }
12612                }
12613            }
12614
12615            let graph = builder.build();
12616            let node_count = graph.node_count();
12617            let edge_count = graph.edge_count();
12618            stats.graph_node_count += node_count;
12619            stats.graph_edge_count += edge_count;
12620
12621            // Export as PyG if configured
12622            for format in &self.config.graph_export.formats {
12623                if matches!(
12624                    format,
12625                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12626                ) {
12627                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12628                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12629                        warn!("Failed to create approval graph output dir: {}", e);
12630                        continue;
12631                    }
12632                    let pyg_config = PyGExportConfig::default();
12633                    let exporter = PyGExporter::new(pyg_config);
12634                    if let Err(e) = exporter.export(&graph, &format_dir) {
12635                        warn!("Failed to export approval graph as PyG: {}", e);
12636                    } else {
12637                        info!(
12638                            "Approval network graph exported: {} nodes, {} edges",
12639                            node_count, edge_count
12640                        );
12641                    }
12642                }
12643            }
12644        }
12645
12646        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
12647        if self.config.companies.len() >= 2 {
12648            info!(
12649                "Phase 10c: Building entity relationship graph ({} companies)",
12650                self.config.companies.len()
12651            );
12652
12653            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12654                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12655
12656            // Map CompanyConfig → Company objects
12657            let parent_code = &self.config.companies[0].code;
12658            let mut companies: Vec<datasynth_core::models::Company> =
12659                Vec::with_capacity(self.config.companies.len());
12660
12661            // First company is the parent
12662            let first = &self.config.companies[0];
12663            companies.push(datasynth_core::models::Company::parent(
12664                &first.code,
12665                &first.name,
12666                &first.country,
12667                &first.currency,
12668            ));
12669
12670            // Remaining companies are subsidiaries (100% owned by parent)
12671            for cc in self.config.companies.iter().skip(1) {
12672                companies.push(datasynth_core::models::Company::subsidiary(
12673                    &cc.code,
12674                    &cc.name,
12675                    &cc.country,
12676                    &cc.currency,
12677                    parent_code,
12678                    rust_decimal::Decimal::from(100),
12679                ));
12680            }
12681
12682            // Build IntercompanyRelationship records (same logic as phase_intercompany)
12683            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12684                self.config
12685                    .companies
12686                    .iter()
12687                    .skip(1)
12688                    .enumerate()
12689                    .map(|(i, cc)| {
12690                        let mut rel =
12691                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
12692                                format!("REL{:03}", i + 1),
12693                                parent_code.clone(),
12694                                cc.code.clone(),
12695                                rust_decimal::Decimal::from(100),
12696                                start_date,
12697                            );
12698                        rel.functional_currency = cc.currency.clone();
12699                        rel
12700                    })
12701                    .collect();
12702
12703            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
12704            builder.add_companies(&companies);
12705            builder.add_ownership_relationships(&relationships);
12706
12707            // Thread IC matched-pair transaction edges into the entity graph
12708            for pair in &intercompany.matched_pairs {
12709                builder.add_intercompany_edge(
12710                    &pair.seller_company,
12711                    &pair.buyer_company,
12712                    pair.amount,
12713                    &format!("{:?}", pair.transaction_type),
12714                );
12715            }
12716
12717            let graph = builder.build();
12718            let node_count = graph.node_count();
12719            let edge_count = graph.edge_count();
12720            stats.graph_node_count += node_count;
12721            stats.graph_edge_count += edge_count;
12722
12723            // Export as PyG if configured
12724            for format in &self.config.graph_export.formats {
12725                if matches!(
12726                    format,
12727                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12728                ) {
12729                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
12730                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12731                        warn!("Failed to create entity graph output dir: {}", e);
12732                        continue;
12733                    }
12734                    let pyg_config = PyGExportConfig::default();
12735                    let exporter = PyGExporter::new(pyg_config);
12736                    if let Err(e) = exporter.export(&graph, &format_dir) {
12737                        warn!("Failed to export entity graph as PyG: {}", e);
12738                    } else {
12739                        info!(
12740                            "Entity relationship graph exported: {} nodes, {} edges",
12741                            node_count, edge_count
12742                        );
12743                    }
12744                }
12745            }
12746        } else {
12747            debug!(
12748                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
12749                self.config.companies.len()
12750            );
12751        }
12752    }
12753
12754    /// Export a multi-layer hypergraph for RustGraph integration.
12755    ///
12756    /// Builds a 3-layer hypergraph:
12757    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
12758    /// - Layer 2: Process Events (all process family document flows + OCPM events)
12759    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
12760    #[allow(clippy::too_many_arguments)]
12761    fn export_hypergraph(
12762        &self,
12763        coa: &Arc<ChartOfAccounts>,
12764        entries: &[JournalEntry],
12765        document_flows: &DocumentFlowSnapshot,
12766        sourcing: &SourcingSnapshot,
12767        hr: &HrSnapshot,
12768        manufacturing: &ManufacturingSnapshot,
12769        banking: &BankingSnapshot,
12770        audit: &AuditSnapshot,
12771        financial_reporting: &FinancialReportingSnapshot,
12772        ocpm: &OcpmSnapshot,
12773        compliance: &ComplianceRegulationsSnapshot,
12774        stats: &mut EnhancedGenerationStatistics,
12775    ) -> SynthResult<HypergraphExportInfo> {
12776        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
12777        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
12778        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
12779        use datasynth_graph::models::hypergraph::AggregationStrategy;
12780
12781        let hg_settings = &self.config.graph_export.hypergraph;
12782
12783        // Parse aggregation strategy from config string
12784        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
12785            "truncate" => AggregationStrategy::Truncate,
12786            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
12787            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
12788            "importance_sample" => AggregationStrategy::ImportanceSample,
12789            _ => AggregationStrategy::PoolByCounterparty,
12790        };
12791
12792        let builder_config = HypergraphConfig {
12793            max_nodes: hg_settings.max_nodes,
12794            aggregation_strategy,
12795            include_coso: hg_settings.governance_layer.include_coso,
12796            include_controls: hg_settings.governance_layer.include_controls,
12797            include_sox: hg_settings.governance_layer.include_sox,
12798            include_vendors: hg_settings.governance_layer.include_vendors,
12799            include_customers: hg_settings.governance_layer.include_customers,
12800            include_employees: hg_settings.governance_layer.include_employees,
12801            include_p2p: hg_settings.process_layer.include_p2p,
12802            include_o2c: hg_settings.process_layer.include_o2c,
12803            include_s2c: hg_settings.process_layer.include_s2c,
12804            include_h2r: hg_settings.process_layer.include_h2r,
12805            include_mfg: hg_settings.process_layer.include_mfg,
12806            include_bank: hg_settings.process_layer.include_bank,
12807            include_audit: hg_settings.process_layer.include_audit,
12808            include_r2r: hg_settings.process_layer.include_r2r,
12809            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
12810            docs_per_counterparty_threshold: hg_settings
12811                .process_layer
12812                .docs_per_counterparty_threshold,
12813            include_accounts: hg_settings.accounting_layer.include_accounts,
12814            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
12815            include_cross_layer_edges: hg_settings.cross_layer.enabled,
12816            include_compliance: self.config.compliance_regulations.enabled,
12817            include_tax: true,
12818            include_treasury: true,
12819            include_esg: true,
12820            include_project: true,
12821            include_intercompany: true,
12822            include_temporal_events: true,
12823        };
12824
12825        let mut builder = HypergraphBuilder::new(builder_config);
12826
12827        // Layer 1: Governance & Controls
12828        builder.add_coso_framework();
12829
12830        // Add controls if available (generated during JE generation)
12831        // Controls are generated per-company; we use the standard set
12832        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
12833            let controls = InternalControl::standard_controls();
12834            builder.add_controls(&controls);
12835        }
12836
12837        // Add master data
12838        builder.add_vendors(&self.master_data.vendors);
12839        builder.add_customers(&self.master_data.customers);
12840        builder.add_employees(&self.master_data.employees);
12841
12842        // Layer 2: Process Events (all process families)
12843        builder.add_p2p_documents(
12844            &document_flows.purchase_orders,
12845            &document_flows.goods_receipts,
12846            &document_flows.vendor_invoices,
12847            &document_flows.payments,
12848        );
12849        builder.add_o2c_documents(
12850            &document_flows.sales_orders,
12851            &document_flows.deliveries,
12852            &document_flows.customer_invoices,
12853        );
12854        builder.add_s2c_documents(
12855            &sourcing.sourcing_projects,
12856            &sourcing.qualifications,
12857            &sourcing.rfx_events,
12858            &sourcing.bids,
12859            &sourcing.bid_evaluations,
12860            &sourcing.contracts,
12861        );
12862        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
12863        builder.add_mfg_documents(
12864            &manufacturing.production_orders,
12865            &manufacturing.quality_inspections,
12866            &manufacturing.cycle_counts,
12867        );
12868        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
12869        builder.add_audit_documents(
12870            &audit.engagements,
12871            &audit.workpapers,
12872            &audit.findings,
12873            &audit.evidence,
12874            &audit.risk_assessments,
12875            &audit.judgments,
12876            &audit.materiality_calculations,
12877            &audit.audit_opinions,
12878            &audit.going_concern_assessments,
12879        );
12880        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
12881
12882        // OCPM events as hyperedges
12883        if let Some(ref event_log) = ocpm.event_log {
12884            builder.add_ocpm_events(event_log);
12885        }
12886
12887        // Compliance regulations as cross-layer nodes
12888        if self.config.compliance_regulations.enabled
12889            && hg_settings.governance_layer.include_controls
12890        {
12891            // Reconstruct ComplianceStandard objects from the registry
12892            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12893            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
12894                .standard_records
12895                .iter()
12896                .filter_map(|r| {
12897                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
12898                    registry.get(&sid).cloned()
12899                })
12900                .collect();
12901
12902            builder.add_compliance_regulations(
12903                &standards,
12904                &compliance.findings,
12905                &compliance.filings,
12906            );
12907        }
12908
12909        // Layer 3: Accounting Network
12910        builder.add_accounts(coa);
12911        builder.add_journal_entries_as_hyperedges(entries);
12912
12913        // Build the hypergraph
12914        let hypergraph = builder.build();
12915
12916        // Export
12917        let output_dir = self
12918            .output_path
12919            .clone()
12920            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12921        let hg_dir = output_dir
12922            .join(&self.config.graph_export.output_subdirectory)
12923            .join(&hg_settings.output_subdirectory);
12924
12925        // Branch on output format
12926        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
12927            "unified" => {
12928                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12929                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12930                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
12931                })?;
12932                (
12933                    metadata.num_nodes,
12934                    metadata.num_edges,
12935                    metadata.num_hyperedges,
12936                )
12937            }
12938            _ => {
12939                // "native" or any unrecognized format → use existing exporter
12940                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
12941                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12942                    SynthError::generation(format!("Hypergraph export failed: {e}"))
12943                })?;
12944                (
12945                    metadata.num_nodes,
12946                    metadata.num_edges,
12947                    metadata.num_hyperedges,
12948                )
12949            }
12950        };
12951
12952        // Stream to RustGraph ingest endpoint if configured
12953        #[cfg(feature = "streaming")]
12954        if let Some(ref target_url) = hg_settings.stream_target {
12955            use crate::stream_client::{StreamClient, StreamConfig};
12956            use std::io::Write as _;
12957
12958            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
12959            let stream_config = StreamConfig {
12960                target_url: target_url.clone(),
12961                batch_size: hg_settings.stream_batch_size,
12962                api_key,
12963                ..StreamConfig::default()
12964            };
12965
12966            match StreamClient::new(stream_config) {
12967                Ok(mut client) => {
12968                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12969                    match exporter.export_to_writer(&hypergraph, &mut client) {
12970                        Ok(_) => {
12971                            if let Err(e) = client.flush() {
12972                                warn!("Failed to flush stream client: {}", e);
12973                            } else {
12974                                info!("Streamed {} records to {}", client.total_sent(), target_url);
12975                            }
12976                        }
12977                        Err(e) => {
12978                            warn!("Streaming export failed: {}", e);
12979                        }
12980                    }
12981                }
12982                Err(e) => {
12983                    warn!("Failed to create stream client: {}", e);
12984                }
12985            }
12986        }
12987
12988        // Update stats
12989        stats.graph_node_count += num_nodes;
12990        stats.graph_edge_count += num_edges;
12991        stats.graph_export_count += 1;
12992
12993        Ok(HypergraphExportInfo {
12994            node_count: num_nodes,
12995            edge_count: num_edges,
12996            hyperedge_count: num_hyperedges,
12997            output_path: hg_dir,
12998        })
12999    }
13000
13001    /// Generate banking KYC/AML data.
13002    ///
13003    /// Creates banking customers, accounts, and transactions with AML typology injection.
13004    /// Uses the BankingOrchestrator from synth-banking crate.
13005    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
13006        let pb = self.create_progress_bar(100, "Generating Banking Data");
13007
13008        // Build the banking orchestrator from config
13009        let orchestrator = BankingOrchestratorBuilder::new()
13010            .config(self.config.banking.clone())
13011            .seed(self.seed + 9000)
13012            .country_pack(self.primary_pack().clone())
13013            .build();
13014
13015        if let Some(pb) = &pb {
13016            pb.inc(10);
13017        }
13018
13019        // Generate the banking data
13020        let result = orchestrator.generate();
13021
13022        if let Some(pb) = &pb {
13023            pb.inc(90);
13024            pb.finish_with_message(format!(
13025                "Banking: {} customers, {} transactions",
13026                result.customers.len(),
13027                result.transactions.len()
13028            ));
13029        }
13030
13031        // Cross-reference banking customers with core master data so that
13032        // banking customer names align with the enterprise customer list.
13033        // We rotate through core customers, overlaying their name and country
13034        // onto the generated banking customers where possible.
13035        let mut banking_customers = result.customers;
13036        let core_customers = &self.master_data.customers;
13037        if !core_customers.is_empty() {
13038            for (i, bc) in banking_customers.iter_mut().enumerate() {
13039                let core = &core_customers[i % core_customers.len()];
13040                bc.name = CustomerName::business(&core.name);
13041                bc.residence_country = core.country.clone();
13042                bc.enterprise_customer_id = Some(core.customer_id.clone());
13043            }
13044            debug!(
13045                "Cross-referenced {} banking customers with {} core customers",
13046                banking_customers.len(),
13047                core_customers.len()
13048            );
13049        }
13050
13051        Ok(BankingSnapshot {
13052            customers: banking_customers,
13053            accounts: result.accounts,
13054            transactions: result.transactions,
13055            transaction_labels: result.transaction_labels,
13056            customer_labels: result.customer_labels,
13057            account_labels: result.account_labels,
13058            relationship_labels: result.relationship_labels,
13059            narratives: result.narratives,
13060            suspicious_count: result.stats.suspicious_count,
13061            scenario_count: result.scenarios.len(),
13062        })
13063    }
13064
13065    /// Calculate total transactions to generate.
13066    fn calculate_total_transactions(&self) -> u64 {
13067        let months = self.config.global.period_months as f64;
13068        self.config
13069            .companies
13070            .iter()
13071            .map(|c| {
13072                let annual = c.annual_transaction_volume.count() as f64;
13073                let weighted = annual * c.volume_weight;
13074                (weighted * months / 12.0) as u64
13075            })
13076            .sum()
13077    }
13078
13079    /// Create a progress bar if progress display is enabled.
13080    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
13081        if !self.phase_config.show_progress {
13082            return None;
13083        }
13084
13085        let pb = if let Some(mp) = &self.multi_progress {
13086            mp.add(ProgressBar::new(total))
13087        } else {
13088            ProgressBar::new(total)
13089        };
13090
13091        pb.set_style(
13092            ProgressStyle::default_bar()
13093                .template(&format!(
13094                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
13095                ))
13096                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
13097                .progress_chars("#>-"),
13098        );
13099
13100        Some(pb)
13101    }
13102
13103    /// Get the generated chart of accounts.
13104    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
13105        self.coa.clone()
13106    }
13107
13108    /// Get the generated master data.
13109    pub fn get_master_data(&self) -> &MasterDataSnapshot {
13110        &self.master_data
13111    }
13112
13113    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
13114    fn phase_compliance_regulations(
13115        &mut self,
13116        _stats: &mut EnhancedGenerationStatistics,
13117    ) -> SynthResult<ComplianceRegulationsSnapshot> {
13118        if !self.phase_config.generate_compliance_regulations {
13119            return Ok(ComplianceRegulationsSnapshot::default());
13120        }
13121
13122        info!("Phase: Generating Compliance Regulations Data");
13123
13124        let cr_config = &self.config.compliance_regulations;
13125
13126        // Determine jurisdictions: from config or inferred from companies
13127        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
13128            self.config
13129                .companies
13130                .iter()
13131                .map(|c| c.country.clone())
13132                .collect::<std::collections::HashSet<_>>()
13133                .into_iter()
13134                .collect()
13135        } else {
13136            cr_config.jurisdictions.clone()
13137        };
13138
13139        // Determine reference date
13140        let fallback_date =
13141            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
13142        let reference_date = cr_config
13143            .reference_date
13144            .as_ref()
13145            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
13146            .unwrap_or_else(|| {
13147                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13148                    .unwrap_or(fallback_date)
13149            });
13150
13151        // Generate standards registry data
13152        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
13153        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
13154        let cross_reference_records = reg_gen.generate_cross_reference_records();
13155        let jurisdiction_records =
13156            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
13157
13158        info!(
13159            "  Standards: {} records, {} cross-references, {} jurisdictions",
13160            standard_records.len(),
13161            cross_reference_records.len(),
13162            jurisdiction_records.len()
13163        );
13164
13165        // Generate audit procedures (if enabled)
13166        let audit_procedures = if cr_config.audit_procedures.enabled {
13167            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
13168                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
13169                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
13170                confidence_level: cr_config.audit_procedures.confidence_level,
13171                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
13172            };
13173            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
13174                self.seed + 9000,
13175                proc_config,
13176            );
13177            let registry = reg_gen.registry();
13178            let mut all_procs = Vec::new();
13179            for jurisdiction in &jurisdictions {
13180                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
13181                all_procs.extend(procs);
13182            }
13183            info!("  Audit procedures: {}", all_procs.len());
13184            all_procs
13185        } else {
13186            Vec::new()
13187        };
13188
13189        // Generate compliance findings (if enabled)
13190        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
13191            let finding_config =
13192                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
13193                    finding_rate: cr_config.findings.finding_rate,
13194                    material_weakness_rate: cr_config.findings.material_weakness_rate,
13195                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
13196                    generate_remediation: cr_config.findings.generate_remediation,
13197                };
13198            let mut finding_gen =
13199                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
13200                    self.seed + 9100,
13201                    finding_config,
13202                );
13203            let mut all_findings = Vec::new();
13204            for company in &self.config.companies {
13205                let company_findings =
13206                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
13207                all_findings.extend(company_findings);
13208            }
13209            info!("  Compliance findings: {}", all_findings.len());
13210            all_findings
13211        } else {
13212            Vec::new()
13213        };
13214
13215        // Generate regulatory filings (if enabled)
13216        let filings = if cr_config.filings.enabled {
13217            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
13218                filing_types: cr_config.filings.filing_types.clone(),
13219                generate_status_progression: cr_config.filings.generate_status_progression,
13220            };
13221            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
13222                self.seed + 9200,
13223                filing_config,
13224            );
13225            let company_codes: Vec<String> = self
13226                .config
13227                .companies
13228                .iter()
13229                .map(|c| c.code.clone())
13230                .collect();
13231            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13232                .unwrap_or(fallback_date);
13233            let filings = filing_gen.generate_filings(
13234                &company_codes,
13235                &jurisdictions,
13236                start_date,
13237                self.config.global.period_months,
13238            );
13239            info!("  Regulatory filings: {}", filings.len());
13240            filings
13241        } else {
13242            Vec::new()
13243        };
13244
13245        // Build compliance graph (if enabled)
13246        let compliance_graph = if cr_config.graph.enabled {
13247            let graph_config = datasynth_graph::ComplianceGraphConfig {
13248                include_standard_nodes: cr_config.graph.include_compliance_nodes,
13249                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
13250                include_cross_references: cr_config.graph.include_cross_references,
13251                include_supersession_edges: cr_config.graph.include_supersession_edges,
13252                include_account_links: cr_config.graph.include_account_links,
13253                include_control_links: cr_config.graph.include_control_links,
13254                include_company_links: cr_config.graph.include_company_links,
13255            };
13256            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
13257
13258            // Add standard nodes
13259            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
13260                .iter()
13261                .map(|r| datasynth_graph::StandardNodeInput {
13262                    standard_id: r.standard_id.clone(),
13263                    title: r.title.clone(),
13264                    category: r.category.clone(),
13265                    domain: r.domain.clone(),
13266                    is_active: r.is_active,
13267                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
13268                    applicable_account_types: r.applicable_account_types.clone(),
13269                    applicable_processes: r.applicable_processes.clone(),
13270                })
13271                .collect();
13272            builder.add_standards(&standard_inputs);
13273
13274            // Add jurisdiction nodes
13275            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
13276                jurisdiction_records
13277                    .iter()
13278                    .map(|r| datasynth_graph::JurisdictionNodeInput {
13279                        country_code: r.country_code.clone(),
13280                        country_name: r.country_name.clone(),
13281                        framework: r.accounting_framework.clone(),
13282                        standard_count: r.standard_count,
13283                        tax_rate: r.statutory_tax_rate,
13284                    })
13285                    .collect();
13286            builder.add_jurisdictions(&jurisdiction_inputs);
13287
13288            // Add cross-reference edges
13289            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
13290                cross_reference_records
13291                    .iter()
13292                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
13293                        from_standard: r.from_standard.clone(),
13294                        to_standard: r.to_standard.clone(),
13295                        relationship: r.relationship.clone(),
13296                        convergence_level: r.convergence_level,
13297                    })
13298                    .collect();
13299            builder.add_cross_references(&xref_inputs);
13300
13301            // Add jurisdiction→standard mappings
13302            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
13303                .iter()
13304                .map(|r| datasynth_graph::JurisdictionMappingInput {
13305                    country_code: r.jurisdiction.clone(),
13306                    standard_id: r.standard_id.clone(),
13307                })
13308                .collect();
13309            builder.add_jurisdiction_mappings(&mapping_inputs);
13310
13311            // Add procedure nodes
13312            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13313                .iter()
13314                .map(|p| datasynth_graph::ProcedureNodeInput {
13315                    procedure_id: p.procedure_id.clone(),
13316                    standard_id: p.standard_id.clone(),
13317                    procedure_type: p.procedure_type.clone(),
13318                    sample_size: p.sample_size,
13319                    confidence_level: p.confidence_level,
13320                })
13321                .collect();
13322            builder.add_procedures(&proc_inputs);
13323
13324            // Add finding nodes
13325            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13326                .iter()
13327                .map(|f| datasynth_graph::FindingNodeInput {
13328                    finding_id: f.finding_id.to_string(),
13329                    standard_id: f
13330                        .related_standards
13331                        .first()
13332                        .map(|s| s.as_str().to_string())
13333                        .unwrap_or_default(),
13334                    severity: f.severity.to_string(),
13335                    deficiency_level: f.deficiency_level.to_string(),
13336                    severity_score: f.deficiency_level.severity_score(),
13337                    control_id: f.control_id.clone(),
13338                    affected_accounts: f.affected_accounts.clone(),
13339                })
13340                .collect();
13341            builder.add_findings(&finding_inputs);
13342
13343            // Cross-domain: link standards to accounts from chart of accounts
13344            if cr_config.graph.include_account_links {
13345                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13346                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13347                for std_record in &standard_records {
13348                    if let Some(std_obj) =
13349                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
13350                            &std_record.standard_id,
13351                        ))
13352                    {
13353                        for acct_type in &std_obj.applicable_account_types {
13354                            account_links.push(datasynth_graph::AccountLinkInput {
13355                                standard_id: std_record.standard_id.clone(),
13356                                account_code: acct_type.clone(),
13357                                account_name: acct_type.clone(),
13358                            });
13359                        }
13360                    }
13361                }
13362                builder.add_account_links(&account_links);
13363            }
13364
13365            // Cross-domain: link standards to internal controls
13366            if cr_config.graph.include_control_links {
13367                let mut control_links = Vec::new();
13368                // SOX/PCAOB standards link to all controls
13369                let sox_like_ids: Vec<String> = standard_records
13370                    .iter()
13371                    .filter(|r| {
13372                        r.standard_id.starts_with("SOX")
13373                            || r.standard_id.starts_with("PCAOB-AS-2201")
13374                    })
13375                    .map(|r| r.standard_id.clone())
13376                    .collect();
13377                // Get control IDs from config (C001-C060 standard controls)
13378                let control_ids = [
13379                    ("C001", "Cash Controls"),
13380                    ("C002", "Large Transaction Approval"),
13381                    ("C010", "PO Approval"),
13382                    ("C011", "Three-Way Match"),
13383                    ("C020", "Revenue Recognition"),
13384                    ("C021", "Credit Check"),
13385                    ("C030", "Manual JE Approval"),
13386                    ("C031", "Period Close Review"),
13387                    ("C032", "Account Reconciliation"),
13388                    ("C040", "Payroll Processing"),
13389                    ("C050", "Fixed Asset Capitalization"),
13390                    ("C060", "Intercompany Elimination"),
13391                ];
13392                for sox_id in &sox_like_ids {
13393                    for (ctrl_id, ctrl_name) in &control_ids {
13394                        control_links.push(datasynth_graph::ControlLinkInput {
13395                            standard_id: sox_id.clone(),
13396                            control_id: ctrl_id.to_string(),
13397                            control_name: ctrl_name.to_string(),
13398                        });
13399                    }
13400                }
13401                builder.add_control_links(&control_links);
13402            }
13403
13404            // Cross-domain: filing nodes with company links
13405            if cr_config.graph.include_company_links {
13406                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13407                    .iter()
13408                    .enumerate()
13409                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
13410                        filing_id: format!("F{:04}", i + 1),
13411                        filing_type: f.filing_type.to_string(),
13412                        company_code: f.company_code.clone(),
13413                        jurisdiction: f.jurisdiction.clone(),
13414                        status: format!("{:?}", f.status),
13415                    })
13416                    .collect();
13417                builder.add_filings(&filing_inputs);
13418            }
13419
13420            let graph = builder.build();
13421            info!(
13422                "  Compliance graph: {} nodes, {} edges",
13423                graph.nodes.len(),
13424                graph.edges.len()
13425            );
13426            Some(graph)
13427        } else {
13428            None
13429        };
13430
13431        self.check_resources_with_log("post-compliance-regulations")?;
13432
13433        Ok(ComplianceRegulationsSnapshot {
13434            standard_records,
13435            cross_reference_records,
13436            jurisdiction_records,
13437            audit_procedures,
13438            findings,
13439            filings,
13440            compliance_graph,
13441        })
13442    }
13443
13444    /// Build a lineage graph describing config → phase → output relationships.
13445    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13446        use super::lineage::LineageGraphBuilder;
13447
13448        let mut builder = LineageGraphBuilder::new();
13449
13450        // Config sections
13451        builder.add_config_section("config:global", "Global Config");
13452        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13453        builder.add_config_section("config:transactions", "Transaction Config");
13454
13455        // Generator phases
13456        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13457        builder.add_generator_phase("phase:je", "Journal Entry Generation");
13458
13459        // Config → phase edges
13460        builder.configured_by("phase:coa", "config:chart_of_accounts");
13461        builder.configured_by("phase:je", "config:transactions");
13462
13463        // Output files
13464        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13465        builder.produced_by("output:je", "phase:je");
13466
13467        // Optional phases based on config
13468        if self.phase_config.generate_master_data {
13469            builder.add_config_section("config:master_data", "Master Data Config");
13470            builder.add_generator_phase("phase:master_data", "Master Data Generation");
13471            builder.configured_by("phase:master_data", "config:master_data");
13472            builder.input_to("phase:master_data", "phase:je");
13473        }
13474
13475        if self.phase_config.generate_document_flows {
13476            builder.add_config_section("config:document_flows", "Document Flow Config");
13477            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13478            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13479            builder.configured_by("phase:p2p", "config:document_flows");
13480            builder.configured_by("phase:o2c", "config:document_flows");
13481
13482            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13483            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13484            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13485            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13486            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13487
13488            builder.produced_by("output:po", "phase:p2p");
13489            builder.produced_by("output:gr", "phase:p2p");
13490            builder.produced_by("output:vi", "phase:p2p");
13491            builder.produced_by("output:so", "phase:o2c");
13492            builder.produced_by("output:ci", "phase:o2c");
13493        }
13494
13495        if self.phase_config.inject_anomalies {
13496            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13497            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13498            builder.configured_by("phase:anomaly", "config:fraud");
13499            builder.add_output_file(
13500                "output:labels",
13501                "Anomaly Labels",
13502                "labels/anomaly_labels.csv",
13503            );
13504            builder.produced_by("output:labels", "phase:anomaly");
13505        }
13506
13507        if self.phase_config.generate_audit {
13508            builder.add_config_section("config:audit", "Audit Config");
13509            builder.add_generator_phase("phase:audit", "Audit Data Generation");
13510            builder.configured_by("phase:audit", "config:audit");
13511        }
13512
13513        if self.phase_config.generate_banking {
13514            builder.add_config_section("config:banking", "Banking Config");
13515            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13516            builder.configured_by("phase:banking", "config:banking");
13517        }
13518
13519        if self.config.llm.enabled {
13520            builder.add_config_section("config:llm", "LLM Enrichment Config");
13521            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13522            builder.configured_by("phase:llm_enrichment", "config:llm");
13523        }
13524
13525        if self.config.diffusion.enabled {
13526            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13527            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13528            builder.configured_by("phase:diffusion", "config:diffusion");
13529        }
13530
13531        if self.config.causal.enabled {
13532            builder.add_config_section("config:causal", "Causal Generation Config");
13533            builder.add_generator_phase("phase:causal", "Causal Overlay");
13534            builder.configured_by("phase:causal", "config:causal");
13535        }
13536
13537        builder.build()
13538    }
13539
13540    // -----------------------------------------------------------------------
13541    // Trial-balance helpers used to replace hardcoded proxy values
13542    // -----------------------------------------------------------------------
13543
13544    /// Compute total revenue for a company from its journal entries.
13545    ///
13546    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
13547    /// net credits on all revenue-account lines filtered to `company_code`.
13548    fn compute_company_revenue(
13549        entries: &[JournalEntry],
13550        company_code: &str,
13551    ) -> rust_decimal::Decimal {
13552        use rust_decimal::Decimal;
13553        let mut revenue = Decimal::ZERO;
13554        for je in entries {
13555            if je.header.company_code != company_code {
13556                continue;
13557            }
13558            for line in &je.lines {
13559                if line.gl_account.starts_with('4') {
13560                    // Revenue is credit-normal
13561                    revenue += line.credit_amount - line.debit_amount;
13562                }
13563            }
13564        }
13565        revenue.max(Decimal::ZERO)
13566    }
13567
13568    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
13569    ///
13570    /// Asset accounts start with "1"; liability accounts start with "2".
13571    fn compute_entity_net_assets(
13572        entries: &[JournalEntry],
13573        entity_code: &str,
13574    ) -> rust_decimal::Decimal {
13575        use rust_decimal::Decimal;
13576        let mut asset_net = Decimal::ZERO;
13577        let mut liability_net = Decimal::ZERO;
13578        for je in entries {
13579            if je.header.company_code != entity_code {
13580                continue;
13581            }
13582            for line in &je.lines {
13583                if line.gl_account.starts_with('1') {
13584                    asset_net += line.debit_amount - line.credit_amount;
13585                } else if line.gl_account.starts_with('2') {
13586                    liability_net += line.credit_amount - line.debit_amount;
13587                }
13588            }
13589        }
13590        asset_net - liability_net
13591    }
13592}
13593
13594/// Get the directory name for a graph export format.
13595fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13596    match format {
13597        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13598        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13599        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13600        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13601        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13602    }
13603}
13604
13605/// Aggregate journal entry lines into per-account trial balance rows.
13606///
13607/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
13608/// debit/credit totals and a net balance (debit minus credit).
13609fn compute_trial_balance_entries(
13610    entries: &[JournalEntry],
13611    entity_code: &str,
13612    fiscal_year: i32,
13613    coa: Option<&ChartOfAccounts>,
13614) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13615    use std::collections::BTreeMap;
13616
13617    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13618        BTreeMap::new();
13619
13620    for je in entries {
13621        for line in &je.lines {
13622            let entry = balances.entry(line.account_code.clone()).or_default();
13623            entry.0 += line.debit_amount;
13624            entry.1 += line.credit_amount;
13625        }
13626    }
13627
13628    balances
13629        .into_iter()
13630        .map(
13631            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13632                account_description: coa
13633                    .and_then(|c| c.get_account(&account_code))
13634                    .map(|a| a.description().to_string())
13635                    .unwrap_or_else(|| account_code.clone()),
13636                account_code,
13637                debit_balance: debit,
13638                credit_balance: credit,
13639                net_balance: debit - credit,
13640                entity_code: entity_code.to_string(),
13641                period: format!("FY{}", fiscal_year),
13642            },
13643        )
13644        .collect()
13645}
13646
13647#[cfg(test)]
13648#[allow(clippy::unwrap_used)]
13649mod tests {
13650    use super::*;
13651    use datasynth_config::schema::*;
13652
13653    fn create_test_config() -> GeneratorConfig {
13654        GeneratorConfig {
13655            global: GlobalConfig {
13656                industry: IndustrySector::Manufacturing,
13657                start_date: "2024-01-01".to_string(),
13658                period_months: 1,
13659                seed: Some(42),
13660                parallel: false,
13661                group_currency: "USD".to_string(),
13662                presentation_currency: None,
13663                worker_threads: 0,
13664                memory_limit_mb: 0,
13665                fiscal_year_months: None,
13666            },
13667            companies: vec![CompanyConfig {
13668                code: "1000".to_string(),
13669                name: "Test Company".to_string(),
13670                currency: "USD".to_string(),
13671                functional_currency: None,
13672                country: "US".to_string(),
13673                annual_transaction_volume: TransactionVolume::TenK,
13674                volume_weight: 1.0,
13675                fiscal_year_variant: "K4".to_string(),
13676            }],
13677            chart_of_accounts: ChartOfAccountsConfig {
13678                complexity: CoAComplexity::Small,
13679                industry_specific: true,
13680                custom_accounts: None,
13681                min_hierarchy_depth: 2,
13682                max_hierarchy_depth: 4,
13683            },
13684            transactions: TransactionConfig::default(),
13685            output: OutputConfig::default(),
13686            fraud: FraudConfig::default(),
13687            internal_controls: InternalControlsConfig::default(),
13688            business_processes: BusinessProcessConfig::default(),
13689            user_personas: UserPersonaConfig::default(),
13690            templates: TemplateConfig::default(),
13691            approval: ApprovalConfig::default(),
13692            departments: DepartmentConfig::default(),
13693            master_data: MasterDataConfig::default(),
13694            document_flows: DocumentFlowConfig::default(),
13695            intercompany: IntercompanyConfig::default(),
13696            balance: BalanceConfig::default(),
13697            ocpm: OcpmConfig::default(),
13698            audit: AuditGenerationConfig::default(),
13699            banking: datasynth_banking::BankingConfig::default(),
13700            data_quality: DataQualitySchemaConfig::default(),
13701            scenario: ScenarioConfig::default(),
13702            temporal: TemporalDriftConfig::default(),
13703            graph_export: GraphExportConfig::default(),
13704            streaming: StreamingSchemaConfig::default(),
13705            rate_limit: RateLimitSchemaConfig::default(),
13706            temporal_attributes: TemporalAttributeSchemaConfig::default(),
13707            relationships: RelationshipSchemaConfig::default(),
13708            accounting_standards: AccountingStandardsConfig::default(),
13709            audit_standards: AuditStandardsConfig::default(),
13710            distributions: Default::default(),
13711            temporal_patterns: Default::default(),
13712            vendor_network: VendorNetworkSchemaConfig::default(),
13713            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
13714            relationship_strength: RelationshipStrengthSchemaConfig::default(),
13715            cross_process_links: CrossProcessLinksSchemaConfig::default(),
13716            organizational_events: OrganizationalEventsSchemaConfig::default(),
13717            behavioral_drift: BehavioralDriftSchemaConfig::default(),
13718            market_drift: MarketDriftSchemaConfig::default(),
13719            drift_labeling: DriftLabelingSchemaConfig::default(),
13720            anomaly_injection: Default::default(),
13721            industry_specific: Default::default(),
13722            fingerprint_privacy: Default::default(),
13723            quality_gates: Default::default(),
13724            compliance: Default::default(),
13725            webhooks: Default::default(),
13726            llm: Default::default(),
13727            diffusion: Default::default(),
13728            causal: Default::default(),
13729            source_to_pay: Default::default(),
13730            financial_reporting: Default::default(),
13731            hr: Default::default(),
13732            manufacturing: Default::default(),
13733            sales_quotes: Default::default(),
13734            tax: Default::default(),
13735            treasury: Default::default(),
13736            project_accounting: Default::default(),
13737            esg: Default::default(),
13738            country_packs: None,
13739            scenarios: Default::default(),
13740            session: Default::default(),
13741            compliance_regulations: Default::default(),
13742        }
13743    }
13744
13745    #[test]
13746    fn test_enhanced_orchestrator_creation() {
13747        let config = create_test_config();
13748        let orchestrator = EnhancedOrchestrator::with_defaults(config);
13749        assert!(orchestrator.is_ok());
13750    }
13751
13752    #[test]
13753    fn test_minimal_generation() {
13754        let config = create_test_config();
13755        let phase_config = PhaseConfig {
13756            generate_master_data: false,
13757            generate_document_flows: false,
13758            generate_journal_entries: true,
13759            inject_anomalies: false,
13760            show_progress: false,
13761            ..Default::default()
13762        };
13763
13764        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13765        let result = orchestrator.generate();
13766
13767        assert!(result.is_ok());
13768        let result = result.unwrap();
13769        assert!(!result.journal_entries.is_empty());
13770    }
13771
13772    #[test]
13773    fn test_master_data_generation() {
13774        let config = create_test_config();
13775        let phase_config = PhaseConfig {
13776            generate_master_data: true,
13777            generate_document_flows: false,
13778            generate_journal_entries: false,
13779            inject_anomalies: false,
13780            show_progress: false,
13781            vendors_per_company: 5,
13782            customers_per_company: 5,
13783            materials_per_company: 10,
13784            assets_per_company: 5,
13785            employees_per_company: 10,
13786            ..Default::default()
13787        };
13788
13789        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13790        let result = orchestrator.generate().unwrap();
13791
13792        assert!(!result.master_data.vendors.is_empty());
13793        assert!(!result.master_data.customers.is_empty());
13794        assert!(!result.master_data.materials.is_empty());
13795    }
13796
13797    #[test]
13798    fn test_document_flow_generation() {
13799        let config = create_test_config();
13800        let phase_config = PhaseConfig {
13801            generate_master_data: true,
13802            generate_document_flows: true,
13803            generate_journal_entries: false,
13804            inject_anomalies: false,
13805            inject_data_quality: false,
13806            validate_balances: false,
13807            generate_ocpm_events: false,
13808            show_progress: false,
13809            vendors_per_company: 5,
13810            customers_per_company: 5,
13811            materials_per_company: 10,
13812            assets_per_company: 5,
13813            employees_per_company: 10,
13814            p2p_chains: 5,
13815            o2c_chains: 5,
13816            ..Default::default()
13817        };
13818
13819        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13820        let result = orchestrator.generate().unwrap();
13821
13822        // Should have generated P2P and O2C chains
13823        assert!(!result.document_flows.p2p_chains.is_empty());
13824        assert!(!result.document_flows.o2c_chains.is_empty());
13825
13826        // Flattened documents should be populated
13827        assert!(!result.document_flows.purchase_orders.is_empty());
13828        assert!(!result.document_flows.sales_orders.is_empty());
13829    }
13830
13831    #[test]
13832    fn test_anomaly_injection() {
13833        let config = create_test_config();
13834        let phase_config = PhaseConfig {
13835            generate_master_data: false,
13836            generate_document_flows: false,
13837            generate_journal_entries: true,
13838            inject_anomalies: true,
13839            show_progress: false,
13840            ..Default::default()
13841        };
13842
13843        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13844        let result = orchestrator.generate().unwrap();
13845
13846        // Should have journal entries
13847        assert!(!result.journal_entries.is_empty());
13848
13849        // With ~833 entries and 2% rate, expect some anomalies
13850        // Note: This is probabilistic, so we just verify the structure exists
13851        assert!(result.anomaly_labels.summary.is_some());
13852    }
13853
13854    #[test]
13855    fn test_full_generation_pipeline() {
13856        let config = create_test_config();
13857        let phase_config = PhaseConfig {
13858            generate_master_data: true,
13859            generate_document_flows: true,
13860            generate_journal_entries: true,
13861            inject_anomalies: false,
13862            inject_data_quality: false,
13863            validate_balances: true,
13864            generate_ocpm_events: false,
13865            show_progress: false,
13866            vendors_per_company: 3,
13867            customers_per_company: 3,
13868            materials_per_company: 5,
13869            assets_per_company: 3,
13870            employees_per_company: 5,
13871            p2p_chains: 3,
13872            o2c_chains: 3,
13873            ..Default::default()
13874        };
13875
13876        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13877        let result = orchestrator.generate().unwrap();
13878
13879        // All phases should have results
13880        assert!(!result.master_data.vendors.is_empty());
13881        assert!(!result.master_data.customers.is_empty());
13882        assert!(!result.document_flows.p2p_chains.is_empty());
13883        assert!(!result.document_flows.o2c_chains.is_empty());
13884        assert!(!result.journal_entries.is_empty());
13885        assert!(result.statistics.accounts_count > 0);
13886
13887        // Subledger linking should have run
13888        assert!(!result.subledger.ap_invoices.is_empty());
13889        assert!(!result.subledger.ar_invoices.is_empty());
13890
13891        // Balance validation should have run
13892        assert!(result.balance_validation.validated);
13893        assert!(result.balance_validation.entries_processed > 0);
13894    }
13895
13896    #[test]
13897    fn test_subledger_linking() {
13898        let config = create_test_config();
13899        let phase_config = PhaseConfig {
13900            generate_master_data: true,
13901            generate_document_flows: true,
13902            generate_journal_entries: false,
13903            inject_anomalies: false,
13904            inject_data_quality: false,
13905            validate_balances: false,
13906            generate_ocpm_events: false,
13907            show_progress: false,
13908            vendors_per_company: 5,
13909            customers_per_company: 5,
13910            materials_per_company: 10,
13911            assets_per_company: 3,
13912            employees_per_company: 5,
13913            p2p_chains: 5,
13914            o2c_chains: 5,
13915            ..Default::default()
13916        };
13917
13918        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13919        let result = orchestrator.generate().unwrap();
13920
13921        // Should have document flows
13922        assert!(!result.document_flows.vendor_invoices.is_empty());
13923        assert!(!result.document_flows.customer_invoices.is_empty());
13924
13925        // Subledger should be linked from document flows
13926        assert!(!result.subledger.ap_invoices.is_empty());
13927        assert!(!result.subledger.ar_invoices.is_empty());
13928
13929        // AP invoices count should match vendor invoices count
13930        assert_eq!(
13931            result.subledger.ap_invoices.len(),
13932            result.document_flows.vendor_invoices.len()
13933        );
13934
13935        // AR invoices count should match customer invoices count
13936        assert_eq!(
13937            result.subledger.ar_invoices.len(),
13938            result.document_flows.customer_invoices.len()
13939        );
13940
13941        // Statistics should reflect subledger counts
13942        assert_eq!(
13943            result.statistics.ap_invoice_count,
13944            result.subledger.ap_invoices.len()
13945        );
13946        assert_eq!(
13947            result.statistics.ar_invoice_count,
13948            result.subledger.ar_invoices.len()
13949        );
13950    }
13951
13952    #[test]
13953    fn test_balance_validation() {
13954        let config = create_test_config();
13955        let phase_config = PhaseConfig {
13956            generate_master_data: false,
13957            generate_document_flows: false,
13958            generate_journal_entries: true,
13959            inject_anomalies: false,
13960            validate_balances: true,
13961            show_progress: false,
13962            ..Default::default()
13963        };
13964
13965        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13966        let result = orchestrator.generate().unwrap();
13967
13968        // Balance validation should run
13969        assert!(result.balance_validation.validated);
13970        assert!(result.balance_validation.entries_processed > 0);
13971
13972        // Generated JEs should be balanced (no unbalanced entries)
13973        assert!(!result.balance_validation.has_unbalanced_entries);
13974
13975        // Total debits should equal total credits
13976        assert_eq!(
13977            result.balance_validation.total_debits,
13978            result.balance_validation.total_credits
13979        );
13980    }
13981
13982    #[test]
13983    fn test_statistics_accuracy() {
13984        let config = create_test_config();
13985        let phase_config = PhaseConfig {
13986            generate_master_data: true,
13987            generate_document_flows: false,
13988            generate_journal_entries: true,
13989            inject_anomalies: false,
13990            show_progress: false,
13991            vendors_per_company: 10,
13992            customers_per_company: 20,
13993            materials_per_company: 15,
13994            assets_per_company: 5,
13995            employees_per_company: 8,
13996            ..Default::default()
13997        };
13998
13999        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14000        let result = orchestrator.generate().unwrap();
14001
14002        // Statistics should match actual data
14003        assert_eq!(
14004            result.statistics.vendor_count,
14005            result.master_data.vendors.len()
14006        );
14007        assert_eq!(
14008            result.statistics.customer_count,
14009            result.master_data.customers.len()
14010        );
14011        assert_eq!(
14012            result.statistics.material_count,
14013            result.master_data.materials.len()
14014        );
14015        assert_eq!(
14016            result.statistics.total_entries as usize,
14017            result.journal_entries.len()
14018        );
14019    }
14020
14021    #[test]
14022    fn test_phase_config_defaults() {
14023        let config = PhaseConfig::default();
14024        assert!(config.generate_master_data);
14025        assert!(config.generate_document_flows);
14026        assert!(config.generate_journal_entries);
14027        assert!(!config.inject_anomalies);
14028        assert!(config.validate_balances);
14029        assert!(config.show_progress);
14030        assert!(config.vendors_per_company > 0);
14031        assert!(config.customers_per_company > 0);
14032    }
14033
14034    #[test]
14035    fn test_get_coa_before_generation() {
14036        let config = create_test_config();
14037        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
14038
14039        // Before generation, CoA should be None
14040        assert!(orchestrator.get_coa().is_none());
14041    }
14042
14043    #[test]
14044    fn test_get_coa_after_generation() {
14045        let config = create_test_config();
14046        let phase_config = PhaseConfig {
14047            generate_master_data: false,
14048            generate_document_flows: false,
14049            generate_journal_entries: true,
14050            inject_anomalies: false,
14051            show_progress: false,
14052            ..Default::default()
14053        };
14054
14055        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14056        let _ = orchestrator.generate().unwrap();
14057
14058        // After generation, CoA should be available
14059        assert!(orchestrator.get_coa().is_some());
14060    }
14061
14062    #[test]
14063    fn test_get_master_data() {
14064        let config = create_test_config();
14065        let phase_config = PhaseConfig {
14066            generate_master_data: true,
14067            generate_document_flows: false,
14068            generate_journal_entries: false,
14069            inject_anomalies: false,
14070            show_progress: false,
14071            vendors_per_company: 5,
14072            customers_per_company: 5,
14073            materials_per_company: 5,
14074            assets_per_company: 5,
14075            employees_per_company: 5,
14076            ..Default::default()
14077        };
14078
14079        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14080        let result = orchestrator.generate().unwrap();
14081
14082        // After generate(), master_data is moved into the result
14083        assert!(!result.master_data.vendors.is_empty());
14084    }
14085
14086    #[test]
14087    fn test_with_progress_builder() {
14088        let config = create_test_config();
14089        let orchestrator = EnhancedOrchestrator::with_defaults(config)
14090            .unwrap()
14091            .with_progress(false);
14092
14093        // Should still work without progress
14094        assert!(!orchestrator.phase_config.show_progress);
14095    }
14096
14097    #[test]
14098    fn test_multi_company_generation() {
14099        let mut config = create_test_config();
14100        config.companies.push(CompanyConfig {
14101            code: "2000".to_string(),
14102            name: "Subsidiary".to_string(),
14103            currency: "EUR".to_string(),
14104            functional_currency: None,
14105            country: "DE".to_string(),
14106            annual_transaction_volume: TransactionVolume::TenK,
14107            volume_weight: 0.5,
14108            fiscal_year_variant: "K4".to_string(),
14109        });
14110
14111        let phase_config = PhaseConfig {
14112            generate_master_data: true,
14113            generate_document_flows: false,
14114            generate_journal_entries: true,
14115            inject_anomalies: false,
14116            show_progress: false,
14117            vendors_per_company: 5,
14118            customers_per_company: 5,
14119            materials_per_company: 5,
14120            assets_per_company: 5,
14121            employees_per_company: 5,
14122            ..Default::default()
14123        };
14124
14125        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14126        let result = orchestrator.generate().unwrap();
14127
14128        // Should have master data for both companies
14129        assert!(result.statistics.vendor_count >= 10); // 5 per company
14130        assert!(result.statistics.customer_count >= 10);
14131        assert!(result.statistics.companies_count == 2);
14132    }
14133
14134    #[test]
14135    fn test_empty_master_data_skips_document_flows() {
14136        let config = create_test_config();
14137        let phase_config = PhaseConfig {
14138            generate_master_data: false,   // Skip master data
14139            generate_document_flows: true, // Try to generate flows
14140            generate_journal_entries: false,
14141            inject_anomalies: false,
14142            show_progress: false,
14143            ..Default::default()
14144        };
14145
14146        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14147        let result = orchestrator.generate().unwrap();
14148
14149        // Without master data, document flows should be empty
14150        assert!(result.document_flows.p2p_chains.is_empty());
14151        assert!(result.document_flows.o2c_chains.is_empty());
14152    }
14153
14154    #[test]
14155    fn test_journal_entry_line_item_count() {
14156        let config = create_test_config();
14157        let phase_config = PhaseConfig {
14158            generate_master_data: false,
14159            generate_document_flows: false,
14160            generate_journal_entries: true,
14161            inject_anomalies: false,
14162            show_progress: false,
14163            ..Default::default()
14164        };
14165
14166        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14167        let result = orchestrator.generate().unwrap();
14168
14169        // Total line items should match sum of all entry line counts
14170        let calculated_line_items: u64 = result
14171            .journal_entries
14172            .iter()
14173            .map(|e| e.line_count() as u64)
14174            .sum();
14175        assert_eq!(result.statistics.total_line_items, calculated_line_items);
14176    }
14177
14178    #[test]
14179    fn test_audit_generation() {
14180        let config = create_test_config();
14181        let phase_config = PhaseConfig {
14182            generate_master_data: false,
14183            generate_document_flows: false,
14184            generate_journal_entries: true,
14185            inject_anomalies: false,
14186            show_progress: false,
14187            generate_audit: true,
14188            audit_engagements: 2,
14189            workpapers_per_engagement: 5,
14190            evidence_per_workpaper: 2,
14191            risks_per_engagement: 3,
14192            findings_per_engagement: 2,
14193            judgments_per_engagement: 2,
14194            ..Default::default()
14195        };
14196
14197        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14198        let result = orchestrator.generate().unwrap();
14199
14200        // Should have generated audit data
14201        assert_eq!(result.audit.engagements.len(), 2);
14202        assert!(!result.audit.workpapers.is_empty());
14203        assert!(!result.audit.evidence.is_empty());
14204        assert!(!result.audit.risk_assessments.is_empty());
14205        assert!(!result.audit.findings.is_empty());
14206        assert!(!result.audit.judgments.is_empty());
14207
14208        // New ISA entity collections should also be populated
14209        assert!(
14210            !result.audit.confirmations.is_empty(),
14211            "ISA 505 confirmations should be generated"
14212        );
14213        assert!(
14214            !result.audit.confirmation_responses.is_empty(),
14215            "ISA 505 confirmation responses should be generated"
14216        );
14217        assert!(
14218            !result.audit.procedure_steps.is_empty(),
14219            "ISA 330 procedure steps should be generated"
14220        );
14221        // Samples may or may not be generated depending on workpaper sampling methods
14222        assert!(
14223            !result.audit.analytical_results.is_empty(),
14224            "ISA 520 analytical procedures should be generated"
14225        );
14226        assert!(
14227            !result.audit.ia_functions.is_empty(),
14228            "ISA 610 IA functions should be generated (one per engagement)"
14229        );
14230        assert!(
14231            !result.audit.related_parties.is_empty(),
14232            "ISA 550 related parties should be generated"
14233        );
14234
14235        // Statistics should match
14236        assert_eq!(
14237            result.statistics.audit_engagement_count,
14238            result.audit.engagements.len()
14239        );
14240        assert_eq!(
14241            result.statistics.audit_workpaper_count,
14242            result.audit.workpapers.len()
14243        );
14244        assert_eq!(
14245            result.statistics.audit_evidence_count,
14246            result.audit.evidence.len()
14247        );
14248        assert_eq!(
14249            result.statistics.audit_risk_count,
14250            result.audit.risk_assessments.len()
14251        );
14252        assert_eq!(
14253            result.statistics.audit_finding_count,
14254            result.audit.findings.len()
14255        );
14256        assert_eq!(
14257            result.statistics.audit_judgment_count,
14258            result.audit.judgments.len()
14259        );
14260        assert_eq!(
14261            result.statistics.audit_confirmation_count,
14262            result.audit.confirmations.len()
14263        );
14264        assert_eq!(
14265            result.statistics.audit_confirmation_response_count,
14266            result.audit.confirmation_responses.len()
14267        );
14268        assert_eq!(
14269            result.statistics.audit_procedure_step_count,
14270            result.audit.procedure_steps.len()
14271        );
14272        assert_eq!(
14273            result.statistics.audit_sample_count,
14274            result.audit.samples.len()
14275        );
14276        assert_eq!(
14277            result.statistics.audit_analytical_result_count,
14278            result.audit.analytical_results.len()
14279        );
14280        assert_eq!(
14281            result.statistics.audit_ia_function_count,
14282            result.audit.ia_functions.len()
14283        );
14284        assert_eq!(
14285            result.statistics.audit_ia_report_count,
14286            result.audit.ia_reports.len()
14287        );
14288        assert_eq!(
14289            result.statistics.audit_related_party_count,
14290            result.audit.related_parties.len()
14291        );
14292        assert_eq!(
14293            result.statistics.audit_related_party_transaction_count,
14294            result.audit.related_party_transactions.len()
14295        );
14296    }
14297
14298    #[test]
14299    fn test_new_phases_disabled_by_default() {
14300        let config = create_test_config();
14301        // Verify new config fields default to disabled
14302        assert!(!config.llm.enabled);
14303        assert!(!config.diffusion.enabled);
14304        assert!(!config.causal.enabled);
14305
14306        let phase_config = PhaseConfig {
14307            generate_master_data: false,
14308            generate_document_flows: false,
14309            generate_journal_entries: true,
14310            inject_anomalies: false,
14311            show_progress: false,
14312            ..Default::default()
14313        };
14314
14315        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14316        let result = orchestrator.generate().unwrap();
14317
14318        // All new phase statistics should be zero when disabled
14319        assert_eq!(result.statistics.llm_enrichment_ms, 0);
14320        assert_eq!(result.statistics.llm_vendors_enriched, 0);
14321        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
14322        assert_eq!(result.statistics.diffusion_samples_generated, 0);
14323        assert_eq!(result.statistics.causal_generation_ms, 0);
14324        assert_eq!(result.statistics.causal_samples_generated, 0);
14325        assert!(result.statistics.causal_validation_passed.is_none());
14326        assert_eq!(result.statistics.counterfactual_pair_count, 0);
14327        assert!(result.counterfactual_pairs.is_empty());
14328    }
14329
14330    #[test]
14331    fn test_counterfactual_generation_enabled() {
14332        let config = create_test_config();
14333        let phase_config = PhaseConfig {
14334            generate_master_data: false,
14335            generate_document_flows: false,
14336            generate_journal_entries: true,
14337            inject_anomalies: false,
14338            show_progress: false,
14339            generate_counterfactuals: true,
14340            generate_period_close: false, // Disable so entry count matches counterfactual pairs
14341            ..Default::default()
14342        };
14343
14344        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14345        let result = orchestrator.generate().unwrap();
14346
14347        // With JE generation enabled, counterfactual pairs should be generated
14348        if !result.journal_entries.is_empty() {
14349            assert_eq!(
14350                result.counterfactual_pairs.len(),
14351                result.journal_entries.len()
14352            );
14353            assert_eq!(
14354                result.statistics.counterfactual_pair_count,
14355                result.journal_entries.len()
14356            );
14357            // Each pair should have a distinct pair_id
14358            let ids: std::collections::HashSet<_> = result
14359                .counterfactual_pairs
14360                .iter()
14361                .map(|p| p.pair_id.clone())
14362                .collect();
14363            assert_eq!(ids.len(), result.counterfactual_pairs.len());
14364        }
14365    }
14366
14367    #[test]
14368    fn test_llm_enrichment_enabled() {
14369        let mut config = create_test_config();
14370        config.llm.enabled = true;
14371        config.llm.max_vendor_enrichments = 3;
14372
14373        let phase_config = PhaseConfig {
14374            generate_master_data: true,
14375            generate_document_flows: false,
14376            generate_journal_entries: false,
14377            inject_anomalies: false,
14378            show_progress: false,
14379            vendors_per_company: 5,
14380            customers_per_company: 3,
14381            materials_per_company: 3,
14382            assets_per_company: 3,
14383            employees_per_company: 3,
14384            ..Default::default()
14385        };
14386
14387        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14388        let result = orchestrator.generate().unwrap();
14389
14390        // LLM enrichment should have run
14391        assert!(result.statistics.llm_vendors_enriched > 0);
14392        assert!(result.statistics.llm_vendors_enriched <= 3);
14393    }
14394
14395    #[test]
14396    fn test_diffusion_enhancement_enabled() {
14397        let mut config = create_test_config();
14398        config.diffusion.enabled = true;
14399        config.diffusion.n_steps = 50;
14400        config.diffusion.sample_size = 20;
14401
14402        let phase_config = PhaseConfig {
14403            generate_master_data: false,
14404            generate_document_flows: false,
14405            generate_journal_entries: true,
14406            inject_anomalies: false,
14407            show_progress: false,
14408            ..Default::default()
14409        };
14410
14411        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14412        let result = orchestrator.generate().unwrap();
14413
14414        // Diffusion phase should have generated samples
14415        assert_eq!(result.statistics.diffusion_samples_generated, 20);
14416    }
14417
14418    #[test]
14419    fn test_causal_overlay_enabled() {
14420        let mut config = create_test_config();
14421        config.causal.enabled = true;
14422        config.causal.template = "fraud_detection".to_string();
14423        config.causal.sample_size = 100;
14424        config.causal.validate = true;
14425
14426        let phase_config = PhaseConfig {
14427            generate_master_data: false,
14428            generate_document_flows: false,
14429            generate_journal_entries: true,
14430            inject_anomalies: false,
14431            show_progress: false,
14432            ..Default::default()
14433        };
14434
14435        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14436        let result = orchestrator.generate().unwrap();
14437
14438        // Causal phase should have generated samples
14439        assert_eq!(result.statistics.causal_samples_generated, 100);
14440        // Validation should have run
14441        assert!(result.statistics.causal_validation_passed.is_some());
14442    }
14443
14444    #[test]
14445    fn test_causal_overlay_revenue_cycle_template() {
14446        let mut config = create_test_config();
14447        config.causal.enabled = true;
14448        config.causal.template = "revenue_cycle".to_string();
14449        config.causal.sample_size = 50;
14450        config.causal.validate = false;
14451
14452        let phase_config = PhaseConfig {
14453            generate_master_data: false,
14454            generate_document_flows: false,
14455            generate_journal_entries: true,
14456            inject_anomalies: false,
14457            show_progress: false,
14458            ..Default::default()
14459        };
14460
14461        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14462        let result = orchestrator.generate().unwrap();
14463
14464        // Causal phase should have generated samples
14465        assert_eq!(result.statistics.causal_samples_generated, 50);
14466        // Validation was disabled
14467        assert!(result.statistics.causal_validation_passed.is_none());
14468    }
14469
14470    #[test]
14471    fn test_all_new_phases_enabled_together() {
14472        let mut config = create_test_config();
14473        config.llm.enabled = true;
14474        config.llm.max_vendor_enrichments = 2;
14475        config.diffusion.enabled = true;
14476        config.diffusion.n_steps = 20;
14477        config.diffusion.sample_size = 10;
14478        config.causal.enabled = true;
14479        config.causal.sample_size = 50;
14480        config.causal.validate = true;
14481
14482        let phase_config = PhaseConfig {
14483            generate_master_data: true,
14484            generate_document_flows: false,
14485            generate_journal_entries: true,
14486            inject_anomalies: false,
14487            show_progress: false,
14488            vendors_per_company: 5,
14489            customers_per_company: 3,
14490            materials_per_company: 3,
14491            assets_per_company: 3,
14492            employees_per_company: 3,
14493            ..Default::default()
14494        };
14495
14496        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14497        let result = orchestrator.generate().unwrap();
14498
14499        // All three phases should have run
14500        assert!(result.statistics.llm_vendors_enriched > 0);
14501        assert_eq!(result.statistics.diffusion_samples_generated, 10);
14502        assert_eq!(result.statistics.causal_samples_generated, 50);
14503        assert!(result.statistics.causal_validation_passed.is_some());
14504    }
14505
14506    #[test]
14507    fn test_statistics_serialization_with_new_fields() {
14508        let stats = EnhancedGenerationStatistics {
14509            total_entries: 100,
14510            total_line_items: 500,
14511            llm_enrichment_ms: 42,
14512            llm_vendors_enriched: 10,
14513            diffusion_enhancement_ms: 100,
14514            diffusion_samples_generated: 50,
14515            causal_generation_ms: 200,
14516            causal_samples_generated: 100,
14517            causal_validation_passed: Some(true),
14518            ..Default::default()
14519        };
14520
14521        let json = serde_json::to_string(&stats).unwrap();
14522        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
14523
14524        assert_eq!(deserialized.llm_enrichment_ms, 42);
14525        assert_eq!(deserialized.llm_vendors_enriched, 10);
14526        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
14527        assert_eq!(deserialized.diffusion_samples_generated, 50);
14528        assert_eq!(deserialized.causal_generation_ms, 200);
14529        assert_eq!(deserialized.causal_samples_generated, 100);
14530        assert_eq!(deserialized.causal_validation_passed, Some(true));
14531    }
14532
14533    #[test]
14534    fn test_statistics_backward_compat_deserialization() {
14535        // Old JSON without the new fields should still deserialize
14536        let old_json = r#"{
14537            "total_entries": 100,
14538            "total_line_items": 500,
14539            "accounts_count": 50,
14540            "companies_count": 1,
14541            "period_months": 12,
14542            "vendor_count": 10,
14543            "customer_count": 20,
14544            "material_count": 15,
14545            "asset_count": 5,
14546            "employee_count": 8,
14547            "p2p_chain_count": 5,
14548            "o2c_chain_count": 5,
14549            "ap_invoice_count": 5,
14550            "ar_invoice_count": 5,
14551            "ocpm_event_count": 0,
14552            "ocpm_object_count": 0,
14553            "ocpm_case_count": 0,
14554            "audit_engagement_count": 0,
14555            "audit_workpaper_count": 0,
14556            "audit_evidence_count": 0,
14557            "audit_risk_count": 0,
14558            "audit_finding_count": 0,
14559            "audit_judgment_count": 0,
14560            "anomalies_injected": 0,
14561            "data_quality_issues": 0,
14562            "banking_customer_count": 0,
14563            "banking_account_count": 0,
14564            "banking_transaction_count": 0,
14565            "banking_suspicious_count": 0,
14566            "graph_export_count": 0,
14567            "graph_node_count": 0,
14568            "graph_edge_count": 0
14569        }"#;
14570
14571        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
14572
14573        // New fields should default to 0 / None
14574        assert_eq!(stats.llm_enrichment_ms, 0);
14575        assert_eq!(stats.llm_vendors_enriched, 0);
14576        assert_eq!(stats.diffusion_enhancement_ms, 0);
14577        assert_eq!(stats.diffusion_samples_generated, 0);
14578        assert_eq!(stats.causal_generation_ms, 0);
14579        assert_eq!(stats.causal_samples_generated, 0);
14580        assert!(stats.causal_validation_passed.is_none());
14581    }
14582}