Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    MaterialGenerator,
117    O2CDocumentChain,
118    O2CGenerator,
119    O2CGeneratorConfig,
120    O2CPaymentBehavior,
121    P2PDocumentChain,
122    // Document flow generators
123    P2PGenerator,
124    P2PGeneratorConfig,
125    P2PPaymentBehavior,
126    PaymentReference,
127    // Provisions and contingencies generator (IAS 37 / ASC 450)
128    ProvisionGenerator,
129    QualificationGenerator,
130    RfxGenerator,
131    RiskAssessmentGenerator,
132    // Balance validation
133    RunningBalanceTracker,
134    ScorecardGenerator,
135    // Segment reporting generator (IFRS 8 / ASC 280)
136    SegmentGenerator,
137    SegmentSeed,
138    SourcingProjectGenerator,
139    SpendAnalysisGenerator,
140    ValidationError,
141    // Master data generators
142    VendorGenerator,
143    WorkpaperGenerator,
144};
145use datasynth_graph::{
146    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
147    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
148    TransactionGraphConfig,
149};
150use datasynth_ocpm::{
151    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
152    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
153    OcpmUuidFactory, P2pDocuments, S2cDocuments,
154};
155
156use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
157use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
158use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
159use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
160use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
161use datasynth_core::models::documents::PaymentMethod;
162use datasynth_core::models::IndustrySector;
163use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
164use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
165use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
166use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
167use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
168use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
169use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
170use datasynth_generators::audit::sample_generator::SampleGenerator;
171use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
172use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
173use datasynth_generators::coa_generator::CoAFramework;
174use datasynth_generators::llm_enrichment::VendorLlmEnricher;
175use rayon::prelude::*;
176
177// ============================================================================
178// Configuration Conversion Functions
179// ============================================================================
180
181/// Convert P2P flow config from schema to generator config.
182fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
183    let payment_behavior = &schema_config.payment_behavior;
184    let late_dist = &payment_behavior.late_payment_days_distribution;
185
186    P2PGeneratorConfig {
187        three_way_match_rate: schema_config.three_way_match_rate,
188        partial_delivery_rate: schema_config.partial_delivery_rate,
189        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
190        price_variance_rate: schema_config.price_variance_rate,
191        max_price_variance_percent: schema_config.max_price_variance_percent,
192        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
193        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
194        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
195        payment_method_distribution: vec![
196            (PaymentMethod::BankTransfer, 0.60),
197            (PaymentMethod::Check, 0.25),
198            (PaymentMethod::Wire, 0.10),
199            (PaymentMethod::CreditCard, 0.05),
200        ],
201        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
202        payment_behavior: P2PPaymentBehavior {
203            late_payment_rate: payment_behavior.late_payment_rate,
204            late_payment_distribution: LatePaymentDistribution {
205                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
206                late_8_to_14: late_dist.late_8_to_14,
207                very_late_15_to_30: late_dist.very_late_15_to_30,
208                severely_late_31_to_60: late_dist.severely_late_31_to_60,
209                extremely_late_over_60: late_dist.extremely_late_over_60,
210            },
211            partial_payment_rate: payment_behavior.partial_payment_rate,
212            payment_correction_rate: payment_behavior.payment_correction_rate,
213            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
214        },
215    }
216}
217
218/// Convert O2C flow config from schema to generator config.
219fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
220    let payment_behavior = &schema_config.payment_behavior;
221
222    O2CGeneratorConfig {
223        credit_check_failure_rate: schema_config.credit_check_failure_rate,
224        partial_shipment_rate: schema_config.partial_shipment_rate,
225        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
226        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
227        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
228        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
229        bad_debt_rate: schema_config.bad_debt_rate,
230        returns_rate: schema_config.return_rate,
231        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
232        payment_method_distribution: vec![
233            (PaymentMethod::BankTransfer, 0.50),
234            (PaymentMethod::Check, 0.30),
235            (PaymentMethod::Wire, 0.15),
236            (PaymentMethod::CreditCard, 0.05),
237        ],
238        payment_behavior: O2CPaymentBehavior {
239            partial_payment_rate: payment_behavior.partial_payments.rate,
240            short_payment_rate: payment_behavior.short_payments.rate,
241            max_short_percent: payment_behavior.short_payments.max_short_percent,
242            on_account_rate: payment_behavior.on_account_payments.rate,
243            payment_correction_rate: payment_behavior.payment_corrections.rate,
244            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
245        },
246    }
247}
248
/// Switches and volume settings that decide which generation phases run.
///
/// Boolean fields turn individual phases on or off; `usize` fields set how many
/// entities, chains, or audit artifacts are produced.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Produce master data: vendors, customers, materials, assets, employees.
    pub generate_master_data: bool,
    /// Produce P2P and O2C document flows.
    pub generate_document_flows: bool,
    /// Derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Produce journal entries.
    pub generate_journal_entries: bool,
    /// Run anomaly injection.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Check the balance sheet equation once generation is done.
    pub validate_balances: bool,
    /// Display progress bars.
    pub show_progress: bool,
    /// Vendor count per company.
    pub vendors_per_company: usize,
    /// Customer count per company.
    pub customers_per_company: usize,
    /// Material count per company.
    pub materials_per_company: usize,
    /// Asset count per company.
    pub assets_per_company: usize,
    /// Employee count per company.
    pub employees_per_company: usize,
    /// How many P2P chains to produce.
    pub p2p_chains: usize,
    /// How many O2C chains to produce.
    pub o2c_chains: usize,
    /// Produce audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// How many audit engagements to produce.
    pub audit_engagements: usize,
    /// Workpaper count per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence item count per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessment count per engagement.
    pub risks_per_engagement: usize,
    /// Finding count per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgment count per engagement.
    pub judgments_per_engagement: usize,
    /// Produce banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Produce S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Produce bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Produce financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Produce accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Produce manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Produce sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Produce ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Produce process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Produce compliance regulations data (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Produce period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Produce HR data (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Produce treasury data (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Produce project accounting data (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
}

impl Default for PhaseConfig {
    /// Baseline configuration: core phases enabled, optional phases disabled.
    fn default() -> Self {
        Self {
            // ---- Core phases ----
            generate_master_data: true,
            generate_document_flows: true,
            generate_ocpm_events: false, // opt-in
            generate_journal_entries: true,
            inject_anomalies: false,
            inject_data_quality: false, // opt-in, so test data stays clean
            validate_balances: true,
            show_progress: true,

            // ---- Master data and document chain volumes ----
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,

            // ---- Audit ----
            generate_audit: false, // opt-in
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,

            // ---- Optional phases (opt-in unless noted) ----
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            generate_evolution_events: true, // enabled by default
            generate_counterfactuals: false, // opt-in for ML workloads
            generate_compliance_regulations: false,
            generate_period_close: true, // enabled by default
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,
        }
    }
}
380
381impl PhaseConfig {
382    /// Derive phase flags from [`GeneratorConfig`].
383    ///
384    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
385    /// CLI flags can override individual fields after calling this method.
386    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
387        Self {
388            // Always-on phases
389            generate_master_data: true,
390            generate_document_flows: true,
391            generate_journal_entries: true,
392            validate_balances: true,
393            generate_period_close: true,
394            generate_evolution_events: true,
395            show_progress: true,
396
397            // Feature-gated phases — derived from config sections
398            generate_audit: cfg.audit.enabled,
399            generate_banking: cfg.banking.enabled,
400            generate_graph_export: cfg.graph_export.enabled,
401            generate_sourcing: cfg.source_to_pay.enabled,
402            generate_intercompany: cfg.intercompany.enabled,
403            generate_financial_statements: cfg.financial_reporting.enabled,
404            generate_bank_reconciliation: cfg.financial_reporting.enabled,
405            generate_accounting_standards: cfg.accounting_standards.enabled,
406            generate_manufacturing: cfg.manufacturing.enabled,
407            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
408            generate_tax: cfg.tax.enabled,
409            generate_esg: cfg.esg.enabled,
410            generate_ocpm_events: cfg.ocpm.enabled,
411            generate_compliance_regulations: cfg.compliance_regulations.enabled,
412            generate_hr: cfg.hr.enabled,
413            generate_treasury: cfg.treasury.enabled,
414            generate_project_accounting: cfg.project_accounting.enabled,
415
416            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
417            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
418
419            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
420            inject_data_quality: cfg.data_quality.enabled,
421
422            // Count defaults (CLI can override after calling this method)
423            vendors_per_company: 50,
424            customers_per_company: 100,
425            materials_per_company: 200,
426            assets_per_company: 50,
427            employees_per_company: 100,
428            p2p_chains: 100,
429            o2c_chains: 100,
430            audit_engagements: 5,
431            workpapers_per_engagement: 20,
432            evidence_per_workpaper: 5,
433            risks_per_engagement: 15,
434            findings_per_engagement: 8,
435            judgments_per_engagement: 10,
436        }
437    }
438}
439
440/// Master data snapshot containing all generated entities.
441#[derive(Debug, Clone, Default)]
442pub struct MasterDataSnapshot {
443    /// Generated vendors.
444    pub vendors: Vec<Vendor>,
445    /// Generated customers.
446    pub customers: Vec<Customer>,
447    /// Generated materials.
448    pub materials: Vec<Material>,
449    /// Generated fixed assets.
450    pub assets: Vec<FixedAsset>,
451    /// Generated employees.
452    pub employees: Vec<Employee>,
453    /// Generated cost center hierarchy (two-level: departments + sub-departments).
454    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
455    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
456    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
457}
458
/// Summary of a hypergraph export that has finished.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Directory the export was written to.
    pub output_path: PathBuf,
}
471
472/// Document flow snapshot containing all generated document chains.
473#[derive(Debug, Clone, Default)]
474pub struct DocumentFlowSnapshot {
475    /// P2P document chains.
476    pub p2p_chains: Vec<P2PDocumentChain>,
477    /// O2C document chains.
478    pub o2c_chains: Vec<O2CDocumentChain>,
479    /// All purchase orders (flattened).
480    pub purchase_orders: Vec<documents::PurchaseOrder>,
481    /// All goods receipts (flattened).
482    pub goods_receipts: Vec<documents::GoodsReceipt>,
483    /// All vendor invoices (flattened).
484    pub vendor_invoices: Vec<documents::VendorInvoice>,
485    /// All sales orders (flattened).
486    pub sales_orders: Vec<documents::SalesOrder>,
487    /// All deliveries (flattened).
488    pub deliveries: Vec<documents::Delivery>,
489    /// All customer invoices (flattened).
490    pub customer_invoices: Vec<documents::CustomerInvoice>,
491    /// All payments (flattened).
492    pub payments: Vec<documents::Payment>,
493    /// Cross-document references collected from all document headers
494    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
495    pub document_references: Vec<documents::DocumentReference>,
496}
497
498/// Subledger snapshot containing generated subledger records.
499#[derive(Debug, Clone, Default)]
500pub struct SubledgerSnapshot {
501    /// AP invoices linked from document flow vendor invoices.
502    pub ap_invoices: Vec<APInvoice>,
503    /// AR invoices linked from document flow customer invoices.
504    pub ar_invoices: Vec<ARInvoice>,
505    /// FA subledger records (asset acquisitions from FA generator).
506    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
507    /// Inventory positions from inventory generator.
508    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
509    /// Inventory movements from inventory generator.
510    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
511    /// AR aging reports, one per company, computed after payment settlement.
512    pub ar_aging_reports: Vec<ARAgingReport>,
513    /// AP aging reports, one per company, computed after payment settlement.
514    pub ap_aging_reports: Vec<APAgingReport>,
515    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
516    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
517    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
518    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
519    /// Dunning runs executed after AR aging (one per company per dunning cycle).
520    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
521    /// Dunning letters generated across all dunning runs.
522    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
523}
524
525/// OCPM snapshot containing generated OCPM event log data.
526#[derive(Debug, Clone, Default)]
527pub struct OcpmSnapshot {
528    /// OCPM event log (if generated)
529    pub event_log: Option<OcpmEventLog>,
530    /// Number of events generated
531    pub event_count: usize,
532    /// Number of objects generated
533    pub object_count: usize,
534    /// Number of cases generated
535    pub case_count: usize,
536}
537
538/// Audit data snapshot containing all generated audit-related entities.
539#[derive(Debug, Clone, Default)]
540pub struct AuditSnapshot {
541    /// Audit engagements per ISA 210/220.
542    pub engagements: Vec<AuditEngagement>,
543    /// Workpapers per ISA 230.
544    pub workpapers: Vec<Workpaper>,
545    /// Audit evidence per ISA 500.
546    pub evidence: Vec<AuditEvidence>,
547    /// Risk assessments per ISA 315/330.
548    pub risk_assessments: Vec<RiskAssessment>,
549    /// Audit findings per ISA 265.
550    pub findings: Vec<AuditFinding>,
551    /// Professional judgments per ISA 200.
552    pub judgments: Vec<ProfessionalJudgment>,
553    /// External confirmations per ISA 505.
554    pub confirmations: Vec<ExternalConfirmation>,
555    /// Confirmation responses per ISA 505.
556    pub confirmation_responses: Vec<ConfirmationResponse>,
557    /// Audit procedure steps per ISA 330/530.
558    pub procedure_steps: Vec<AuditProcedureStep>,
559    /// Audit samples per ISA 530.
560    pub samples: Vec<AuditSample>,
561    /// Analytical procedure results per ISA 520.
562    pub analytical_results: Vec<AnalyticalProcedureResult>,
563    /// Internal audit functions per ISA 610.
564    pub ia_functions: Vec<InternalAuditFunction>,
565    /// Internal audit reports per ISA 610.
566    pub ia_reports: Vec<InternalAuditReport>,
567    /// Related parties per ISA 550.
568    pub related_parties: Vec<RelatedParty>,
569    /// Related party transactions per ISA 550.
570    pub related_party_transactions: Vec<RelatedPartyTransaction>,
571    // ---- ISA 600: Group Audits ----
572    /// Component auditors assigned by jurisdiction (ISA 600).
573    pub component_auditors: Vec<ComponentAuditor>,
574    /// Group audit plan with materiality allocations (ISA 600).
575    pub group_audit_plan: Option<GroupAuditPlan>,
576    /// Component instructions issued to component auditors (ISA 600).
577    pub component_instructions: Vec<ComponentInstruction>,
578    /// Reports received from component auditors (ISA 600).
579    pub component_reports: Vec<ComponentAuditorReport>,
580    // ---- ISA 210: Engagement Letters ----
581    /// Engagement letters per ISA 210.
582    pub engagement_letters: Vec<EngagementLetter>,
583    // ---- ISA 560 / IAS 10: Subsequent Events ----
584    /// Subsequent events per ISA 560 / IAS 10.
585    pub subsequent_events: Vec<SubsequentEvent>,
586    // ---- ISA 402: Service Organization Controls ----
587    /// Service organizations identified per ISA 402.
588    pub service_organizations: Vec<ServiceOrganization>,
589    /// SOC reports obtained per ISA 402.
590    pub soc_reports: Vec<SocReport>,
591    /// User entity controls documented per ISA 402.
592    pub user_entity_controls: Vec<UserEntityControl>,
593    // ---- ISA 570: Going Concern ----
594    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
595    pub going_concern_assessments:
596        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
597    // ---- ISA 540: Accounting Estimates ----
598    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
599    pub accounting_estimates:
600        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
601    // ---- ISA 700/701/705/706: Audit Opinions ----
602    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
603    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
604    /// Key Audit Matters per ISA 701 (flattened across all opinions).
605    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
606    // ---- SOX 302 / 404 ----
607    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
608    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
609    /// SOX Section 404 ICFR assessments (one per entity per year).
610    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
611    // ---- ISA 320: Materiality ----
612    /// Materiality calculations per entity per period (ISA 320).
613    pub materiality_calculations:
614        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
615    // ---- ISA 315: Combined Risk Assessments ----
616    /// Combined Risk Assessments per account area / assertion (ISA 315).
617    pub combined_risk_assessments:
618        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
619    // ---- ISA 530: Sampling Plans ----
620    /// Sampling plans per CRA at Moderate or higher (ISA 530).
621    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
622    /// Individual sampled items (key items + representative items) per ISA 530.
623    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
624    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
625    /// Significant classes of transactions per ISA 315 (one set per entity).
626    pub significant_transaction_classes:
627        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
628    // ---- ISA 520: Unusual Item Markers ----
629    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
630    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
631    // ---- ISA 520: Analytical Relationships ----
632    /// Analytical relationships (ratios, trends, correlations) per entity.
633    pub analytical_relationships:
634        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
635    // ---- PCAOB-ISA Cross-Reference ----
636    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
637    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
638    // ---- ISA Standard Reference ----
639    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
640    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
641    // ---- ISA 220 / ISA 300: Audit Scopes ----
642    /// Audit scope records (one per engagement) describing the audit boundary.
643    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
644    // ---- FSM Event Trail ----
645    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
646    /// Contains the ordered sequence of state-transition and procedure-step events
647    /// generated by the audit FSM engine.
648    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
649}
650
651/// Banking KYC/AML data snapshot containing all generated banking entities.
652#[derive(Debug, Clone, Default)]
653pub struct BankingSnapshot {
654    /// Banking customers (retail, business, trust).
655    pub customers: Vec<BankingCustomer>,
656    /// Bank accounts.
657    pub accounts: Vec<BankAccount>,
658    /// Bank transactions with AML labels.
659    pub transactions: Vec<BankTransaction>,
660    /// Transaction-level AML labels with features.
661    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
662    /// Customer-level AML labels.
663    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
664    /// Account-level AML labels.
665    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
666    /// Relationship-level AML labels.
667    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
668    /// Case narratives for AML scenarios.
669    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
670    /// Number of suspicious transactions.
671    pub suspicious_count: usize,
672    /// Number of AML scenarios generated.
673    pub scenario_count: usize,
674}
675
676/// Graph export snapshot containing exported graph metadata.
677#[derive(Debug, Clone, Default, Serialize)]
678pub struct GraphExportSnapshot {
679    /// Whether graph export was performed.
680    pub exported: bool,
681    /// Number of graphs exported.
682    pub graph_count: usize,
683    /// Exported graph metadata (by format name).
684    pub exports: HashMap<String, GraphExportInfo>,
685}
686
687/// Information about an exported graph.
688#[derive(Debug, Clone, Serialize)]
689pub struct GraphExportInfo {
690    /// Graph name.
691    pub name: String,
692    /// Export format (pytorch_geometric, neo4j, dgl).
693    pub format: String,
694    /// Output directory path.
695    pub output_path: PathBuf,
696    /// Number of nodes.
697    pub node_count: usize,
698    /// Number of edges.
699    pub edge_count: usize,
700}
701
702/// S2C sourcing data snapshot.
703#[derive(Debug, Clone, Default)]
704pub struct SourcingSnapshot {
705    /// Spend analyses.
706    pub spend_analyses: Vec<SpendAnalysis>,
707    /// Sourcing projects.
708    pub sourcing_projects: Vec<SourcingProject>,
709    /// Supplier qualifications.
710    pub qualifications: Vec<SupplierQualification>,
711    /// RFx events (RFI, RFP, RFQ).
712    pub rfx_events: Vec<RfxEvent>,
713    /// Supplier bids.
714    pub bids: Vec<SupplierBid>,
715    /// Bid evaluations.
716    pub bid_evaluations: Vec<BidEvaluation>,
717    /// Procurement contracts.
718    pub contracts: Vec<ProcurementContract>,
719    /// Catalog items.
720    pub catalog_items: Vec<CatalogItem>,
721    /// Supplier scorecards.
722    pub scorecards: Vec<SupplierScorecard>,
723}
724
725/// A single period's trial balance with metadata.
726#[derive(Debug, Clone, Serialize, Deserialize)]
727pub struct PeriodTrialBalance {
728    /// Fiscal year.
729    pub fiscal_year: u16,
730    /// Fiscal period (1-12).
731    pub fiscal_period: u8,
732    /// Period start date.
733    pub period_start: NaiveDate,
734    /// Period end date.
735    pub period_end: NaiveDate,
736    /// Trial balance entries for this period.
737    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
738}
739
740/// Financial reporting snapshot (financial statements + bank reconciliations).
741#[derive(Debug, Clone, Default)]
742pub struct FinancialReportingSnapshot {
743    /// Financial statements (balance sheet, income statement, cash flow).
744    /// For multi-entity configs this includes all standalone statements.
745    pub financial_statements: Vec<FinancialStatement>,
746    /// Standalone financial statements keyed by entity code.
747    /// Each entity has its own slice of statements.
748    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
749    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
750    pub consolidated_statements: Vec<FinancialStatement>,
751    /// Consolidation schedules (one per period) showing pre/post elimination detail.
752    pub consolidation_schedules: Vec<ConsolidationSchedule>,
753    /// Bank reconciliations.
754    pub bank_reconciliations: Vec<BankReconciliation>,
755    /// Period-close trial balances (one per period).
756    pub trial_balances: Vec<PeriodTrialBalance>,
757    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
758    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
759    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
760    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
761    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
762    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
763}
764
765/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
766#[derive(Debug, Clone, Default)]
767pub struct HrSnapshot {
768    /// Payroll runs (actual data).
769    pub payroll_runs: Vec<PayrollRun>,
770    /// Payroll line items (actual data).
771    pub payroll_line_items: Vec<PayrollLineItem>,
772    /// Time entries (actual data).
773    pub time_entries: Vec<TimeEntry>,
774    /// Expense reports (actual data).
775    pub expense_reports: Vec<ExpenseReport>,
776    /// Benefit enrollments (actual data).
777    pub benefit_enrollments: Vec<BenefitEnrollment>,
778    /// Defined benefit pension plans (IAS 19 / ASC 715).
779    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
780    /// Pension obligation (DBO) roll-forwards.
781    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
782    /// Plan asset roll-forwards.
783    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
784    /// Pension disclosures.
785    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
786    /// Journal entries generated from pension expense and OCI remeasurements.
787    pub pension_journal_entries: Vec<JournalEntry>,
788    /// Stock grants (ASC 718 / IFRS 2).
789    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
790    /// Stock-based compensation period expense records.
791    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
792    /// Journal entries generated from stock-based compensation expense.
793    pub stock_comp_journal_entries: Vec<JournalEntry>,
794    /// Payroll runs.
795    pub payroll_run_count: usize,
796    /// Payroll line item count.
797    pub payroll_line_item_count: usize,
798    /// Time entry count.
799    pub time_entry_count: usize,
800    /// Expense report count.
801    pub expense_report_count: usize,
802    /// Benefit enrollment count.
803    pub benefit_enrollment_count: usize,
804    /// Pension plan count.
805    pub pension_plan_count: usize,
806    /// Stock grant count.
807    pub stock_grant_count: usize,
808}
809
810/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
811#[derive(Debug, Clone, Default)]
812pub struct AccountingStandardsSnapshot {
813    /// Revenue recognition contracts (actual data).
814    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
815    /// Impairment tests (actual data).
816    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
817    /// Business combinations (IFRS 3 / ASC 805).
818    pub business_combinations:
819        Vec<datasynth_core::models::business_combination::BusinessCombination>,
820    /// Journal entries generated from business combinations (Day 1 + amortization).
821    pub business_combination_journal_entries: Vec<JournalEntry>,
822    /// ECL models (IFRS 9 / ASC 326).
823    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
824    /// ECL provision movements.
825    pub ecl_provision_movements:
826        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
827    /// Journal entries from ECL provision.
828    pub ecl_journal_entries: Vec<JournalEntry>,
829    /// Provisions (IAS 37 / ASC 450).
830    pub provisions: Vec<datasynth_core::models::provision::Provision>,
831    /// Provision movement roll-forwards (IAS 37 / ASC 450).
832    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
833    /// Contingent liabilities (IAS 37 / ASC 450).
834    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
835    /// Journal entries from provisions.
836    pub provision_journal_entries: Vec<JournalEntry>,
837    /// IAS 21 functional currency translation results (one per entity per period).
838    pub currency_translation_results:
839        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
840    /// Revenue recognition contract count.
841    pub revenue_contract_count: usize,
842    /// Impairment test count.
843    pub impairment_test_count: usize,
844    /// Business combination count.
845    pub business_combination_count: usize,
846    /// ECL model count.
847    pub ecl_model_count: usize,
848    /// Provision count.
849    pub provision_count: usize,
850    /// Currency translation result count (IAS 21).
851    pub currency_translation_count: usize,
852}
853
854/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
855#[derive(Debug, Clone, Default)]
856pub struct ComplianceRegulationsSnapshot {
857    /// Flattened standard records for output.
858    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
859    /// Cross-reference records.
860    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
861    /// Jurisdiction profile records.
862    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
863    /// Generated audit procedures.
864    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
865    /// Generated compliance findings.
866    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
867    /// Generated regulatory filings.
868    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
869    /// Compliance graph (if graph integration enabled).
870    pub compliance_graph: Option<datasynth_graph::Graph>,
871}
872
873/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
874#[derive(Debug, Clone, Default)]
875pub struct ManufacturingSnapshot {
876    /// Production orders (actual data).
877    pub production_orders: Vec<ProductionOrder>,
878    /// Quality inspections (actual data).
879    pub quality_inspections: Vec<QualityInspection>,
880    /// Cycle counts (actual data).
881    pub cycle_counts: Vec<CycleCount>,
882    /// BOM components (actual data).
883    pub bom_components: Vec<BomComponent>,
884    /// Inventory movements (actual data).
885    pub inventory_movements: Vec<InventoryMovement>,
886    /// Production order count.
887    pub production_order_count: usize,
888    /// Quality inspection count.
889    pub quality_inspection_count: usize,
890    /// Cycle count count.
891    pub cycle_count_count: usize,
892    /// BOM component count.
893    pub bom_component_count: usize,
894    /// Inventory movement count.
895    pub inventory_movement_count: usize,
896}
897
898/// Sales, KPI, and budget data snapshot.
899#[derive(Debug, Clone, Default)]
900pub struct SalesKpiBudgetsSnapshot {
901    /// Sales quotes (actual data).
902    pub sales_quotes: Vec<SalesQuote>,
903    /// Management KPIs (actual data).
904    pub kpis: Vec<ManagementKpi>,
905    /// Budgets (actual data).
906    pub budgets: Vec<Budget>,
907    /// Sales quote count.
908    pub sales_quote_count: usize,
909    /// Management KPI count.
910    pub kpi_count: usize,
911    /// Budget line count.
912    pub budget_line_count: usize,
913}
914
915/// Anomaly labels generated during injection.
916#[derive(Debug, Clone, Default)]
917pub struct AnomalyLabels {
918    /// All anomaly labels.
919    pub labels: Vec<LabeledAnomaly>,
920    /// Summary statistics.
921    pub summary: Option<AnomalySummary>,
922    /// Count by anomaly type.
923    pub by_type: HashMap<String, usize>,
924}
925
926/// Balance validation results from running balance tracker.
927#[derive(Debug, Clone, Default)]
928pub struct BalanceValidationResult {
929    /// Whether validation was performed.
930    pub validated: bool,
931    /// Whether balance sheet equation is satisfied.
932    pub is_balanced: bool,
933    /// Number of entries processed.
934    pub entries_processed: u64,
935    /// Total debits across all entries.
936    pub total_debits: rust_decimal::Decimal,
937    /// Total credits across all entries.
938    pub total_credits: rust_decimal::Decimal,
939    /// Number of accounts tracked.
940    pub accounts_tracked: usize,
941    /// Number of companies tracked.
942    pub companies_tracked: usize,
943    /// Validation errors encountered.
944    pub validation_errors: Vec<ValidationError>,
945    /// Whether any unbalanced entries were found.
946    pub has_unbalanced_entries: bool,
947}
948
949/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
950#[derive(Debug, Clone, Default)]
951pub struct TaxSnapshot {
952    /// Tax jurisdictions.
953    pub jurisdictions: Vec<TaxJurisdiction>,
954    /// Tax codes.
955    pub codes: Vec<TaxCode>,
956    /// Tax lines computed on documents.
957    pub tax_lines: Vec<TaxLine>,
958    /// Tax returns filed per period.
959    pub tax_returns: Vec<TaxReturn>,
960    /// Tax provisions.
961    pub tax_provisions: Vec<TaxProvision>,
962    /// Withholding tax records.
963    pub withholding_records: Vec<WithholdingTaxRecord>,
964    /// Tax anomaly labels.
965    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
966    /// Jurisdiction count.
967    pub jurisdiction_count: usize,
968    /// Code count.
969    pub code_count: usize,
970    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
971    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
972}
973
974/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
975#[derive(Debug, Clone, Default, Serialize, Deserialize)]
976pub struct IntercompanySnapshot {
977    /// Group ownership structure (parent/subsidiary/associate relationships).
978    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
979    /// IC matched pairs (transaction pairs between related entities).
980    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
981    /// IC journal entries generated from matched pairs (seller side).
982    pub seller_journal_entries: Vec<JournalEntry>,
983    /// IC journal entries generated from matched pairs (buyer side).
984    pub buyer_journal_entries: Vec<JournalEntry>,
985    /// Elimination entries for consolidation.
986    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
987    /// NCI measurements derived from group structure ownership percentages.
988    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
989    /// IC matched pair count.
990    pub matched_pair_count: usize,
991    /// IC elimination entry count.
992    pub elimination_entry_count: usize,
993    /// IC matching rate (0.0 to 1.0).
994    pub match_rate: f64,
995}
996
997/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
998#[derive(Debug, Clone, Default)]
999pub struct EsgSnapshot {
1000    /// Emission records (scope 1, 2, 3).
1001    pub emissions: Vec<EmissionRecord>,
1002    /// Energy consumption records.
1003    pub energy: Vec<EnergyConsumption>,
1004    /// Water usage records.
1005    pub water: Vec<WaterUsage>,
1006    /// Waste records.
1007    pub waste: Vec<WasteRecord>,
1008    /// Workforce diversity metrics.
1009    pub diversity: Vec<WorkforceDiversityMetric>,
1010    /// Pay equity metrics.
1011    pub pay_equity: Vec<PayEquityMetric>,
1012    /// Safety incidents.
1013    pub safety_incidents: Vec<SafetyIncident>,
1014    /// Safety metrics.
1015    pub safety_metrics: Vec<SafetyMetric>,
1016    /// Governance metrics.
1017    pub governance: Vec<GovernanceMetric>,
1018    /// Supplier ESG assessments.
1019    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1020    /// Materiality assessments.
1021    pub materiality: Vec<MaterialityAssessment>,
1022    /// ESG disclosures.
1023    pub disclosures: Vec<EsgDisclosure>,
1024    /// Climate scenarios.
1025    pub climate_scenarios: Vec<ClimateScenario>,
1026    /// ESG anomaly labels.
1027    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1028    /// Total emission record count.
1029    pub emission_count: usize,
1030    /// Total disclosure count.
1031    pub disclosure_count: usize,
1032}
1033
1034/// Treasury data snapshot (cash management, hedging, debt, pooling).
1035#[derive(Debug, Clone, Default)]
1036pub struct TreasurySnapshot {
1037    /// Cash positions (daily balances per account).
1038    pub cash_positions: Vec<CashPosition>,
1039    /// Cash forecasts.
1040    pub cash_forecasts: Vec<CashForecast>,
1041    /// Cash pools.
1042    pub cash_pools: Vec<CashPool>,
1043    /// Cash pool sweep transactions.
1044    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1045    /// Hedging instruments.
1046    pub hedging_instruments: Vec<HedgingInstrument>,
1047    /// Hedge relationships (ASC 815/IFRS 9 designations).
1048    pub hedge_relationships: Vec<HedgeRelationship>,
1049    /// Debt instruments.
1050    pub debt_instruments: Vec<DebtInstrument>,
1051    /// Bank guarantees and letters of credit.
1052    pub bank_guarantees: Vec<BankGuarantee>,
1053    /// Intercompany netting runs.
1054    pub netting_runs: Vec<NettingRun>,
1055    /// Treasury anomaly labels.
1056    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1057}
1058
1059/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
1060#[derive(Debug, Clone, Default)]
1061pub struct ProjectAccountingSnapshot {
1062    /// Projects with WBS hierarchies.
1063    pub projects: Vec<Project>,
1064    /// Project cost lines (linked from source documents).
1065    pub cost_lines: Vec<ProjectCostLine>,
1066    /// Revenue recognition records.
1067    pub revenue_records: Vec<ProjectRevenue>,
1068    /// Earned value metrics.
1069    pub earned_value_metrics: Vec<EarnedValueMetric>,
1070    /// Change orders.
1071    pub change_orders: Vec<ChangeOrder>,
1072    /// Project milestones.
1073    pub milestones: Vec<ProjectMilestone>,
1074}
1075
1076/// Complete result of enhanced generation run.
1077#[derive(Debug, Default)]
1078pub struct EnhancedGenerationResult {
1079    /// Generated chart of accounts.
1080    pub chart_of_accounts: ChartOfAccounts,
1081    /// Master data snapshot.
1082    pub master_data: MasterDataSnapshot,
1083    /// Document flow snapshot.
1084    pub document_flows: DocumentFlowSnapshot,
1085    /// Subledger snapshot (linked from document flows).
1086    pub subledger: SubledgerSnapshot,
1087    /// OCPM event log snapshot (if OCPM generation enabled).
1088    pub ocpm: OcpmSnapshot,
1089    /// Audit data snapshot (if audit generation enabled).
1090    pub audit: AuditSnapshot,
1091    /// Banking KYC/AML data snapshot (if banking generation enabled).
1092    pub banking: BankingSnapshot,
1093    /// Graph export snapshot (if graph export enabled).
1094    pub graph_export: GraphExportSnapshot,
1095    /// S2C sourcing data snapshot (if sourcing generation enabled).
1096    pub sourcing: SourcingSnapshot,
1097    /// Financial reporting snapshot (financial statements + bank reconciliations).
1098    pub financial_reporting: FinancialReportingSnapshot,
1099    /// HR data snapshot (payroll, time entries, expenses).
1100    pub hr: HrSnapshot,
1101    /// Accounting standards snapshot (revenue recognition, impairment).
1102    pub accounting_standards: AccountingStandardsSnapshot,
1103    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1104    pub manufacturing: ManufacturingSnapshot,
1105    /// Sales, KPI, and budget snapshot.
1106    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1107    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1108    pub tax: TaxSnapshot,
1109    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1110    pub esg: EsgSnapshot,
1111    /// Treasury data snapshot (cash management, hedging, debt).
1112    pub treasury: TreasurySnapshot,
1113    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1114    pub project_accounting: ProjectAccountingSnapshot,
1115    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1116    pub process_evolution: Vec<ProcessEvolutionEvent>,
1117    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1118    pub organizational_events: Vec<OrganizationalEvent>,
1119    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1120    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1121    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1122    pub intercompany: IntercompanySnapshot,
1123    /// Generated journal entries.
1124    pub journal_entries: Vec<JournalEntry>,
1125    /// Anomaly labels (if injection enabled).
1126    pub anomaly_labels: AnomalyLabels,
1127    /// Balance validation results (if validation enabled).
1128    pub balance_validation: BalanceValidationResult,
1129    /// Data quality statistics (if injection enabled).
1130    pub data_quality_stats: DataQualityStats,
1131    /// Data quality issue records (if injection enabled).
1132    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1133    /// Generation statistics.
1134    pub statistics: EnhancedGenerationStatistics,
1135    /// Data lineage graph (if tracking enabled).
1136    pub lineage: Option<super::lineage::LineageGraph>,
1137    /// Quality gate evaluation result.
1138    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1139    /// Internal controls (if controls generation enabled).
1140    pub internal_controls: Vec<InternalControl>,
1141    /// SoD (Segregation of Duties) violations identified during control application.
1142    ///
1143    /// Each record corresponds to a journal entry where `sod_violation == true`.
1144    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1145    /// Opening balances (if opening balance generation enabled).
1146    pub opening_balances: Vec<GeneratedOpeningBalance>,
1147    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1148    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1149    /// Counterfactual (original, mutated) JE pairs for ML training.
1150    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1151    /// Fraud red-flag indicators on P2P/O2C documents.
1152    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1153    /// Collusion rings (coordinated fraud networks).
1154    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1155    /// Bi-temporal version chains for vendor entities.
1156    pub temporal_vendor_chains:
1157        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1158    /// Entity relationship graph (nodes + edges with strength scores).
1159    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1160    /// Cross-process links (P2P ↔ O2C via inventory movements).
1161    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1162    /// Industry-specific GL accounts and metadata.
1163    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1164    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1165    pub compliance_regulations: ComplianceRegulationsSnapshot,
1166}
1167
1168/// Enhanced statistics about a generation run.
1169#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1170pub struct EnhancedGenerationStatistics {
1171    /// Total journal entries generated.
1172    pub total_entries: u64,
1173    /// Total line items generated.
1174    pub total_line_items: u64,
1175    /// Number of accounts in CoA.
1176    pub accounts_count: usize,
1177    /// Number of companies.
1178    pub companies_count: usize,
1179    /// Period in months.
1180    pub period_months: u32,
1181    /// Master data counts.
1182    pub vendor_count: usize,
1183    pub customer_count: usize,
1184    pub material_count: usize,
1185    pub asset_count: usize,
1186    pub employee_count: usize,
1187    /// Document flow counts.
1188    pub p2p_chain_count: usize,
1189    pub o2c_chain_count: usize,
1190    /// Subledger counts.
1191    pub ap_invoice_count: usize,
1192    pub ar_invoice_count: usize,
1193    /// OCPM counts.
1194    pub ocpm_event_count: usize,
1195    pub ocpm_object_count: usize,
1196    pub ocpm_case_count: usize,
1197    /// Audit counts.
1198    pub audit_engagement_count: usize,
1199    pub audit_workpaper_count: usize,
1200    pub audit_evidence_count: usize,
1201    pub audit_risk_count: usize,
1202    pub audit_finding_count: usize,
1203    pub audit_judgment_count: usize,
1204    /// ISA 505 confirmation counts.
1205    #[serde(default)]
1206    pub audit_confirmation_count: usize,
1207    #[serde(default)]
1208    pub audit_confirmation_response_count: usize,
1209    /// ISA 330/530 procedure step and sample counts.
1210    #[serde(default)]
1211    pub audit_procedure_step_count: usize,
1212    #[serde(default)]
1213    pub audit_sample_count: usize,
1214    /// ISA 520 analytical procedure counts.
1215    #[serde(default)]
1216    pub audit_analytical_result_count: usize,
1217    /// ISA 610 internal audit counts.
1218    #[serde(default)]
1219    pub audit_ia_function_count: usize,
1220    #[serde(default)]
1221    pub audit_ia_report_count: usize,
1222    /// ISA 550 related party counts.
1223    #[serde(default)]
1224    pub audit_related_party_count: usize,
1225    #[serde(default)]
1226    pub audit_related_party_transaction_count: usize,
1227    /// Anomaly counts.
1228    pub anomalies_injected: usize,
1229    /// Data quality issue counts.
1230    pub data_quality_issues: usize,
1231    /// Banking counts.
1232    pub banking_customer_count: usize,
1233    pub banking_account_count: usize,
1234    pub banking_transaction_count: usize,
1235    pub banking_suspicious_count: usize,
1236    /// Graph export counts.
1237    pub graph_export_count: usize,
1238    pub graph_node_count: usize,
1239    pub graph_edge_count: usize,
1240    /// LLM enrichment timing (milliseconds).
1241    #[serde(default)]
1242    pub llm_enrichment_ms: u64,
1243    /// Number of vendor names enriched by LLM.
1244    #[serde(default)]
1245    pub llm_vendors_enriched: usize,
1246    /// Diffusion enhancement timing (milliseconds).
1247    #[serde(default)]
1248    pub diffusion_enhancement_ms: u64,
1249    /// Number of diffusion samples generated.
1250    #[serde(default)]
1251    pub diffusion_samples_generated: usize,
1252    /// Causal generation timing (milliseconds).
1253    #[serde(default)]
1254    pub causal_generation_ms: u64,
1255    /// Number of causal samples generated.
1256    #[serde(default)]
1257    pub causal_samples_generated: usize,
1258    /// Whether causal validation passed.
1259    #[serde(default)]
1260    pub causal_validation_passed: Option<bool>,
1261    /// S2C sourcing counts.
1262    #[serde(default)]
1263    pub sourcing_project_count: usize,
1264    #[serde(default)]
1265    pub rfx_event_count: usize,
1266    #[serde(default)]
1267    pub bid_count: usize,
1268    #[serde(default)]
1269    pub contract_count: usize,
1270    #[serde(default)]
1271    pub catalog_item_count: usize,
1272    #[serde(default)]
1273    pub scorecard_count: usize,
1274    /// Financial reporting counts.
1275    #[serde(default)]
1276    pub financial_statement_count: usize,
1277    #[serde(default)]
1278    pub bank_reconciliation_count: usize,
1279    /// HR counts.
1280    #[serde(default)]
1281    pub payroll_run_count: usize,
1282    #[serde(default)]
1283    pub time_entry_count: usize,
1284    #[serde(default)]
1285    pub expense_report_count: usize,
1286    #[serde(default)]
1287    pub benefit_enrollment_count: usize,
1288    #[serde(default)]
1289    pub pension_plan_count: usize,
1290    #[serde(default)]
1291    pub stock_grant_count: usize,
1292    /// Accounting standards counts.
1293    #[serde(default)]
1294    pub revenue_contract_count: usize,
1295    #[serde(default)]
1296    pub impairment_test_count: usize,
1297    #[serde(default)]
1298    pub business_combination_count: usize,
1299    #[serde(default)]
1300    pub ecl_model_count: usize,
1301    #[serde(default)]
1302    pub provision_count: usize,
1303    /// Manufacturing counts.
1304    #[serde(default)]
1305    pub production_order_count: usize,
1306    #[serde(default)]
1307    pub quality_inspection_count: usize,
1308    #[serde(default)]
1309    pub cycle_count_count: usize,
1310    #[serde(default)]
1311    pub bom_component_count: usize,
1312    #[serde(default)]
1313    pub inventory_movement_count: usize,
1314    /// Sales & reporting counts.
1315    #[serde(default)]
1316    pub sales_quote_count: usize,
1317    #[serde(default)]
1318    pub kpi_count: usize,
1319    #[serde(default)]
1320    pub budget_line_count: usize,
1321    /// Tax counts.
1322    #[serde(default)]
1323    pub tax_jurisdiction_count: usize,
1324    #[serde(default)]
1325    pub tax_code_count: usize,
1326    /// ESG counts.
1327    #[serde(default)]
1328    pub esg_emission_count: usize,
1329    #[serde(default)]
1330    pub esg_disclosure_count: usize,
1331    /// Intercompany counts.
1332    #[serde(default)]
1333    pub ic_matched_pair_count: usize,
1334    #[serde(default)]
1335    pub ic_elimination_count: usize,
1336    /// Number of intercompany journal entries (seller + buyer side).
1337    #[serde(default)]
1338    pub ic_transaction_count: usize,
1339    /// Number of fixed asset subledger records.
1340    #[serde(default)]
1341    pub fa_subledger_count: usize,
1342    /// Number of inventory subledger records.
1343    #[serde(default)]
1344    pub inventory_subledger_count: usize,
1345    /// Treasury debt instrument count.
1346    #[serde(default)]
1347    pub treasury_debt_instrument_count: usize,
1348    /// Treasury hedging instrument count.
1349    #[serde(default)]
1350    pub treasury_hedging_instrument_count: usize,
1351    /// Project accounting project count.
1352    #[serde(default)]
1353    pub project_count: usize,
1354    /// Project accounting change order count.
1355    #[serde(default)]
1356    pub project_change_order_count: usize,
1357    /// Tax provision count.
1358    #[serde(default)]
1359    pub tax_provision_count: usize,
1360    /// Opening balance count.
1361    #[serde(default)]
1362    pub opening_balance_count: usize,
1363    /// Subledger reconciliation count.
1364    #[serde(default)]
1365    pub subledger_reconciliation_count: usize,
1366    /// Tax line count.
1367    #[serde(default)]
1368    pub tax_line_count: usize,
1369    /// Project cost line count.
1370    #[serde(default)]
1371    pub project_cost_line_count: usize,
1372    /// Cash position count.
1373    #[serde(default)]
1374    pub cash_position_count: usize,
1375    /// Cash forecast count.
1376    #[serde(default)]
1377    pub cash_forecast_count: usize,
1378    /// Cash pool count.
1379    #[serde(default)]
1380    pub cash_pool_count: usize,
1381    /// Process evolution event count.
1382    #[serde(default)]
1383    pub process_evolution_event_count: usize,
1384    /// Organizational event count.
1385    #[serde(default)]
1386    pub organizational_event_count: usize,
1387    /// Counterfactual pair count.
1388    #[serde(default)]
1389    pub counterfactual_pair_count: usize,
1390    /// Number of fraud red-flag indicators generated.
1391    #[serde(default)]
1392    pub red_flag_count: usize,
1393    /// Number of collusion rings generated.
1394    #[serde(default)]
1395    pub collusion_ring_count: usize,
1396    /// Number of bi-temporal vendor version chains generated.
1397    #[serde(default)]
1398    pub temporal_version_chain_count: usize,
1399    /// Number of nodes in the entity relationship graph.
1400    #[serde(default)]
1401    pub entity_relationship_node_count: usize,
1402    /// Number of edges in the entity relationship graph.
1403    #[serde(default)]
1404    pub entity_relationship_edge_count: usize,
1405    /// Number of cross-process links generated.
1406    #[serde(default)]
1407    pub cross_process_link_count: usize,
1408    /// Number of disruption events generated.
1409    #[serde(default)]
1410    pub disruption_event_count: usize,
1411    /// Number of industry-specific GL accounts generated.
1412    #[serde(default)]
1413    pub industry_gl_account_count: usize,
1414    /// Number of period-close journal entries generated (tax provision + closing entries).
1415    #[serde(default)]
1416    pub period_close_je_count: usize,
1417}
1418
/// Enhanced orchestrator with full feature integration.
///
/// Holds the generator configuration together with the state that is shared
/// across generation phases. Instances are created with
/// `EnhancedOrchestrator::new` (or one of the `from_fingerprint*`
/// constructors) and customised through the `with_*` builder methods.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated when the orchestrator is constructed.
    config: GeneratorConfig,
    /// Phase configuration; `with_progress` writes its `show_progress` flag.
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `None` right after construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts out as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or drawn randomly when that is unset.
    seed: u64,
    /// Progress-bar container; created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint)
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry for localized data generation
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink for phase-by-phase output
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1438
1439impl EnhancedOrchestrator {
1440    /// Create a new enhanced orchestrator.
1441    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1442        datasynth_config::validate_config(&config)?;
1443
1444        let seed = config.global.seed.unwrap_or_else(rand::random);
1445
1446        // Build resource guard from config
1447        let resource_guard = Self::build_resource_guard(&config, None);
1448
1449        // Build country pack registry from config
1450        let country_pack_registry = match &config.country_packs {
1451            Some(cp) => {
1452                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1453                    .map_err(|e| SynthError::config(e.to_string()))?
1454            }
1455            None => datasynth_core::CountryPackRegistry::builtin_only()
1456                .map_err(|e| SynthError::config(e.to_string()))?,
1457        };
1458
1459        Ok(Self {
1460            config,
1461            phase_config,
1462            coa: None,
1463            master_data: MasterDataSnapshot::default(),
1464            seed,
1465            multi_progress: None,
1466            resource_guard,
1467            output_path: None,
1468            copula_generators: Vec::new(),
1469            country_pack_registry,
1470            phase_sink: None,
1471        })
1472    }
1473
1474    /// Create with default phase config.
1475    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1476        Self::new(config, PhaseConfig::default())
1477    }
1478
    /// Set a streaming phase sink for real-time output.
    ///
    /// Builder-style: consumes the orchestrator and returns it with `sink`
    /// installed, replacing any sink that was set before.
    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
        self.phase_sink = Some(sink);
        self
    }
1484
1485    /// Emit a batch of items to the phase sink (if configured).
1486    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1487        if let Some(ref sink) = self.phase_sink {
1488            for item in items {
1489                if let Ok(value) = serde_json::to_value(item) {
1490                    if let Err(e) = sink.emit(phase, type_name, &value) {
1491                        warn!(
1492                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1493                        );
1494                    }
1495                }
1496            }
1497            if let Err(e) = sink.phase_complete(phase) {
1498                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1499            }
1500        }
1501    }
1502
1503    /// Enable/disable progress bars.
1504    pub fn with_progress(mut self, show: bool) -> Self {
1505        self.phase_config.show_progress = show;
1506        if show {
1507            self.multi_progress = Some(MultiProgress::new());
1508        }
1509        self
1510    }
1511
1512    /// Set the output path for disk space monitoring.
1513    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1514        let path = path.into();
1515        self.output_path = Some(path.clone());
1516        // Rebuild resource guard with the output path
1517        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1518        self
1519    }
1520
    /// Access the country pack registry.
    ///
    /// Returns a shared reference to the registry built in `new`.
    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
        &self.country_pack_registry
    }
1525
    /// Look up a country pack by country code string.
    ///
    /// Delegates to the registry's `get_by_str`. The result is a plain
    /// reference rather than an `Option`.
    /// NOTE(review): how unrecognised codes are resolved is decided by the
    /// registry — confirm there.
    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
        self.country_pack_registry.get_by_str(country)
    }
1530
1531    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1532    /// company, defaulting to `"US"` if no companies are configured.
1533    fn primary_country_code(&self) -> &str {
1534        self.config
1535            .companies
1536            .first()
1537            .map(|c| c.country.as_str())
1538            .unwrap_or("US")
1539    }
1540
1541    /// Resolve the country pack for the primary (first) company.
1542    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1543        self.country_pack_for(self.primary_country_code())
1544    }
1545
1546    /// Resolve the CoA framework from config/country-pack.
1547    fn resolve_coa_framework(&self) -> CoAFramework {
1548        if self.config.accounting_standards.enabled {
1549            match self.config.accounting_standards.framework {
1550                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1551                    return CoAFramework::FrenchPcg;
1552                }
1553                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1554                    return CoAFramework::GermanSkr04;
1555                }
1556                _ => {}
1557            }
1558        }
1559        // Fallback: derive from country pack
1560        let pack = self.primary_pack();
1561        match pack.accounting.framework.as_str() {
1562            "french_gaap" => CoAFramework::FrenchPcg,
1563            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1564            _ => CoAFramework::UsGaap,
1565        }
1566    }
1567
1568    /// Check if copula generators are available.
1569    ///
1570    /// Returns true if the orchestrator has copula generators for preserving
1571    /// correlations (typically from fingerprint-based generation).
1572    pub fn has_copulas(&self) -> bool {
1573        !self.copula_generators.is_empty()
1574    }
1575
1576    /// Get the copula generators.
1577    ///
1578    /// Returns a reference to the copula generators for use during generation.
1579    /// These can be used to generate correlated samples that preserve the
1580    /// statistical relationships from the source data.
1581    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1582        &self.copula_generators
1583    }
1584
1585    /// Get a mutable reference to the copula generators.
1586    ///
1587    /// Allows generators to sample from copulas during data generation.
1588    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1589        &mut self.copula_generators
1590    }
1591
1592    /// Sample correlated values from a named copula.
1593    ///
1594    /// Returns None if the copula doesn't exist.
1595    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1596        self.copula_generators
1597            .iter_mut()
1598            .find(|c| c.name == copula_name)
1599            .map(|c| c.generator.sample())
1600    }
1601
1602    /// Create an orchestrator from a fingerprint file.
1603    ///
1604    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1605    /// and creates an orchestrator configured to generate data matching
1606    /// the statistical properties of the original data.
1607    ///
1608    /// # Arguments
1609    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1610    /// * `phase_config` - Phase configuration for generation
1611    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1612    ///
1613    /// # Example
1614    /// ```no_run
1615    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1616    /// use std::path::Path;
1617    ///
1618    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1619    ///     Path::new("fingerprint.dsf"),
1620    ///     PhaseConfig::default(),
1621    ///     1.0,
1622    /// ).unwrap();
1623    /// ```
1624    pub fn from_fingerprint(
1625        fingerprint_path: &std::path::Path,
1626        phase_config: PhaseConfig,
1627        scale: f64,
1628    ) -> SynthResult<Self> {
1629        info!("Loading fingerprint from: {}", fingerprint_path.display());
1630
1631        // Read the fingerprint
1632        let reader = FingerprintReader::new();
1633        let fingerprint = reader
1634            .read_from_file(fingerprint_path)
1635            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1636
1637        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1638    }
1639
1640    /// Create an orchestrator from a loaded fingerprint.
1641    ///
1642    /// # Arguments
1643    /// * `fingerprint` - The loaded fingerprint
1644    /// * `phase_config` - Phase configuration for generation
1645    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1646    pub fn from_fingerprint_data(
1647        fingerprint: Fingerprint,
1648        phase_config: PhaseConfig,
1649        scale: f64,
1650    ) -> SynthResult<Self> {
1651        info!(
1652            "Synthesizing config from fingerprint (version: {}, tables: {})",
1653            fingerprint.manifest.version,
1654            fingerprint.schema.tables.len()
1655        );
1656
1657        // Generate a seed for the synthesis
1658        let seed: u64 = rand::random();
1659        info!("Fingerprint synthesis seed: {}", seed);
1660
1661        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1662        let options = SynthesisOptions {
1663            scale,
1664            seed: Some(seed),
1665            preserve_correlations: true,
1666            inject_anomalies: true,
1667        };
1668        let synthesizer = ConfigSynthesizer::with_options(options);
1669
1670        // Synthesize full result including copula generators
1671        let synthesis_result = synthesizer
1672            .synthesize_full(&fingerprint, seed)
1673            .map_err(|e| {
1674                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1675            })?;
1676
1677        // Start with a base config from the fingerprint's industry if available
1678        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1679            Self::base_config_for_industry(industry)
1680        } else {
1681            Self::base_config_for_industry("manufacturing")
1682        };
1683
1684        // Apply the synthesized patches
1685        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1686
1687        // Log synthesis results
1688        info!(
1689            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1690            fingerprint.schema.tables.len(),
1691            scale,
1692            synthesis_result.copula_generators.len()
1693        );
1694
1695        if !synthesis_result.copula_generators.is_empty() {
1696            for spec in &synthesis_result.copula_generators {
1697                info!(
1698                    "  Copula '{}' for table '{}': {} columns",
1699                    spec.name,
1700                    spec.table,
1701                    spec.columns.len()
1702                );
1703            }
1704        }
1705
1706        // Create the orchestrator with the synthesized config
1707        let mut orchestrator = Self::new(config, phase_config)?;
1708
1709        // Store copula generators for use during generation
1710        orchestrator.copula_generators = synthesis_result.copula_generators;
1711
1712        Ok(orchestrator)
1713    }
1714
1715    /// Create a base config for a given industry.
1716    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1717        use datasynth_config::presets::create_preset;
1718        use datasynth_config::TransactionVolume;
1719        use datasynth_core::models::{CoAComplexity, IndustrySector};
1720
1721        let sector = match industry.to_lowercase().as_str() {
1722            "manufacturing" => IndustrySector::Manufacturing,
1723            "retail" => IndustrySector::Retail,
1724            "financial" | "financial_services" => IndustrySector::FinancialServices,
1725            "healthcare" => IndustrySector::Healthcare,
1726            "technology" | "tech" => IndustrySector::Technology,
1727            _ => IndustrySector::Manufacturing,
1728        };
1729
1730        // Create a preset with reasonable defaults
1731        create_preset(
1732            sector,
1733            1,  // company count
1734            12, // period months
1735            CoAComplexity::Medium,
1736            TransactionVolume::TenK,
1737        )
1738    }
1739
1740    /// Apply a config patch to a GeneratorConfig.
1741    fn apply_config_patch(
1742        mut config: GeneratorConfig,
1743        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1744    ) -> GeneratorConfig {
1745        use datasynth_fingerprint::synthesis::ConfigValue;
1746
1747        for (key, value) in patch.values() {
1748            match (key.as_str(), value) {
1749                // Transaction count is handled via TransactionVolume enum on companies
1750                // Log it but cannot directly set it (would need to modify company volumes)
1751                ("transactions.count", ConfigValue::Integer(n)) => {
1752                    info!(
1753                        "Fingerprint suggests {} transactions (apply via company volumes)",
1754                        n
1755                    );
1756                }
1757                ("global.period_months", ConfigValue::Integer(n)) => {
1758                    config.global.period_months = (*n).clamp(1, 120) as u32;
1759                }
1760                ("global.start_date", ConfigValue::String(s)) => {
1761                    config.global.start_date = s.clone();
1762                }
1763                ("global.seed", ConfigValue::Integer(n)) => {
1764                    config.global.seed = Some(*n as u64);
1765                }
1766                ("fraud.enabled", ConfigValue::Bool(b)) => {
1767                    config.fraud.enabled = *b;
1768                }
1769                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1770                    config.fraud.fraud_rate = *f;
1771                }
1772                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1773                    config.data_quality.enabled = *b;
1774                }
1775                // Handle anomaly injection paths (mapped to fraud config)
1776                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1777                    config.fraud.enabled = *b;
1778                }
1779                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1780                    config.fraud.fraud_rate = *f;
1781                }
1782                _ => {
1783                    debug!("Ignoring unknown config patch key: {}", key);
1784                }
1785            }
1786        }
1787
1788        config
1789    }
1790
1791    /// Build a resource guard from the configuration.
1792    fn build_resource_guard(
1793        config: &GeneratorConfig,
1794        output_path: Option<PathBuf>,
1795    ) -> ResourceGuard {
1796        let mut builder = ResourceGuardBuilder::new();
1797
1798        // Configure memory limit if set
1799        if config.global.memory_limit_mb > 0 {
1800            builder = builder.memory_limit(config.global.memory_limit_mb);
1801        }
1802
1803        // Configure disk monitoring for output path
1804        if let Some(path) = output_path {
1805            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1806        }
1807
1808        // Use conservative degradation settings for production safety
1809        builder = builder.conservative();
1810
1811        builder.build()
1812    }
1813
    /// Check resources (memory, disk, CPU) and return degradation level.
    ///
    /// Thin wrapper around the resource guard's `check`.
    ///
    /// Returns an error if hard limits are exceeded.
    /// Returns Ok(DegradationLevel) indicating current resource state.
    fn check_resources(&self) -> SynthResult<DegradationLevel> {
        self.resource_guard.check()
    }
1821
1822    /// Check resources with logging.
1823    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1824        let level = self.resource_guard.check()?;
1825
1826        if level != DegradationLevel::Normal {
1827            warn!(
1828                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1829                phase,
1830                level,
1831                self.resource_guard.current_memory_mb(),
1832                self.resource_guard.available_disk_mb()
1833            );
1834        }
1835
1836        Ok(level)
1837    }
1838
    /// Get current degradation actions based on resource state.
    ///
    /// Thin wrapper around the resource guard's `get_actions`.
    fn get_degradation_actions(&self) -> DegradationActions {
        self.resource_guard.get_actions()
    }
1843
1844    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1845    fn check_memory_limit(&self) -> SynthResult<()> {
1846        self.check_resources()?;
1847        Ok(())
1848    }
1849
1850    /// Run the complete generation workflow.
1851    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1852        info!("Starting enhanced generation workflow");
1853        info!(
1854            "Config: industry={:?}, period_months={}, companies={}",
1855            self.config.global.industry,
1856            self.config.global.period_months,
1857            self.config.companies.len()
1858        );
1859
1860        // Initial resource check before starting
1861        let initial_level = self.check_resources_with_log("initial")?;
1862        if initial_level == DegradationLevel::Emergency {
1863            return Err(SynthError::resource(
1864                "Insufficient resources to start generation",
1865            ));
1866        }
1867
1868        let mut stats = EnhancedGenerationStatistics {
1869            companies_count: self.config.companies.len(),
1870            period_months: self.config.global.period_months,
1871            ..Default::default()
1872        };
1873
1874        // Phase 1: Chart of Accounts
1875        let coa = self.phase_chart_of_accounts(&mut stats)?;
1876
1877        // Phase 2: Master Data
1878        self.phase_master_data(&mut stats)?;
1879
1880        // Emit master data to stream sink
1881        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1882        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1883        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1884
1885        // Phase 3: Document Flows + Subledger Linking
1886        let (mut document_flows, mut subledger, fa_journal_entries) =
1887            self.phase_document_flows(&mut stats)?;
1888
1889        // Emit document flows to stream sink
1890        self.emit_phase_items(
1891            "document_flows",
1892            "PurchaseOrder",
1893            &document_flows.purchase_orders,
1894        );
1895        self.emit_phase_items(
1896            "document_flows",
1897            "GoodsReceipt",
1898            &document_flows.goods_receipts,
1899        );
1900        self.emit_phase_items(
1901            "document_flows",
1902            "VendorInvoice",
1903            &document_flows.vendor_invoices,
1904        );
1905        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1906        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1907
1908        // Phase 3b: Opening Balances (before JE generation)
1909        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1910
1911        // Phase 3c: Convert opening balances to journal entries and prepend them.
1912        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
1913        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
1914        // balance map type.
1915        let opening_balance_jes: Vec<JournalEntry> = opening_balances
1916            .iter()
1917            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1918            .collect();
1919        if !opening_balance_jes.is_empty() {
1920            debug!(
1921                "Prepending {} opening balance JEs to entries",
1922                opening_balance_jes.len()
1923            );
1924        }
1925
1926        // Phase 4: Journal Entries
1927        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1928
1929        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
1930        // starts from the correct initial state.
1931        if !opening_balance_jes.is_empty() {
1932            let mut combined = opening_balance_jes;
1933            combined.extend(entries);
1934            entries = combined;
1935        }
1936
1937        // Phase 4c: Append FA acquisition journal entries to main entries
1938        if !fa_journal_entries.is_empty() {
1939            debug!(
1940                "Appending {} FA acquisition JEs to main entries",
1941                fa_journal_entries.len()
1942            );
1943            entries.extend(fa_journal_entries);
1944        }
1945
1946        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
1947        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1948
1949        // Get current degradation actions for optional phases
1950        let actions = self.get_degradation_actions();
1951
1952        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1953        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1954
1955        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
1956        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
1957        if !sourcing.contracts.is_empty() {
1958            let mut linked_count = 0usize;
1959            // Collect (vendor_id, po_id) pairs from P2P chains
1960            let po_vendor_pairs: Vec<(String, String)> = document_flows
1961                .p2p_chains
1962                .iter()
1963                .map(|chain| {
1964                    (
1965                        chain.purchase_order.vendor_id.clone(),
1966                        chain.purchase_order.header.document_id.clone(),
1967                    )
1968                })
1969                .collect();
1970
1971            for chain in &mut document_flows.p2p_chains {
1972                if chain.purchase_order.contract_id.is_none() {
1973                    if let Some(contract) = sourcing
1974                        .contracts
1975                        .iter()
1976                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1977                    {
1978                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1979                        linked_count += 1;
1980                    }
1981                }
1982            }
1983
1984            // Populate reverse FK: purchase_order_ids on each contract
1985            for contract in &mut sourcing.contracts {
1986                let po_ids: Vec<String> = po_vendor_pairs
1987                    .iter()
1988                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
1989                    .map(|(_, po_id)| po_id.clone())
1990                    .collect();
1991                if !po_ids.is_empty() {
1992                    contract.purchase_order_ids = po_ids;
1993                }
1994            }
1995
1996            if linked_count > 0 {
1997                debug!(
1998                    "Linked {} purchase orders to S2C contracts by vendor match",
1999                    linked_count
2000                );
2001            }
2002        }
2003
2004        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2005        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2006
2007        // Phase 5c: Append IC journal entries to main entries
2008        if !intercompany.seller_journal_entries.is_empty()
2009            || !intercompany.buyer_journal_entries.is_empty()
2010        {
2011            let ic_je_count = intercompany.seller_journal_entries.len()
2012                + intercompany.buyer_journal_entries.len();
2013            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2014            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2015            debug!(
2016                "Appended {} IC journal entries to main entries",
2017                ic_je_count
2018            );
2019        }
2020
2021        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2022        if !intercompany.elimination_entries.is_empty() {
2023            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2024                &intercompany.elimination_entries,
2025            );
2026            if !elim_jes.is_empty() {
2027                debug!(
2028                    "Appended {} elimination journal entries to main entries",
2029                    elim_jes.len()
2030                );
2031                // IC elimination net-zero validation
2032                let elim_debit: rust_decimal::Decimal =
2033                    elim_jes.iter().map(|je| je.total_debit()).sum();
2034                let elim_credit: rust_decimal::Decimal =
2035                    elim_jes.iter().map(|je| je.total_credit()).sum();
2036                if elim_debit != elim_credit {
2037                    warn!(
2038                        "IC elimination entries not balanced: debits={}, credits={}, diff={}",
2039                        elim_debit,
2040                        elim_credit,
2041                        elim_debit - elim_credit
2042                    );
2043                }
2044                entries.extend(elim_jes);
2045            }
2046        }
2047
2048        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2049        let hr = self.phase_hr_data(&mut stats)?;
2050
2051        // Phase 6b: Generate JEs from payroll runs
2052        if !hr.payroll_runs.is_empty() {
2053            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2054            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2055            entries.extend(payroll_jes);
2056        }
2057
2058        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2059        if !hr.pension_journal_entries.is_empty() {
2060            debug!(
2061                "Generated {} JEs from pension plans",
2062                hr.pension_journal_entries.len()
2063            );
2064            entries.extend(hr.pension_journal_entries.iter().cloned());
2065        }
2066
2067        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2068        if !hr.stock_comp_journal_entries.is_empty() {
2069            debug!(
2070                "Generated {} JEs from stock-based compensation",
2071                hr.stock_comp_journal_entries.len()
2072            );
2073            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2074        }
2075
2076        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2077        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2078
2079        // Phase 7a: Generate JEs from production orders
2080        if !manufacturing_snap.production_orders.is_empty() {
2081            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
2082            debug!("Generated {} JEs from production orders", mfg_jes.len());
2083            entries.extend(mfg_jes);
2084        }
2085
2086        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2087        //
2088        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2089        // subledger inventory positions.  Here we reconcile them so that position balances
2090        // reflect the actual stock movements within the generation period.
2091        if !manufacturing_snap.inventory_movements.is_empty()
2092            && !subledger.inventory_positions.is_empty()
2093        {
2094            use datasynth_core::models::MovementType as MfgMovementType;
2095            let mut receipt_count = 0usize;
2096            let mut issue_count = 0usize;
2097            for movement in &manufacturing_snap.inventory_movements {
2098                // Find a matching position by material code and company
2099                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2100                    p.material_id == movement.material_code
2101                        && p.company_code == movement.entity_code
2102                }) {
2103                    match movement.movement_type {
2104                        MfgMovementType::GoodsReceipt => {
2105                            // Increase stock and update weighted-average cost
2106                            pos.add_quantity(
2107                                movement.quantity,
2108                                movement.value,
2109                                movement.movement_date,
2110                            );
2111                            receipt_count += 1;
2112                        }
2113                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2114                            // Decrease stock (best-effort; silently skip if insufficient)
2115                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2116                            issue_count += 1;
2117                        }
2118                        _ => {}
2119                    }
2120                }
2121            }
2122            debug!(
2123                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2124                manufacturing_snap.inventory_movements.len(),
2125                receipt_count,
2126                issue_count,
2127            );
2128        }
2129
2130        // Update final entry/line-item stats after all JE-generating phases
2131        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2132        if !entries.is_empty() {
2133            stats.total_entries = entries.len() as u64;
2134            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2135            debug!(
2136                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2137                stats.total_entries, stats.total_line_items
2138            );
2139        }
2140
2141        // Phase 7b: Apply internal controls to journal entries
2142        if self.config.internal_controls.enabled && !entries.is_empty() {
2143            info!("Phase 7b: Applying internal controls to journal entries");
2144            let control_config = ControlGeneratorConfig {
2145                exception_rate: self.config.internal_controls.exception_rate,
2146                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2147                enable_sox_marking: true,
2148                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2149                    self.config.internal_controls.sox_materiality_threshold,
2150                )
2151                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2152                ..Default::default()
2153            };
2154            let mut control_gen = ControlGenerator::with_config(self.seed + 99, control_config);
2155            for entry in &mut entries {
2156                control_gen.apply_controls(entry, &coa);
2157            }
2158            let with_controls = entries
2159                .iter()
2160                .filter(|e| !e.header.control_ids.is_empty())
2161                .count();
2162            info!(
2163                "Applied controls to {} entries ({} with control IDs assigned)",
2164                entries.len(),
2165                with_controls
2166            );
2167        }
2168
2169        // Phase 7c: Extract SoD violations from annotated journal entries.
2170        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2171        // Here we materialise those flags into standalone SodViolation records.
2172        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2173            .iter()
2174            .filter(|e| e.header.sod_violation)
2175            .filter_map(|e| {
2176                e.header.sod_conflict_type.map(|ct| {
2177                    use datasynth_core::models::{RiskLevel, SodViolation};
2178                    let severity = match ct {
2179                        datasynth_core::models::SodConflictType::PaymentReleaser
2180                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2181                            RiskLevel::Critical
2182                        }
2183                        datasynth_core::models::SodConflictType::PreparerApprover
2184                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2185                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2186                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2187                            RiskLevel::High
2188                        }
2189                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2190                            RiskLevel::Medium
2191                        }
2192                    };
2193                    let action = format!(
2194                        "SoD conflict {:?} on entry {} ({})",
2195                        ct, e.header.document_id, e.header.company_code
2196                    );
2197                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2198                })
2199            })
2200            .collect();
2201        if !sod_violations.is_empty() {
2202            info!(
2203                "Phase 7c: Extracted {} SoD violations from {} entries",
2204                sod_violations.len(),
2205                entries.len()
2206            );
2207        }
2208
2209        // Emit journal entries to stream sink (after all JE-generating phases)
2210        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2211
2212        // Phase 8: Anomaly Injection (after all JE-generating phases)
2213        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2214
2215        // Emit anomaly labels to stream sink
2216        self.emit_phase_items(
2217            "anomaly_injection",
2218            "LabeledAnomaly",
2219            &anomaly_labels.labels,
2220        );
2221
2222        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2223        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2224
2225        // Emit red flags to stream sink
2226        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2227
2228        // Phase 26b: Collusion Ring Generation (after red flags)
2229        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2230
2231        // Emit collusion rings to stream sink
2232        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2233
2234        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2235        let balance_validation = self.phase_balance_validation(&entries)?;
2236
2237        // Phase 9b: GL-to-Subledger Reconciliation
2238        let subledger_reconciliation =
2239            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2240
2241        // Phase 10: Data Quality Injection
2242        let (data_quality_stats, quality_issues) =
2243            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2244
2245        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2246        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2247
2248        // Phase 11: Audit Data
2249        let audit = self.phase_audit_data(&entries, &mut stats)?;
2250
2251        // Phase 12: Banking KYC/AML Data
2252        let banking = self.phase_banking_data(&mut stats)?;
2253
2254        // Phase 13: Graph Export
2255        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2256
2257        // Phase 14: LLM Enrichment
2258        self.phase_llm_enrichment(&mut stats);
2259
2260        // Phase 15: Diffusion Enhancement
2261        self.phase_diffusion_enhancement(&mut stats);
2262
2263        // Phase 16: Causal Overlay
2264        self.phase_causal_overlay(&mut stats);
2265
2266        // Phase 17: Bank Reconciliation + Financial Statements
2267        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
2268        // provision data (from accounting_standards / tax snapshots) can be wired in.
2269        let mut financial_reporting = self.phase_financial_reporting(
2270            &document_flows,
2271            &entries,
2272            &coa,
2273            &hr,
2274            &audit,
2275            &mut stats,
2276        )?;
2277
2278        // BS coherence check: assets = liabilities + equity
2279        {
2280            use datasynth_core::models::StatementType;
2281            for stmt in &financial_reporting.consolidated_statements {
2282                if stmt.statement_type == StatementType::BalanceSheet {
2283                    let total_assets: rust_decimal::Decimal = stmt
2284                        .line_items
2285                        .iter()
2286                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
2287                        .map(|li| li.amount)
2288                        .sum();
2289                    let total_le: rust_decimal::Decimal = stmt
2290                        .line_items
2291                        .iter()
2292                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2293                        .map(|li| li.amount)
2294                        .sum();
2295                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2296                        warn!(
2297                            "BS equation imbalance: assets={}, L+E={}",
2298                            total_assets, total_le
2299                        );
2300                    }
2301                }
2302            }
2303        }
2304
2305        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
2306        let accounting_standards =
2307            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2308
2309        // Phase 18a: Merge ECL journal entries into main GL
2310        if !accounting_standards.ecl_journal_entries.is_empty() {
2311            debug!(
2312                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2313                accounting_standards.ecl_journal_entries.len()
2314            );
2315            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2316        }
2317
2318        // Phase 18a: Merge provision journal entries into main GL
2319        if !accounting_standards.provision_journal_entries.is_empty() {
2320            debug!(
2321                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2322                accounting_standards.provision_journal_entries.len()
2323            );
2324            entries.extend(
2325                accounting_standards
2326                    .provision_journal_entries
2327                    .iter()
2328                    .cloned(),
2329            );
2330        }
2331
2332        // Phase 18b: OCPM Events (after all process data is available)
2333        let ocpm = self.phase_ocpm_events(
2334            &document_flows,
2335            &sourcing,
2336            &hr,
2337            &manufacturing_snap,
2338            &banking,
2339            &audit,
2340            &financial_reporting,
2341            &mut stats,
2342        )?;
2343
2344        // Emit OCPM events to stream sink
2345        if let Some(ref event_log) = ocpm.event_log {
2346            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2347        }
2348
2349        // Phase 19: Sales Quotes, Management KPIs, Budgets
2350        let sales_kpi_budgets =
2351            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2352
2353        // Phase 20: Tax Generation
2354        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2355
2356        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
2357        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
2358        self.generate_notes_to_financial_statements(
2359            &mut financial_reporting,
2360            &accounting_standards,
2361            &tax,
2362            &hr,
2363            &audit,
2364        );
2365
2366        // Phase 21: ESG Data Generation
2367        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
2368
2369        // Phase 22: Treasury Data Generation
2370        let treasury =
2371            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2372
2373        // Phase 23: Project Accounting Data Generation
2374        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2375
2376        // Phase 24: Process Evolution + Organizational Events
2377        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2378
2379        // Phase 24b: Disruption Events
2380        let disruption_events = self.phase_disruption_events(&mut stats)?;
2381
2382        // Phase 27: Bi-Temporal Vendor Version Chains
2383        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2384
2385        // Phase 28: Entity Relationship Graph + Cross-Process Links
2386        let (entity_relationship_graph, cross_process_links) =
2387            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2388
2389        // Phase 29: Industry-specific GL accounts
2390        let industry_output = self.phase_industry_data(&mut stats);
2391
2392        // Phase: Compliance regulations (must run before hypergraph so it can be included)
2393        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2394
2395        // Phase 19b: Hypergraph Export (after all data is available)
2396        self.phase_hypergraph_export(
2397            &coa,
2398            &entries,
2399            &document_flows,
2400            &sourcing,
2401            &hr,
2402            &manufacturing_snap,
2403            &banking,
2404            &audit,
2405            &financial_reporting,
2406            &ocpm,
2407            &compliance_regulations,
2408            &mut stats,
2409        )?;
2410
2411        // Phase 10c: Additional graph builders (approval, entity, banking)
2412        // These run after all data is available since they need banking/IC data.
2413        if self.phase_config.generate_graph_export {
2414            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2415        }
2416
2417        // Log informational messages for config sections not yet fully wired
2418        if self.config.streaming.enabled {
2419            info!("Note: streaming config is enabled but batch mode does not use it");
2420        }
2421        if self.config.vendor_network.enabled {
2422            debug!("Vendor network config available; relationship graph generation is partial");
2423        }
2424        if self.config.customer_segmentation.enabled {
2425            debug!("Customer segmentation config available; segment-aware generation is partial");
2426        }
2427
2428        // Log final resource statistics
2429        let resource_stats = self.resource_guard.stats();
2430        info!(
2431            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2432            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2433            resource_stats.disk.estimated_bytes_written,
2434            resource_stats.degradation_level
2435        );
2436
2437        // Flush any remaining stream sink data
2438        if let Some(ref sink) = self.phase_sink {
2439            if let Err(e) = sink.flush() {
2440                warn!("Stream sink flush failed: {e}");
2441            }
2442        }
2443
2444        // Build data lineage graph
2445        let lineage = self.build_lineage_graph();
2446
2447        // Evaluate quality gates if enabled in config
2448        let gate_result = if self.config.quality_gates.enabled {
2449            let profile_name = &self.config.quality_gates.profile;
2450            match datasynth_eval::gates::get_profile(profile_name) {
2451                Some(profile) => {
2452                    // Build an evaluation populated with actual generation metrics.
2453                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
2454
2455                    // Populate balance sheet evaluation from balance validation results
2456                    if balance_validation.validated {
2457                        eval.coherence.balance =
2458                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
2459                                equation_balanced: balance_validation.is_balanced,
2460                                max_imbalance: (balance_validation.total_debits
2461                                    - balance_validation.total_credits)
2462                                    .abs(),
2463                                periods_evaluated: 1,
2464                                periods_imbalanced: if balance_validation.is_balanced {
2465                                    0
2466                                } else {
2467                                    1
2468                                },
2469                                period_results: Vec::new(),
2470                                companies_evaluated: self.config.companies.len(),
2471                            });
2472                    }
2473
2474                    // Set coherence passes based on balance validation
2475                    eval.coherence.passes = balance_validation.is_balanced;
2476                    if !balance_validation.is_balanced {
2477                        eval.coherence
2478                            .failures
2479                            .push("Balance sheet equation not satisfied".to_string());
2480                    }
2481
2482                    // Set statistical score based on entry count (basic sanity)
2483                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
2484                    eval.statistical.passes = !entries.is_empty();
2485
2486                    // Set quality score from data quality stats
2487                    eval.quality.overall_score = 0.9; // Default high for generated data
2488                    eval.quality.passes = true;
2489
2490                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
2491                    info!(
2492                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
2493                        profile_name, result.gates_passed, result.gates_total, result.summary
2494                    );
2495                    Some(result)
2496                }
2497                None => {
2498                    warn!(
2499                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
2500                        profile_name
2501                    );
2502                    None
2503                }
2504            }
2505        } else {
2506            None
2507        };
2508
2509        // Generate internal controls if enabled
2510        let internal_controls = if self.config.internal_controls.enabled {
2511            InternalControl::standard_controls()
2512        } else {
2513            Vec::new()
2514        };
2515
2516        Ok(EnhancedGenerationResult {
2517            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
2518            master_data: std::mem::take(&mut self.master_data),
2519            document_flows,
2520            subledger,
2521            ocpm,
2522            audit,
2523            banking,
2524            graph_export,
2525            sourcing,
2526            financial_reporting,
2527            hr,
2528            accounting_standards,
2529            manufacturing: manufacturing_snap,
2530            sales_kpi_budgets,
2531            tax,
2532            esg: esg_snap,
2533            treasury,
2534            project_accounting,
2535            process_evolution,
2536            organizational_events,
2537            disruption_events,
2538            intercompany,
2539            journal_entries: entries,
2540            anomaly_labels,
2541            balance_validation,
2542            data_quality_stats,
2543            quality_issues,
2544            statistics: stats,
2545            lineage: Some(lineage),
2546            gate_result,
2547            internal_controls,
2548            sod_violations,
2549            opening_balances,
2550            subledger_reconciliation,
2551            counterfactual_pairs,
2552            red_flags,
2553            collusion_rings,
2554            temporal_vendor_chains,
2555            entity_relationship_graph,
2556            cross_process_links,
2557            industry_output,
2558            compliance_regulations,
2559        })
2560    }
2561
2562    // ========================================================================
2563    // Generation Phase Methods
2564    // ========================================================================
2565
2566    /// Phase 1: Generate Chart of Accounts and update statistics.
2567    fn phase_chart_of_accounts(
2568        &mut self,
2569        stats: &mut EnhancedGenerationStatistics,
2570    ) -> SynthResult<Arc<ChartOfAccounts>> {
2571        info!("Phase 1: Generating Chart of Accounts");
2572        let coa = self.generate_coa()?;
2573        stats.accounts_count = coa.account_count();
2574        info!(
2575            "Chart of Accounts generated: {} accounts",
2576            stats.accounts_count
2577        );
2578        self.check_resources_with_log("post-coa")?;
2579        Ok(coa)
2580    }
2581
2582    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
2583    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2584        if self.phase_config.generate_master_data {
2585            info!("Phase 2: Generating Master Data");
2586            self.generate_master_data()?;
2587            stats.vendor_count = self.master_data.vendors.len();
2588            stats.customer_count = self.master_data.customers.len();
2589            stats.material_count = self.master_data.materials.len();
2590            stats.asset_count = self.master_data.assets.len();
2591            stats.employee_count = self.master_data.employees.len();
2592            info!(
2593                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
2594                stats.vendor_count, stats.customer_count, stats.material_count,
2595                stats.asset_count, stats.employee_count
2596            );
2597            self.check_resources_with_log("post-master-data")?;
2598        } else {
2599            debug!("Phase 2: Skipped (master data generation disabled)");
2600        }
2601        Ok(())
2602    }
2603
2604    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
2605    fn phase_document_flows(
2606        &mut self,
2607        stats: &mut EnhancedGenerationStatistics,
2608    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
2609        let mut document_flows = DocumentFlowSnapshot::default();
2610        let mut subledger = SubledgerSnapshot::default();
2611        // Dunning JEs (interest + charges) accumulated here and merged into the
2612        // main FA-JE list below so they appear in the GL.
2613        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
2614
2615        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
2616            info!("Phase 3: Generating Document Flows");
2617            self.generate_document_flows(&mut document_flows)?;
2618            stats.p2p_chain_count = document_flows.p2p_chains.len();
2619            stats.o2c_chain_count = document_flows.o2c_chains.len();
2620            info!(
2621                "Document flows generated: {} P2P chains, {} O2C chains",
2622                stats.p2p_chain_count, stats.o2c_chain_count
2623            );
2624
2625            // Phase 3b: Link document flows to subledgers (for data coherence)
2626            debug!("Phase 3b: Linking document flows to subledgers");
2627            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
2628            stats.ap_invoice_count = subledger.ap_invoices.len();
2629            stats.ar_invoice_count = subledger.ar_invoices.len();
2630            debug!(
2631                "Subledgers linked: {} AP invoices, {} AR invoices",
2632                stats.ap_invoice_count, stats.ar_invoice_count
2633            );
2634
2635            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
2636            // Without this step the subledger is systematically overstated because
2637            // amount_remaining is set at invoice creation and never reduced by
2638            // the payments that were generated in the document-flow phase.
2639            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
2640            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
2641            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
2642            debug!("Payment settlements applied to AP and AR subledgers");
2643
2644            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
2645            // The as-of date is the last day of the configured period.
2646            if let Ok(start_date) =
2647                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2648            {
2649                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
2650                    - chrono::Days::new(1);
2651                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
2652                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
2653                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
2654                // derived from JE-level aggregation and will typically differ. This is a known
2655                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
2656                // generated independently. A future reconciliation phase should align them by
2657                // using subledger totals as the authoritative source for BS Receivables.
2658                for company in &self.config.companies {
2659                    let ar_report = ARAgingReport::from_invoices(
2660                        company.code.clone(),
2661                        &subledger.ar_invoices,
2662                        as_of_date,
2663                    );
2664                    subledger.ar_aging_reports.push(ar_report);
2665
2666                    let ap_report = APAgingReport::from_invoices(
2667                        company.code.clone(),
2668                        &subledger.ap_invoices,
2669                        as_of_date,
2670                    );
2671                    subledger.ap_aging_reports.push(ap_report);
2672                }
2673                debug!(
2674                    "AR/AP aging reports built: {} AR, {} AP",
2675                    subledger.ar_aging_reports.len(),
2676                    subledger.ap_aging_reports.len()
2677                );
2678
2679                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
2680                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
2681                {
2682                    use datasynth_generators::DunningGenerator;
2683                    let mut dunning_gen = DunningGenerator::new(self.seed + 2000);
2684                    for company in &self.config.companies {
2685                        let currency = company.currency.as_str();
2686                        // Collect mutable references to AR invoices for this company
2687                        // (dunning generator updates dunning_info on invoices in-place).
2688                        let mut company_invoices: Vec<
2689                            datasynth_core::models::subledger::ar::ARInvoice,
2690                        > = subledger
2691                            .ar_invoices
2692                            .iter()
2693                            .filter(|inv| inv.company_code == company.code)
2694                            .cloned()
2695                            .collect();
2696
2697                        if company_invoices.is_empty() {
2698                            continue;
2699                        }
2700
2701                        let result = dunning_gen.execute_dunning_run(
2702                            &company.code,
2703                            as_of_date,
2704                            &mut company_invoices,
2705                            currency,
2706                        );
2707
2708                        // Write back updated dunning info to the main AR invoice list
2709                        for updated in &company_invoices {
2710                            if let Some(orig) = subledger
2711                                .ar_invoices
2712                                .iter_mut()
2713                                .find(|i| i.invoice_number == updated.invoice_number)
2714                            {
2715                                orig.dunning_info = updated.dunning_info.clone();
2716                            }
2717                        }
2718
2719                        subledger.dunning_runs.push(result.dunning_run);
2720                        subledger.dunning_letters.extend(result.letters);
2721                        // Dunning JEs (interest + charges) collected into local buffer.
2722                        dunning_journal_entries.extend(result.journal_entries);
2723                    }
2724                    debug!(
2725                        "Dunning runs complete: {} runs, {} letters",
2726                        subledger.dunning_runs.len(),
2727                        subledger.dunning_letters.len()
2728                    );
2729                }
2730            }
2731
2732            self.check_resources_with_log("post-document-flows")?;
2733        } else {
2734            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
2735        }
2736
2737        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
2738        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
2739        if !self.master_data.assets.is_empty() {
2740            debug!("Generating FA subledger records");
2741            let company_code = self
2742                .config
2743                .companies
2744                .first()
2745                .map(|c| c.code.as_str())
2746                .unwrap_or("1000");
2747            let currency = self
2748                .config
2749                .companies
2750                .first()
2751                .map(|c| c.currency.as_str())
2752                .unwrap_or("USD");
2753
2754            let mut fa_gen = datasynth_generators::FAGenerator::new(
2755                datasynth_generators::FAGeneratorConfig::default(),
2756                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
2757            );
2758
2759            for asset in &self.master_data.assets {
2760                let (record, je) = fa_gen.generate_asset_acquisition(
2761                    company_code,
2762                    &format!("{:?}", asset.asset_class),
2763                    &asset.description,
2764                    asset.acquisition_date,
2765                    currency,
2766                    asset.cost_center.as_deref(),
2767                );
2768                subledger.fa_records.push(record);
2769                fa_journal_entries.push(je);
2770            }
2771
2772            stats.fa_subledger_count = subledger.fa_records.len();
2773            debug!(
2774                "FA subledger records generated: {} (with {} acquisition JEs)",
2775                stats.fa_subledger_count,
2776                fa_journal_entries.len()
2777            );
2778        }
2779
2780        // Generate Inventory subledger records from master data materials
2781        if !self.master_data.materials.is_empty() {
2782            debug!("Generating Inventory subledger records");
2783            let first_company = self.config.companies.first();
2784            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
2785            let inv_currency = first_company
2786                .map(|c| c.currency.clone())
2787                .unwrap_or_else(|| "USD".to_string());
2788
2789            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
2790                datasynth_generators::InventoryGeneratorConfig::default(),
2791                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
2792                inv_currency.clone(),
2793            );
2794
2795            for (i, material) in self.master_data.materials.iter().enumerate() {
2796                let plant = format!("PLANT{:02}", (i % 3) + 1);
2797                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
2798                let initial_qty = rust_decimal::Decimal::from(
2799                    material
2800                        .safety_stock
2801                        .to_string()
2802                        .parse::<i64>()
2803                        .unwrap_or(100),
2804                );
2805
2806                let position = inv_gen.generate_position(
2807                    company_code,
2808                    &plant,
2809                    &storage_loc,
2810                    &material.material_id,
2811                    &material.description,
2812                    initial_qty,
2813                    Some(material.standard_cost),
2814                    &inv_currency,
2815                );
2816                subledger.inventory_positions.push(position);
2817            }
2818
2819            stats.inventory_subledger_count = subledger.inventory_positions.len();
2820            debug!(
2821                "Inventory subledger records generated: {}",
2822                stats.inventory_subledger_count
2823            );
2824        }
2825
2826        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
2827        if !subledger.fa_records.is_empty() {
2828            if let Ok(start_date) =
2829                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2830            {
2831                let company_code = self
2832                    .config
2833                    .companies
2834                    .first()
2835                    .map(|c| c.code.as_str())
2836                    .unwrap_or("1000");
2837                let fiscal_year = start_date.year();
2838                let start_period = start_date.month();
2839                let end_period =
2840                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
2841
2842                let depr_cfg = FaDepreciationScheduleConfig {
2843                    fiscal_year,
2844                    start_period,
2845                    end_period,
2846                    seed_offset: 800,
2847                };
2848                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
2849                let runs = depr_gen.generate(company_code, &subledger.fa_records);
2850                let run_count = runs.len();
2851                subledger.depreciation_runs = runs;
2852                debug!(
2853                    "Depreciation runs generated: {} runs for {} periods",
2854                    run_count, self.config.global.period_months
2855                );
2856            }
2857        }
2858
2859        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
2860        if !subledger.inventory_positions.is_empty() {
2861            if let Ok(start_date) =
2862                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2863            {
2864                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
2865                    - chrono::Days::new(1);
2866
2867                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
2868                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
2869
2870                for company in &self.config.companies {
2871                    let result = inv_val_gen.generate(
2872                        &company.code,
2873                        &subledger.inventory_positions,
2874                        as_of_date,
2875                    );
2876                    subledger.inventory_valuations.push(result);
2877                }
2878                debug!(
2879                    "Inventory valuations generated: {} company reports",
2880                    subledger.inventory_valuations.len()
2881                );
2882            }
2883        }
2884
2885        Ok((document_flows, subledger, fa_journal_entries))
2886    }
2887
2888    /// Phase 3c: Generate OCPM events from document flows.
2889    #[allow(clippy::too_many_arguments)]
2890    fn phase_ocpm_events(
2891        &mut self,
2892        document_flows: &DocumentFlowSnapshot,
2893        sourcing: &SourcingSnapshot,
2894        hr: &HrSnapshot,
2895        manufacturing: &ManufacturingSnapshot,
2896        banking: &BankingSnapshot,
2897        audit: &AuditSnapshot,
2898        financial_reporting: &FinancialReportingSnapshot,
2899        stats: &mut EnhancedGenerationStatistics,
2900    ) -> SynthResult<OcpmSnapshot> {
2901        let degradation = self.check_resources()?;
2902        if degradation >= DegradationLevel::Reduced {
2903            debug!(
2904                "Phase skipped due to resource pressure (degradation: {:?})",
2905                degradation
2906            );
2907            return Ok(OcpmSnapshot::default());
2908        }
2909        if self.phase_config.generate_ocpm_events {
2910            info!("Phase 3c: Generating OCPM Events");
2911            let ocpm_snapshot = self.generate_ocpm_events(
2912                document_flows,
2913                sourcing,
2914                hr,
2915                manufacturing,
2916                banking,
2917                audit,
2918                financial_reporting,
2919            )?;
2920            stats.ocpm_event_count = ocpm_snapshot.event_count;
2921            stats.ocpm_object_count = ocpm_snapshot.object_count;
2922            stats.ocpm_case_count = ocpm_snapshot.case_count;
2923            info!(
2924                "OCPM events generated: {} events, {} objects, {} cases",
2925                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
2926            );
2927            self.check_resources_with_log("post-ocpm")?;
2928            Ok(ocpm_snapshot)
2929        } else {
2930            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
2931            Ok(OcpmSnapshot::default())
2932        }
2933    }
2934
2935    /// Phase 4: Generate journal entries from document flows and standalone generation.
2936    fn phase_journal_entries(
2937        &mut self,
2938        coa: &Arc<ChartOfAccounts>,
2939        document_flows: &DocumentFlowSnapshot,
2940        _stats: &mut EnhancedGenerationStatistics,
2941    ) -> SynthResult<Vec<JournalEntry>> {
2942        let mut entries = Vec::new();
2943
2944        // Phase 4a: Generate JEs from document flows (for data coherence)
2945        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
2946            debug!("Phase 4a: Generating JEs from document flows");
2947            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
2948            debug!("Generated {} JEs from document flows", flow_entries.len());
2949            entries.extend(flow_entries);
2950        }
2951
2952        // Phase 4b: Generate standalone journal entries
2953        if self.phase_config.generate_journal_entries {
2954            info!("Phase 4: Generating Journal Entries");
2955            let je_entries = self.generate_journal_entries(coa)?;
2956            info!("Generated {} standalone journal entries", je_entries.len());
2957            entries.extend(je_entries);
2958        } else {
2959            debug!("Phase 4: Skipped (journal entry generation disabled)");
2960        }
2961
2962        if !entries.is_empty() {
2963            // Note: stats.total_entries/total_line_items are set in generate()
2964            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
2965            self.check_resources_with_log("post-journal-entries")?;
2966        }
2967
2968        Ok(entries)
2969    }
2970
2971    /// Phase 5: Inject anomalies into journal entries.
2972    fn phase_anomaly_injection(
2973        &mut self,
2974        entries: &mut [JournalEntry],
2975        actions: &DegradationActions,
2976        stats: &mut EnhancedGenerationStatistics,
2977    ) -> SynthResult<AnomalyLabels> {
2978        if self.phase_config.inject_anomalies
2979            && !entries.is_empty()
2980            && !actions.skip_anomaly_injection
2981        {
2982            info!("Phase 5: Injecting Anomalies");
2983            let result = self.inject_anomalies(entries)?;
2984            stats.anomalies_injected = result.labels.len();
2985            info!("Injected {} anomalies", stats.anomalies_injected);
2986            self.check_resources_with_log("post-anomaly-injection")?;
2987            Ok(result)
2988        } else if actions.skip_anomaly_injection {
2989            warn!("Phase 5: Skipped due to resource degradation");
2990            Ok(AnomalyLabels::default())
2991        } else {
2992            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
2993            Ok(AnomalyLabels::default())
2994        }
2995    }
2996
2997    /// Phase 6: Validate balance sheet equation on journal entries.
2998    fn phase_balance_validation(
2999        &mut self,
3000        entries: &[JournalEntry],
3001    ) -> SynthResult<BalanceValidationResult> {
3002        if self.phase_config.validate_balances && !entries.is_empty() {
3003            debug!("Phase 6: Validating Balances");
3004            let balance_validation = self.validate_journal_entries(entries)?;
3005            if balance_validation.is_balanced {
3006                debug!("Balance validation passed");
3007            } else {
3008                warn!(
3009                    "Balance validation found {} errors",
3010                    balance_validation.validation_errors.len()
3011                );
3012            }
3013            Ok(balance_validation)
3014        } else {
3015            Ok(BalanceValidationResult::default())
3016        }
3017    }
3018
3019    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
3020    fn phase_data_quality_injection(
3021        &mut self,
3022        entries: &mut [JournalEntry],
3023        actions: &DegradationActions,
3024        stats: &mut EnhancedGenerationStatistics,
3025    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3026        if self.phase_config.inject_data_quality
3027            && !entries.is_empty()
3028            && !actions.skip_data_quality
3029        {
3030            info!("Phase 7: Injecting Data Quality Variations");
3031            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3032            stats.data_quality_issues = dq_stats.records_with_issues;
3033            info!("Injected {} data quality issues", stats.data_quality_issues);
3034            self.check_resources_with_log("post-data-quality")?;
3035            Ok((dq_stats, quality_issues))
3036        } else if actions.skip_data_quality {
3037            warn!("Phase 7: Skipped due to resource degradation");
3038            Ok((DataQualityStats::default(), Vec::new()))
3039        } else {
3040            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3041            Ok((DataQualityStats::default(), Vec::new()))
3042        }
3043    }
3044
3045    /// Phase 10b: Generate period-close journal entries.
3046    ///
3047    /// Generates:
3048    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
3049    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
3050    ///    for the configured period.
3051    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
3052    /// 3. Income statement closing JE per company: transfer net income after tax to retained
3053    ///    earnings via the Income Summary (3600) clearing account.
3054    fn phase_period_close(
3055        &mut self,
3056        entries: &mut Vec<JournalEntry>,
3057        subledger: &SubledgerSnapshot,
3058        stats: &mut EnhancedGenerationStatistics,
3059    ) -> SynthResult<()> {
3060        if !self.phase_config.generate_period_close || entries.is_empty() {
3061            debug!("Phase 10b: Skipped (period close disabled or no entries)");
3062            return Ok(());
3063        }
3064
3065        info!("Phase 10b: Generating period-close journal entries");
3066
3067        use datasynth_core::accounts::{
3068            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3069        };
3070        use rust_decimal::Decimal;
3071
3072        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3073            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3074        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3075        // Posting date for close entries is the last day of the period
3076        let close_date = end_date - chrono::Days::new(1);
3077
3078        // Statutory tax rate (21% — configurable rates come in later tiers)
3079        let tax_rate = Decimal::new(21, 2); // 0.21
3080
3081        // Collect company codes from config
3082        let company_codes: Vec<String> = self
3083            .config
3084            .companies
3085            .iter()
3086            .map(|c| c.code.clone())
3087            .collect();
3088
3089        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
3090        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3091        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3092
3093        // --- Depreciation JEs (per asset) ---
3094        // Compute period depreciation for each active fixed asset using straight-line method.
3095        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
3096        let period_months = self.config.global.period_months;
3097        for asset in &subledger.fa_records {
3098            // Skip assets that are inactive / fully depreciated / non-depreciable
3099            use datasynth_core::models::subledger::fa::AssetStatus;
3100            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3101                continue;
3102            }
3103            let useful_life_months = asset.useful_life_months();
3104            if useful_life_months == 0 {
3105                // Land or CIP — not depreciated
3106                continue;
3107            }
3108            let salvage_value = asset.salvage_value();
3109            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3110            if depreciable_base == Decimal::ZERO {
3111                continue;
3112            }
3113            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3114                * Decimal::from(period_months))
3115            .round_dp(2);
3116            if period_depr <= Decimal::ZERO {
3117                continue;
3118            }
3119
3120            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3121            depr_header.document_type = "CL".to_string();
3122            depr_header.header_text = Some(format!(
3123                "Depreciation - {} {}",
3124                asset.asset_number, asset.description
3125            ));
3126            depr_header.created_by = "CLOSE_ENGINE".to_string();
3127            depr_header.source = TransactionSource::Automated;
3128            depr_header.business_process = Some(BusinessProcess::R2R);
3129
3130            let doc_id = depr_header.document_id;
3131            let mut depr_je = JournalEntry::new(depr_header);
3132
3133            // DR Depreciation Expense (6000)
3134            depr_je.add_line(JournalEntryLine::debit(
3135                doc_id,
3136                1,
3137                expense_accounts::DEPRECIATION.to_string(),
3138                period_depr,
3139            ));
3140            // CR Accumulated Depreciation (1510)
3141            depr_je.add_line(JournalEntryLine::credit(
3142                doc_id,
3143                2,
3144                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3145                period_depr,
3146            ));
3147
3148            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3149            close_jes.push(depr_je);
3150        }
3151
3152        if !subledger.fa_records.is_empty() {
3153            debug!(
3154                "Generated {} depreciation JEs from {} FA records",
3155                close_jes.len(),
3156                subledger.fa_records.len()
3157            );
3158        }
3159
3160        // --- Accrual entries (standard period-end accruals per company) ---
3161        // Generate standard accrued expense entries (utilities, rent, interest) using
3162        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
3163        {
3164            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3165            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3166
3167            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
3168            let accrual_items: &[(&str, &str, &str)] = &[
3169                ("Accrued Utilities", "6200", "2100"),
3170                ("Accrued Rent", "6300", "2100"),
3171                ("Accrued Interest", "6100", "2150"),
3172            ];
3173
3174            for company_code in &company_codes {
3175                // Estimate company revenue from existing JEs
3176                let company_revenue: Decimal = entries
3177                    .iter()
3178                    .filter(|e| e.header.company_code == *company_code)
3179                    .flat_map(|e| e.lines.iter())
3180                    .filter(|l| l.gl_account.starts_with('4'))
3181                    .map(|l| l.credit_amount - l.debit_amount)
3182                    .fold(Decimal::ZERO, |acc, v| acc + v);
3183
3184                if company_revenue <= Decimal::ZERO {
3185                    continue;
3186                }
3187
3188                // Use 0.5% of period revenue per accrual item as a proxy
3189                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3190                if accrual_base <= Decimal::ZERO {
3191                    continue;
3192                }
3193
3194                for (description, expense_acct, liability_acct) in accrual_items {
3195                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3196                        company_code,
3197                        description,
3198                        accrual_base,
3199                        expense_acct,
3200                        liability_acct,
3201                        close_date,
3202                        None,
3203                    );
3204                    close_jes.push(accrual_je);
3205                    if let Some(rev_je) = reversal_je {
3206                        close_jes.push(rev_je);
3207                    }
3208                }
3209            }
3210
3211            debug!(
3212                "Generated accrual entries for {} companies",
3213                company_codes.len()
3214            );
3215        }
3216
3217        for company_code in &company_codes {
3218            // Calculate net income for this company from existing JEs:
3219            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
3220            // Revenue (4xxx): credit-normal, so net = credits - debits
3221            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
3222            let mut total_revenue = Decimal::ZERO;
3223            let mut total_expenses = Decimal::ZERO;
3224
3225            for entry in entries.iter() {
3226                if entry.header.company_code != *company_code {
3227                    continue;
3228                }
3229                for line in &entry.lines {
3230                    let category = AccountCategory::from_account(&line.gl_account);
3231                    match category {
3232                        AccountCategory::Revenue => {
3233                            // Revenue is credit-normal: net revenue = credits - debits
3234                            total_revenue += line.credit_amount - line.debit_amount;
3235                        }
3236                        AccountCategory::Cogs
3237                        | AccountCategory::OperatingExpense
3238                        | AccountCategory::OtherIncomeExpense
3239                        | AccountCategory::Tax => {
3240                            // Expenses are debit-normal: net expense = debits - credits
3241                            total_expenses += line.debit_amount - line.credit_amount;
3242                        }
3243                        _ => {}
3244                    }
3245                }
3246            }
3247
3248            let pre_tax_income = total_revenue - total_expenses;
3249
3250            // Skip if no income statement activity
3251            if pre_tax_income == Decimal::ZERO {
3252                debug!(
3253                    "Company {}: no pre-tax income, skipping period close",
3254                    company_code
3255                );
3256                continue;
3257            }
3258
3259            // --- Tax provision / DTA JE ---
3260            if pre_tax_income > Decimal::ZERO {
3261                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
3262                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3263
3264                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3265                tax_header.document_type = "CL".to_string();
3266                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3267                tax_header.created_by = "CLOSE_ENGINE".to_string();
3268                tax_header.source = TransactionSource::Automated;
3269                tax_header.business_process = Some(BusinessProcess::R2R);
3270
3271                let doc_id = tax_header.document_id;
3272                let mut tax_je = JournalEntry::new(tax_header);
3273
3274                // DR Tax Expense (8000)
3275                tax_je.add_line(JournalEntryLine::debit(
3276                    doc_id,
3277                    1,
3278                    tax_accounts::TAX_EXPENSE.to_string(),
3279                    tax_amount,
3280                ));
3281                // CR Income Tax Payable (2130)
3282                tax_je.add_line(JournalEntryLine::credit(
3283                    doc_id,
3284                    2,
3285                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3286                    tax_amount,
3287                ));
3288
3289                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3290                close_jes.push(tax_je);
3291            } else {
3292                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
3293                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
3294                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3295                if dta_amount > Decimal::ZERO {
3296                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3297                    dta_header.document_type = "CL".to_string();
3298                    dta_header.header_text =
3299                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
3300                    dta_header.created_by = "CLOSE_ENGINE".to_string();
3301                    dta_header.source = TransactionSource::Automated;
3302                    dta_header.business_process = Some(BusinessProcess::R2R);
3303
3304                    let doc_id = dta_header.document_id;
3305                    let mut dta_je = JournalEntry::new(dta_header);
3306
3307                    // DR Deferred Tax Asset (1600)
3308                    dta_je.add_line(JournalEntryLine::debit(
3309                        doc_id,
3310                        1,
3311                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3312                        dta_amount,
3313                    ));
3314                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
3315                    // reflecting the benefit of the future deductible temporary difference.
3316                    dta_je.add_line(JournalEntryLine::credit(
3317                        doc_id,
3318                        2,
3319                        tax_accounts::TAX_EXPENSE.to_string(),
3320                        dta_amount,
3321                    ));
3322
3323                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3324                    close_jes.push(dta_je);
3325                    debug!(
3326                        "Company {}: loss year — recognised DTA of {}",
3327                        company_code, dta_amount
3328                    );
3329                }
3330            }
3331
3332            // --- Income statement closing JE ---
3333            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
3334            // For a loss year the DTA JE above already recognises the deferred benefit; here we
3335            // close the pre-tax loss into Retained Earnings as-is.
3336            let tax_provision = if pre_tax_income > Decimal::ZERO {
3337                (pre_tax_income * tax_rate).round_dp(2)
3338            } else {
3339                Decimal::ZERO
3340            };
3341            let net_income = pre_tax_income - tax_provision;
3342
3343            if net_income != Decimal::ZERO {
3344                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3345                close_header.document_type = "CL".to_string();
3346                close_header.header_text =
3347                    Some(format!("Income statement close - {}", company_code));
3348                close_header.created_by = "CLOSE_ENGINE".to_string();
3349                close_header.source = TransactionSource::Automated;
3350                close_header.business_process = Some(BusinessProcess::R2R);
3351
3352                let doc_id = close_header.document_id;
3353                let mut close_je = JournalEntry::new(close_header);
3354
3355                let abs_net_income = net_income.abs();
3356
3357                if net_income > Decimal::ZERO {
3358                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
3359                    close_je.add_line(JournalEntryLine::debit(
3360                        doc_id,
3361                        1,
3362                        equity_accounts::INCOME_SUMMARY.to_string(),
3363                        abs_net_income,
3364                    ));
3365                    close_je.add_line(JournalEntryLine::credit(
3366                        doc_id,
3367                        2,
3368                        equity_accounts::RETAINED_EARNINGS.to_string(),
3369                        abs_net_income,
3370                    ));
3371                } else {
3372                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
3373                    close_je.add_line(JournalEntryLine::debit(
3374                        doc_id,
3375                        1,
3376                        equity_accounts::RETAINED_EARNINGS.to_string(),
3377                        abs_net_income,
3378                    ));
3379                    close_je.add_line(JournalEntryLine::credit(
3380                        doc_id,
3381                        2,
3382                        equity_accounts::INCOME_SUMMARY.to_string(),
3383                        abs_net_income,
3384                    ));
3385                }
3386
3387                debug_assert!(
3388                    close_je.is_balanced(),
3389                    "Income statement closing JE must be balanced"
3390                );
3391                close_jes.push(close_je);
3392            }
3393        }
3394
3395        let close_count = close_jes.len();
3396        if close_count > 0 {
3397            info!("Generated {} period-close journal entries", close_count);
3398            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
3399            entries.extend(close_jes);
3400            stats.period_close_je_count = close_count;
3401
3402            // Update total entry/line-item stats
3403            stats.total_entries = entries.len() as u64;
3404            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
3405        } else {
3406            debug!("No period-close entries generated (no income statement activity)");
3407        }
3408
3409        Ok(())
3410    }
3411
3412    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
3413    fn phase_audit_data(
3414        &mut self,
3415        entries: &[JournalEntry],
3416        stats: &mut EnhancedGenerationStatistics,
3417    ) -> SynthResult<AuditSnapshot> {
3418        if self.phase_config.generate_audit {
3419            info!("Phase 8: Generating Audit Data");
3420            let audit_snapshot = self.generate_audit_data(entries)?;
3421            stats.audit_engagement_count = audit_snapshot.engagements.len();
3422            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
3423            stats.audit_evidence_count = audit_snapshot.evidence.len();
3424            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
3425            stats.audit_finding_count = audit_snapshot.findings.len();
3426            stats.audit_judgment_count = audit_snapshot.judgments.len();
3427            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
3428            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
3429            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
3430            stats.audit_sample_count = audit_snapshot.samples.len();
3431            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
3432            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
3433            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
3434            stats.audit_related_party_count = audit_snapshot.related_parties.len();
3435            stats.audit_related_party_transaction_count =
3436                audit_snapshot.related_party_transactions.len();
3437            info!(
3438                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
3439                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
3440                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
3441                 {} RP transactions",
3442                stats.audit_engagement_count,
3443                stats.audit_workpaper_count,
3444                stats.audit_evidence_count,
3445                stats.audit_risk_count,
3446                stats.audit_finding_count,
3447                stats.audit_judgment_count,
3448                stats.audit_confirmation_count,
3449                stats.audit_procedure_step_count,
3450                stats.audit_sample_count,
3451                stats.audit_analytical_result_count,
3452                stats.audit_ia_function_count,
3453                stats.audit_ia_report_count,
3454                stats.audit_related_party_count,
3455                stats.audit_related_party_transaction_count,
3456            );
3457            self.check_resources_with_log("post-audit")?;
3458            Ok(audit_snapshot)
3459        } else {
3460            debug!("Phase 8: Skipped (audit generation disabled)");
3461            Ok(AuditSnapshot::default())
3462        }
3463    }
3464
3465    /// Phase 9: Generate banking KYC/AML data.
3466    fn phase_banking_data(
3467        &mut self,
3468        stats: &mut EnhancedGenerationStatistics,
3469    ) -> SynthResult<BankingSnapshot> {
3470        if self.phase_config.generate_banking {
3471            info!("Phase 9: Generating Banking KYC/AML Data");
3472            let banking_snapshot = self.generate_banking_data()?;
3473            stats.banking_customer_count = banking_snapshot.customers.len();
3474            stats.banking_account_count = banking_snapshot.accounts.len();
3475            stats.banking_transaction_count = banking_snapshot.transactions.len();
3476            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
3477            info!(
3478                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
3479                stats.banking_customer_count, stats.banking_account_count,
3480                stats.banking_transaction_count, stats.banking_suspicious_count
3481            );
3482            self.check_resources_with_log("post-banking")?;
3483            Ok(banking_snapshot)
3484        } else {
3485            debug!("Phase 9: Skipped (banking generation disabled)");
3486            Ok(BankingSnapshot::default())
3487        }
3488    }
3489
3490    /// Phase 10: Export accounting network graphs for ML training.
3491    fn phase_graph_export(
3492        &mut self,
3493        entries: &[JournalEntry],
3494        coa: &Arc<ChartOfAccounts>,
3495        stats: &mut EnhancedGenerationStatistics,
3496    ) -> SynthResult<GraphExportSnapshot> {
3497        if self.phase_config.generate_graph_export && !entries.is_empty() {
3498            info!("Phase 10: Exporting Accounting Network Graphs");
3499            match self.export_graphs(entries, coa, stats) {
3500                Ok(snapshot) => {
3501                    info!(
3502                        "Graph export complete: {} graphs ({} nodes, {} edges)",
3503                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
3504                    );
3505                    Ok(snapshot)
3506                }
3507                Err(e) => {
3508                    warn!("Phase 10: Graph export failed: {}", e);
3509                    Ok(GraphExportSnapshot::default())
3510                }
3511            }
3512        } else {
3513            debug!("Phase 10: Skipped (graph export disabled or no entries)");
3514            Ok(GraphExportSnapshot::default())
3515        }
3516    }
3517
3518    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
3519    #[allow(clippy::too_many_arguments)]
3520    fn phase_hypergraph_export(
3521        &self,
3522        coa: &Arc<ChartOfAccounts>,
3523        entries: &[JournalEntry],
3524        document_flows: &DocumentFlowSnapshot,
3525        sourcing: &SourcingSnapshot,
3526        hr: &HrSnapshot,
3527        manufacturing: &ManufacturingSnapshot,
3528        banking: &BankingSnapshot,
3529        audit: &AuditSnapshot,
3530        financial_reporting: &FinancialReportingSnapshot,
3531        ocpm: &OcpmSnapshot,
3532        compliance: &ComplianceRegulationsSnapshot,
3533        stats: &mut EnhancedGenerationStatistics,
3534    ) -> SynthResult<()> {
3535        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
3536            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
3537            match self.export_hypergraph(
3538                coa,
3539                entries,
3540                document_flows,
3541                sourcing,
3542                hr,
3543                manufacturing,
3544                banking,
3545                audit,
3546                financial_reporting,
3547                ocpm,
3548                compliance,
3549                stats,
3550            ) {
3551                Ok(info) => {
3552                    info!(
3553                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
3554                        info.node_count, info.edge_count, info.hyperedge_count
3555                    );
3556                }
3557                Err(e) => {
3558                    warn!("Phase 10b: Hypergraph export failed: {}", e);
3559                }
3560            }
3561        } else {
3562            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
3563        }
3564        Ok(())
3565    }
3566
3567    /// Phase 11: LLM Enrichment.
3568    ///
3569    /// Uses an LLM provider (mock by default) to enrich vendor names with
3570    /// realistic, context-aware names. This phase is non-blocking: failures
3571    /// log a warning but do not stop the generation pipeline.
3572    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
3573        if !self.config.llm.enabled {
3574            debug!("Phase 11: Skipped (LLM enrichment disabled)");
3575            return;
3576        }
3577
3578        info!("Phase 11: Starting LLM Enrichment");
3579        let start = std::time::Instant::now();
3580
3581        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3582            // Select provider: use HttpLlmProvider when a non-mock provider is configured
3583            // and the corresponding API key environment variable is present.
3584            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
3585                let schema_provider = &self.config.llm.provider;
3586                let api_key_env = match schema_provider.as_str() {
3587                    "openai" => Some("OPENAI_API_KEY"),
3588                    "anthropic" => Some("ANTHROPIC_API_KEY"),
3589                    "custom" => Some("LLM_API_KEY"),
3590                    _ => None,
3591                };
3592                if let Some(key_env) = api_key_env {
3593                    if std::env::var(key_env).is_ok() {
3594                        let llm_config = datasynth_core::llm::LlmConfig {
3595                            model: self.config.llm.model.clone(),
3596                            api_key_env: key_env.to_string(),
3597                            ..datasynth_core::llm::LlmConfig::default()
3598                        };
3599                        match HttpLlmProvider::new(llm_config) {
3600                            Ok(p) => Arc::new(p),
3601                            Err(e) => {
3602                                warn!(
3603                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
3604                                    e
3605                                );
3606                                Arc::new(MockLlmProvider::new(self.seed))
3607                            }
3608                        }
3609                    } else {
3610                        Arc::new(MockLlmProvider::new(self.seed))
3611                    }
3612                } else {
3613                    Arc::new(MockLlmProvider::new(self.seed))
3614                }
3615            };
3616            let enricher = VendorLlmEnricher::new(provider);
3617
3618            let industry = format!("{:?}", self.config.global.industry);
3619            let max_enrichments = self
3620                .config
3621                .llm
3622                .max_vendor_enrichments
3623                .min(self.master_data.vendors.len());
3624
3625            let mut enriched_count = 0usize;
3626            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
3627                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
3628                    Ok(name) => {
3629                        vendor.name = name;
3630                        enriched_count += 1;
3631                    }
3632                    Err(e) => {
3633                        warn!(
3634                            "LLM vendor enrichment failed for {}: {}",
3635                            vendor.vendor_id, e
3636                        );
3637                    }
3638                }
3639            }
3640
3641            enriched_count
3642        }));
3643
3644        match result {
3645            Ok(enriched_count) => {
3646                stats.llm_vendors_enriched = enriched_count;
3647                let elapsed = start.elapsed();
3648                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3649                info!(
3650                    "Phase 11 complete: {} vendors enriched in {}ms",
3651                    enriched_count, stats.llm_enrichment_ms
3652                );
3653            }
3654            Err(_) => {
3655                let elapsed = start.elapsed();
3656                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3657                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
3658            }
3659        }
3660    }
3661
3662    /// Phase 12: Diffusion Enhancement.
3663    ///
3664    /// Generates a sample set using the statistical diffusion backend to
3665    /// demonstrate distribution-matching data generation. This phase is
3666    /// non-blocking: failures log a warning but do not stop the pipeline.
3667    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
3668        if !self.config.diffusion.enabled {
3669            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
3670            return;
3671        }
3672
3673        info!("Phase 12: Starting Diffusion Enhancement");
3674        let start = std::time::Instant::now();
3675
3676        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3677            // Target distribution: transaction amounts (log-normal-like)
3678            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
3679            let stds = vec![2000.0, 1.5, 1.0];
3680
3681            let diffusion_config = DiffusionConfig {
3682                n_steps: self.config.diffusion.n_steps,
3683                seed: self.seed,
3684                ..Default::default()
3685            };
3686
3687            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
3688
3689            let n_samples = self.config.diffusion.sample_size;
3690            let n_features = 3; // amount, line_items, approval_level
3691            let samples = backend.generate(n_samples, n_features, self.seed);
3692
3693            samples.len()
3694        }));
3695
3696        match result {
3697            Ok(sample_count) => {
3698                stats.diffusion_samples_generated = sample_count;
3699                let elapsed = start.elapsed();
3700                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
3701                info!(
3702                    "Phase 12 complete: {} diffusion samples generated in {}ms",
3703                    sample_count, stats.diffusion_enhancement_ms
3704                );
3705            }
3706            Err(_) => {
3707                let elapsed = start.elapsed();
3708                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
3709                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
3710            }
3711        }
3712    }
3713
3714    /// Phase 13: Causal Overlay.
3715    ///
3716    /// Builds a structural causal model from a built-in template (e.g.,
3717    /// fraud_detection) and generates causal samples. Optionally validates
3718    /// that the output respects the causal structure. This phase is
3719    /// non-blocking: failures log a warning but do not stop the pipeline.
3720    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
3721        if !self.config.causal.enabled {
3722            debug!("Phase 13: Skipped (causal generation disabled)");
3723            return;
3724        }
3725
3726        info!("Phase 13: Starting Causal Overlay");
3727        let start = std::time::Instant::now();
3728
3729        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3730            // Select template based on config
3731            let graph = match self.config.causal.template.as_str() {
3732                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
3733                _ => CausalGraph::fraud_detection_template(),
3734            };
3735
3736            let scm = StructuralCausalModel::new(graph.clone())
3737                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
3738
3739            let n_samples = self.config.causal.sample_size;
3740            let samples = scm
3741                .generate(n_samples, self.seed)
3742                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
3743
3744            // Optionally validate causal structure
3745            let validation_passed = if self.config.causal.validate {
3746                let report = CausalValidator::validate_causal_structure(&samples, &graph);
3747                if report.valid {
3748                    info!(
3749                        "Causal validation passed: all {} checks OK",
3750                        report.checks.len()
3751                    );
3752                } else {
3753                    warn!(
3754                        "Causal validation: {} violations detected: {:?}",
3755                        report.violations.len(),
3756                        report.violations
3757                    );
3758                }
3759                Some(report.valid)
3760            } else {
3761                None
3762            };
3763
3764            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
3765        }));
3766
3767        match result {
3768            Ok(Ok((sample_count, validation_passed))) => {
3769                stats.causal_samples_generated = sample_count;
3770                stats.causal_validation_passed = validation_passed;
3771                let elapsed = start.elapsed();
3772                stats.causal_generation_ms = elapsed.as_millis() as u64;
3773                info!(
3774                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
3775                    sample_count, stats.causal_generation_ms, validation_passed,
3776                );
3777            }
3778            Ok(Err(e)) => {
3779                let elapsed = start.elapsed();
3780                stats.causal_generation_ms = elapsed.as_millis() as u64;
3781                warn!("Phase 13: Causal generation failed: {}", e);
3782            }
3783            Err(_) => {
3784                let elapsed = start.elapsed();
3785                stats.causal_generation_ms = elapsed.as_millis() as u64;
3786                warn!("Phase 13: Causal generation failed (panic caught), continuing");
3787            }
3788        }
3789    }
3790
3791    /// Phase 14: Generate S2C sourcing data.
3792    fn phase_sourcing_data(
3793        &mut self,
3794        stats: &mut EnhancedGenerationStatistics,
3795    ) -> SynthResult<SourcingSnapshot> {
3796        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
3797            debug!("Phase 14: Skipped (sourcing generation disabled)");
3798            return Ok(SourcingSnapshot::default());
3799        }
3800        let degradation = self.check_resources()?;
3801        if degradation >= DegradationLevel::Reduced {
3802            debug!(
3803                "Phase skipped due to resource pressure (degradation: {:?})",
3804                degradation
3805            );
3806            return Ok(SourcingSnapshot::default());
3807        }
3808
3809        info!("Phase 14: Generating S2C Sourcing Data");
3810        let seed = self.seed;
3811
3812        // Gather vendor data from master data
3813        let vendor_ids: Vec<String> = self
3814            .master_data
3815            .vendors
3816            .iter()
3817            .map(|v| v.vendor_id.clone())
3818            .collect();
3819        if vendor_ids.is_empty() {
3820            debug!("Phase 14: Skipped (no vendors available)");
3821            return Ok(SourcingSnapshot::default());
3822        }
3823
3824        let categories: Vec<(String, String)> = vec![
3825            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
3826            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
3827            ("CAT-IT".to_string(), "IT Equipment".to_string()),
3828            ("CAT-SVC".to_string(), "Professional Services".to_string()),
3829            ("CAT-LOG".to_string(), "Logistics".to_string()),
3830        ];
3831        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
3832            .iter()
3833            .map(|(id, name)| {
3834                (
3835                    id.clone(),
3836                    name.clone(),
3837                    rust_decimal::Decimal::from(100_000),
3838                )
3839            })
3840            .collect();
3841
3842        let company_code = self
3843            .config
3844            .companies
3845            .first()
3846            .map(|c| c.code.as_str())
3847            .unwrap_or("1000");
3848        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3849            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3850        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3851        let fiscal_year = start_date.year() as u16;
3852        let owner_ids: Vec<String> = self
3853            .master_data
3854            .employees
3855            .iter()
3856            .take(5)
3857            .map(|e| e.employee_id.clone())
3858            .collect();
3859        let owner_id = owner_ids
3860            .first()
3861            .map(std::string::String::as_str)
3862            .unwrap_or("BUYER-001");
3863
3864        // Step 1: Spend Analysis
3865        let mut spend_gen = SpendAnalysisGenerator::new(seed);
3866        let spend_analyses =
3867            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
3868
3869        // Step 2: Sourcing Projects
3870        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
3871        let sourcing_projects = if owner_ids.is_empty() {
3872            Vec::new()
3873        } else {
3874            project_gen.generate(
3875                company_code,
3876                &categories_with_spend,
3877                &owner_ids,
3878                start_date,
3879                self.config.global.period_months,
3880            )
3881        };
3882        stats.sourcing_project_count = sourcing_projects.len();
3883
3884        // Step 3: Qualifications
3885        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
3886        let mut qual_gen = QualificationGenerator::new(seed + 2);
3887        let qualifications = qual_gen.generate(
3888            company_code,
3889            &qual_vendor_ids,
3890            sourcing_projects.first().map(|p| p.project_id.as_str()),
3891            owner_id,
3892            start_date,
3893        );
3894
3895        // Step 4: RFx Events
3896        let mut rfx_gen = RfxGenerator::new(seed + 3);
3897        let rfx_events: Vec<RfxEvent> = sourcing_projects
3898            .iter()
3899            .map(|proj| {
3900                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
3901                rfx_gen.generate(
3902                    company_code,
3903                    &proj.project_id,
3904                    &proj.category_id,
3905                    &qualified_vids,
3906                    owner_id,
3907                    start_date,
3908                    50000.0,
3909                )
3910            })
3911            .collect();
3912        stats.rfx_event_count = rfx_events.len();
3913
3914        // Step 5: Bids
3915        let mut bid_gen = BidGenerator::new(seed + 4);
3916        let mut all_bids = Vec::new();
3917        for rfx in &rfx_events {
3918            let bidder_count = vendor_ids.len().clamp(2, 5);
3919            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
3920            let bids = bid_gen.generate(rfx, &responding, start_date);
3921            all_bids.extend(bids);
3922        }
3923        stats.bid_count = all_bids.len();
3924
3925        // Step 6: Bid Evaluations
3926        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
3927        let bid_evaluations: Vec<BidEvaluation> = rfx_events
3928            .iter()
3929            .map(|rfx| {
3930                let rfx_bids: Vec<SupplierBid> = all_bids
3931                    .iter()
3932                    .filter(|b| b.rfx_id == rfx.rfx_id)
3933                    .cloned()
3934                    .collect();
3935                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
3936            })
3937            .collect();
3938
3939        // Step 7: Contracts from winning bids
3940        let mut contract_gen = ContractGenerator::new(seed + 6);
3941        let contracts: Vec<ProcurementContract> = bid_evaluations
3942            .iter()
3943            .zip(rfx_events.iter())
3944            .filter_map(|(eval, rfx)| {
3945                eval.ranked_bids.first().and_then(|winner| {
3946                    all_bids
3947                        .iter()
3948                        .find(|b| b.bid_id == winner.bid_id)
3949                        .map(|winning_bid| {
3950                            contract_gen.generate_from_bid(
3951                                winning_bid,
3952                                Some(&rfx.sourcing_project_id),
3953                                &rfx.category_id,
3954                                owner_id,
3955                                start_date,
3956                            )
3957                        })
3958                })
3959            })
3960            .collect();
3961        stats.contract_count = contracts.len();
3962
3963        // Step 8: Catalog Items
3964        let mut catalog_gen = CatalogGenerator::new(seed + 7);
3965        let catalog_items = catalog_gen.generate(&contracts);
3966        stats.catalog_item_count = catalog_items.len();
3967
3968        // Step 9: Scorecards
3969        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
3970        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
3971            .iter()
3972            .fold(
3973                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
3974                |mut acc, c| {
3975                    acc.entry(c.vendor_id.clone()).or_default().push(c);
3976                    acc
3977                },
3978            )
3979            .into_iter()
3980            .collect();
3981        let scorecards = scorecard_gen.generate(
3982            company_code,
3983            &vendor_contracts,
3984            start_date,
3985            end_date,
3986            owner_id,
3987        );
3988        stats.scorecard_count = scorecards.len();
3989
3990        // Back-populate cross-references on sourcing projects (Task 35)
3991        // Link each project to its RFx events, contracts, and spend analyses
3992        let mut sourcing_projects = sourcing_projects;
3993        for project in &mut sourcing_projects {
3994            // Link RFx events generated for this project
3995            project.rfx_ids = rfx_events
3996                .iter()
3997                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
3998                .map(|rfx| rfx.rfx_id.clone())
3999                .collect();
4000
4001            // Link contract awarded from this project's RFx
4002            project.contract_id = contracts
4003                .iter()
4004                .find(|c| {
4005                    c.sourcing_project_id
4006                        .as_deref()
4007                        .is_some_and(|sp| sp == project.project_id)
4008                })
4009                .map(|c| c.contract_id.clone());
4010
4011            // Link spend analysis for matching category (use category_id as the reference)
4012            project.spend_analysis_id = spend_analyses
4013                .iter()
4014                .find(|sa| sa.category_id == project.category_id)
4015                .map(|sa| sa.category_id.clone());
4016        }
4017
4018        info!(
4019            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4020            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4021            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4022        );
4023        self.check_resources_with_log("post-sourcing")?;
4024
4025        Ok(SourcingSnapshot {
4026            spend_analyses,
4027            sourcing_projects,
4028            qualifications,
4029            rfx_events,
4030            bids: all_bids,
4031            bid_evaluations,
4032            contracts,
4033            catalog_items,
4034            scorecards,
4035        })
4036    }
4037
4038    /// Build a [`GroupStructure`] from the current company configuration.
4039    ///
4040    /// The first company in the configuration is treated as the ultimate parent.
4041    /// All remaining companies become wholly-owned (100 %) subsidiaries with
4042    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
4043    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4044        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4045
4046        let parent_code = self
4047            .config
4048            .companies
4049            .first()
4050            .map(|c| c.code.clone())
4051            .unwrap_or_else(|| "PARENT".to_string());
4052
4053        let mut group = GroupStructure::new(parent_code);
4054
4055        for company in self.config.companies.iter().skip(1) {
4056            let sub =
4057                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4058            group.add_subsidiary(sub);
4059        }
4060
4061        group
4062    }
4063
4064    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
4065    fn phase_intercompany(
4066        &mut self,
4067        journal_entries: &[JournalEntry],
4068        stats: &mut EnhancedGenerationStatistics,
4069    ) -> SynthResult<IntercompanySnapshot> {
4070        // Skip if intercompany is disabled in config
4071        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4072            debug!("Phase 14b: Skipped (intercompany generation disabled)");
4073            return Ok(IntercompanySnapshot::default());
4074        }
4075
4076        // Intercompany requires at least 2 companies
4077        if self.config.companies.len() < 2 {
4078            debug!(
4079                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4080                self.config.companies.len()
4081            );
4082            return Ok(IntercompanySnapshot::default());
4083        }
4084
4085        info!("Phase 14b: Generating Intercompany Transactions");
4086
4087        // Build the group structure early — used by ISA 600 component auditor scope
4088        // and consolidated financial statement generators downstream.
4089        let group_structure = self.build_group_structure();
4090        debug!(
4091            "Group structure built: parent={}, subsidiaries={}",
4092            group_structure.parent_entity,
4093            group_structure.subsidiaries.len()
4094        );
4095
4096        let seed = self.seed;
4097        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4098            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4099        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4100
4101        // Build ownership structure from company configs
4102        // First company is treated as the parent, remaining are subsidiaries
4103        let parent_code = self.config.companies[0].code.clone();
4104        let mut ownership_structure =
4105            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4106
4107        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4108            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4109                format!("REL{:03}", i + 1),
4110                parent_code.clone(),
4111                company.code.clone(),
4112                rust_decimal::Decimal::from(100), // Default 100% ownership
4113                start_date,
4114            );
4115            ownership_structure.add_relationship(relationship);
4116        }
4117
4118        // Convert config transfer pricing method to core model enum
4119        let tp_method = match self.config.intercompany.transfer_pricing_method {
4120            datasynth_config::schema::TransferPricingMethod::CostPlus => {
4121                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4122            }
4123            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4124                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4125            }
4126            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4127                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4128            }
4129            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4130                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4131            }
4132            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4133                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4134            }
4135        };
4136
4137        // Build IC generator config from schema config
4138        let ic_currency = self
4139            .config
4140            .companies
4141            .first()
4142            .map(|c| c.currency.clone())
4143            .unwrap_or_else(|| "USD".to_string());
4144        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4145            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4146            transfer_pricing_method: tp_method,
4147            markup_percent: rust_decimal::Decimal::from_f64_retain(
4148                self.config.intercompany.markup_percent,
4149            )
4150            .unwrap_or(rust_decimal::Decimal::from(5)),
4151            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4152            default_currency: ic_currency,
4153            ..Default::default()
4154        };
4155
4156        // Create IC generator
4157        let mut ic_generator = datasynth_generators::ICGenerator::new(
4158            ic_gen_config,
4159            ownership_structure.clone(),
4160            seed + 50,
4161        );
4162
4163        // Generate IC transactions for the period
4164        // Use ~3 transactions per day as a reasonable default
4165        let transactions_per_day = 3;
4166        let matched_pairs = ic_generator.generate_transactions_for_period(
4167            start_date,
4168            end_date,
4169            transactions_per_day,
4170        );
4171
4172        // Generate journal entries from matched pairs
4173        let mut seller_entries = Vec::new();
4174        let mut buyer_entries = Vec::new();
4175        let fiscal_year = start_date.year();
4176
4177        for pair in &matched_pairs {
4178            let fiscal_period = pair.posting_date.month();
4179            let (seller_je, buyer_je) =
4180                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4181            seller_entries.push(seller_je);
4182            buyer_entries.push(buyer_je);
4183        }
4184
4185        // Run matching engine
4186        let matching_config = datasynth_generators::ICMatchingConfig {
4187            base_currency: self
4188                .config
4189                .companies
4190                .first()
4191                .map(|c| c.currency.clone())
4192                .unwrap_or_else(|| "USD".to_string()),
4193            ..Default::default()
4194        };
4195        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4196        matching_engine.load_matched_pairs(&matched_pairs);
4197        let matching_result = matching_engine.run_matching(end_date);
4198
4199        // Generate elimination entries if configured
4200        let mut elimination_entries = Vec::new();
4201        if self.config.intercompany.generate_eliminations {
4202            let elim_config = datasynth_generators::EliminationConfig {
4203                consolidation_entity: "GROUP".to_string(),
4204                base_currency: self
4205                    .config
4206                    .companies
4207                    .first()
4208                    .map(|c| c.currency.clone())
4209                    .unwrap_or_else(|| "USD".to_string()),
4210                ..Default::default()
4211            };
4212
4213            let mut elim_generator =
4214                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4215
4216            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4217            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4218                matching_result
4219                    .matched_balances
4220                    .iter()
4221                    .chain(matching_result.unmatched_balances.iter())
4222                    .cloned()
4223                    .collect();
4224
4225            // Build investment and equity maps from the group structure so that the
4226            // elimination generator can produce equity-investment elimination entries
4227            // (parent's investment in subsidiary vs. subsidiary's equity capital).
4228            //
4229            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
4230            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
4231            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
4232            //
4233            // Net assets are derived from the journal entries using account-range heuristics:
4234            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
4235            // no JE data is available (IC phase runs early in the generation pipeline).
4236            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4237                std::collections::HashMap::new();
4238            let mut equity_amounts: std::collections::HashMap<
4239                String,
4240                std::collections::HashMap<String, rust_decimal::Decimal>,
4241            > = std::collections::HashMap::new();
4242            {
4243                use rust_decimal::Decimal;
4244                let hundred = Decimal::from(100u32);
4245                let ten_pct = Decimal::new(10, 2); // 0.10
4246                let thirty_pct = Decimal::new(30, 2); // 0.30
4247                let sixty_pct = Decimal::new(60, 2); // 0.60
4248                let parent_code = &group_structure.parent_entity;
4249                for sub in &group_structure.subsidiaries {
4250                    let net_assets = {
4251                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4252                        if na > Decimal::ZERO {
4253                            na
4254                        } else {
4255                            Decimal::from(1_000_000u64)
4256                        }
4257                    };
4258                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
4259                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4260                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4261
4262                    // Split subsidiary equity into conventional components:
4263                    // 10 % share capital / 30 % APIC / 60 % retained earnings
4264                    let mut eq_map = std::collections::HashMap::new();
4265                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4266                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4267                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4268                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
4269                }
4270            }
4271
4272            let journal = elim_generator.generate_eliminations(
4273                &fiscal_period,
4274                end_date,
4275                &all_balances,
4276                &matched_pairs,
4277                &investment_amounts,
4278                &equity_amounts,
4279            );
4280
4281            elimination_entries = journal.entries.clone();
4282        }
4283
4284        let matched_pair_count = matched_pairs.len();
4285        let elimination_entry_count = elimination_entries.len();
4286        let match_rate = matching_result.match_rate;
4287
4288        stats.ic_matched_pair_count = matched_pair_count;
4289        stats.ic_elimination_count = elimination_entry_count;
4290        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4291
4292        info!(
4293            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4294            matched_pair_count,
4295            stats.ic_transaction_count,
4296            seller_entries.len(),
4297            buyer_entries.len(),
4298            elimination_entry_count,
4299            match_rate * 100.0
4300        );
4301        self.check_resources_with_log("post-intercompany")?;
4302
4303        // ----------------------------------------------------------------
4304        // NCI measurements: derive from group structure ownership percentages
4305        // ----------------------------------------------------------------
4306        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4307            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4308            use rust_decimal::Decimal;
4309
4310            let eight_pct = Decimal::new(8, 2); // 0.08
4311
4312            group_structure
4313                .subsidiaries
4314                .iter()
4315                .filter(|sub| {
4316                    sub.nci_percentage > Decimal::ZERO
4317                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4318                })
4319                .map(|sub| {
4320                    // Compute net assets from actual journal entries for this subsidiary.
4321                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
4322                    // IC phase runs before the main JE batch has been populated).
4323                    let net_assets_from_jes =
4324                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4325
4326                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
4327                        net_assets_from_jes.round_dp(2)
4328                    } else {
4329                        // Fallback: use a plausible base amount
4330                        Decimal::from(1_000_000u64)
4331                    };
4332
4333                    // Net income approximated as 8% of net assets
4334                    let net_income = (net_assets * eight_pct).round_dp(2);
4335
4336                    NciMeasurement::compute(
4337                        sub.entity_code.clone(),
4338                        sub.nci_percentage,
4339                        net_assets,
4340                        net_income,
4341                    )
4342                })
4343                .collect()
4344        };
4345
4346        if !nci_measurements.is_empty() {
4347            info!(
4348                "NCI measurements: {} subsidiaries with non-controlling interests",
4349                nci_measurements.len()
4350            );
4351        }
4352
4353        Ok(IntercompanySnapshot {
4354            group_structure: Some(group_structure),
4355            matched_pairs,
4356            seller_journal_entries: seller_entries,
4357            buyer_journal_entries: buyer_entries,
4358            elimination_entries,
4359            nci_measurements,
4360            matched_pair_count,
4361            elimination_entry_count,
4362            match_rate,
4363        })
4364    }
4365
4366    /// Phase 15: Generate bank reconciliations and financial statements.
4367    fn phase_financial_reporting(
4368        &mut self,
4369        document_flows: &DocumentFlowSnapshot,
4370        journal_entries: &[JournalEntry],
4371        coa: &Arc<ChartOfAccounts>,
4372        _hr: &HrSnapshot,
4373        _audit: &AuditSnapshot,
4374        stats: &mut EnhancedGenerationStatistics,
4375    ) -> SynthResult<FinancialReportingSnapshot> {
4376        let fs_enabled = self.phase_config.generate_financial_statements
4377            || self.config.financial_reporting.enabled;
4378        let br_enabled = self.phase_config.generate_bank_reconciliation;
4379
4380        if !fs_enabled && !br_enabled {
4381            debug!("Phase 15: Skipped (financial reporting disabled)");
4382            return Ok(FinancialReportingSnapshot::default());
4383        }
4384
4385        info!("Phase 15: Generating Financial Reporting Data");
4386
4387        let seed = self.seed;
4388        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4389            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4390
4391        let mut financial_statements = Vec::new();
4392        let mut bank_reconciliations = Vec::new();
4393        let mut trial_balances = Vec::new();
4394        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4395        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4396            Vec::new();
4397        // Standalone statements keyed by entity code
4398        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
4399            std::collections::HashMap::new();
4400        // Consolidated statements (one per period)
4401        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
4402        // Consolidation schedules (one per period)
4403        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
4404
4405        // Generate financial statements from JE-derived trial balances.
4406        //
4407        // When journal entries are available, we use cumulative trial balances for
4408        // balance sheet accounts and current-period trial balances for income
4409        // statement accounts. We also track prior-period trial balances so the
4410        // generator can produce comparative amounts, and we build a proper
4411        // cash flow statement from working capital changes rather than random data.
4412        if fs_enabled {
4413            let has_journal_entries = !journal_entries.is_empty();
4414
4415            // Use FinancialStatementGenerator for balance sheet and income statement,
4416            // but build cash flow ourselves from TB data when JEs are available.
4417            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
4418            // Separate generator for consolidated statements (different seed offset)
4419            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
4420
4421            // Collect elimination JEs once (reused across periods)
4422            let elimination_entries: Vec<&JournalEntry> = journal_entries
4423                .iter()
4424                .filter(|je| je.header.is_elimination)
4425                .collect();
4426
4427            // Generate one set of statements per period, per entity
4428            for period in 0..self.config.global.period_months {
4429                let period_start = start_date + chrono::Months::new(period);
4430                let period_end =
4431                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4432                let fiscal_year = period_end.year() as u16;
4433                let fiscal_period = period_end.month() as u8;
4434                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4435
4436                // Build per-entity trial balances for this period (non-elimination JEs)
4437                // We accumulate them for the consolidation step.
4438                let mut entity_tb_map: std::collections::HashMap<
4439                    String,
4440                    std::collections::HashMap<String, rust_decimal::Decimal>,
4441                > = std::collections::HashMap::new();
4442
4443                // --- Standalone: one set of statements per company ---
4444                for (company_idx, company) in self.config.companies.iter().enumerate() {
4445                    let company_code = company.code.as_str();
4446                    let currency = company.currency.as_str();
4447                    // Use a unique seed offset per company to keep statements deterministic
4448                    // and distinct across companies
4449                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
4450                    let mut company_fs_gen =
4451                        FinancialStatementGenerator::new(seed + company_seed_offset);
4452
4453                    if has_journal_entries {
4454                        let tb_entries = Self::build_cumulative_trial_balance(
4455                            journal_entries,
4456                            coa,
4457                            company_code,
4458                            start_date,
4459                            period_end,
4460                            fiscal_year,
4461                            fiscal_period,
4462                        );
4463
4464                        // Accumulate per-entity category balances for consolidation
4465                        let entity_cat_map =
4466                            entity_tb_map.entry(company_code.to_string()).or_default();
4467                        for tb_entry in &tb_entries {
4468                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
4469                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
4470                        }
4471
4472                        let stmts = company_fs_gen.generate(
4473                            company_code,
4474                            currency,
4475                            &tb_entries,
4476                            period_start,
4477                            period_end,
4478                            fiscal_year,
4479                            fiscal_period,
4480                            None,
4481                            "SYS-AUTOCLOSE",
4482                        );
4483
4484                        let mut entity_stmts = Vec::new();
4485                        for stmt in stmts {
4486                            if stmt.statement_type == StatementType::CashFlowStatement {
4487                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
4488                                let cf_items = Self::build_cash_flow_from_trial_balances(
4489                                    &tb_entries,
4490                                    None,
4491                                    net_income,
4492                                );
4493                                entity_stmts.push(FinancialStatement {
4494                                    cash_flow_items: cf_items,
4495                                    ..stmt
4496                                });
4497                            } else {
4498                                entity_stmts.push(stmt);
4499                            }
4500                        }
4501
4502                        // Add to the flat financial_statements list (used by KPI/budget)
4503                        financial_statements.extend(entity_stmts.clone());
4504
4505                        // Store standalone per-entity
4506                        standalone_statements
4507                            .entry(company_code.to_string())
4508                            .or_default()
4509                            .extend(entity_stmts);
4510
4511                        // Only store trial balance for the first company in the period
4512                        // to avoid duplicates in the trial_balances list
4513                        if company_idx == 0 {
4514                            trial_balances.push(PeriodTrialBalance {
4515                                fiscal_year,
4516                                fiscal_period,
4517                                period_start,
4518                                period_end,
4519                                entries: tb_entries,
4520                            });
4521                        }
4522                    } else {
4523                        // Fallback: no JEs available
4524                        let tb_entries = Self::build_trial_balance_from_entries(
4525                            journal_entries,
4526                            coa,
4527                            company_code,
4528                            fiscal_year,
4529                            fiscal_period,
4530                        );
4531
4532                        let stmts = company_fs_gen.generate(
4533                            company_code,
4534                            currency,
4535                            &tb_entries,
4536                            period_start,
4537                            period_end,
4538                            fiscal_year,
4539                            fiscal_period,
4540                            None,
4541                            "SYS-AUTOCLOSE",
4542                        );
4543                        financial_statements.extend(stmts.clone());
4544                        standalone_statements
4545                            .entry(company_code.to_string())
4546                            .or_default()
4547                            .extend(stmts);
4548
4549                        if company_idx == 0 && !tb_entries.is_empty() {
4550                            trial_balances.push(PeriodTrialBalance {
4551                                fiscal_year,
4552                                fiscal_period,
4553                                period_start,
4554                                period_end,
4555                                entries: tb_entries,
4556                            });
4557                        }
4558                    }
4559                }
4560
4561                // --- Consolidated: aggregate all entities + apply eliminations ---
4562                // Use the primary (first) company's currency for the consolidated statement
4563                let group_currency = self
4564                    .config
4565                    .companies
4566                    .first()
4567                    .map(|c| c.currency.as_str())
4568                    .unwrap_or("USD");
4569
4570                // Build owned elimination entries for this period
4571                let period_eliminations: Vec<JournalEntry> = elimination_entries
4572                    .iter()
4573                    .filter(|je| {
4574                        je.header.fiscal_year == fiscal_year
4575                            && je.header.fiscal_period == fiscal_period
4576                    })
4577                    .map(|je| (*je).clone())
4578                    .collect();
4579
4580                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
4581                    &entity_tb_map,
4582                    &period_eliminations,
4583                    &period_label,
4584                );
4585
4586                // Build a pseudo trial balance from consolidated line items for the
4587                // FinancialStatementGenerator to use (only for cash flow direction).
4588                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
4589                    .line_items
4590                    .iter()
4591                    .map(|li| {
4592                        let net = li.post_elimination_total;
4593                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
4594                            (net, rust_decimal::Decimal::ZERO)
4595                        } else {
4596                            (rust_decimal::Decimal::ZERO, -net)
4597                        };
4598                        datasynth_generators::TrialBalanceEntry {
4599                            account_code: li.account_category.clone(),
4600                            account_name: li.account_category.clone(),
4601                            category: li.account_category.clone(),
4602                            debit_balance: debit,
4603                            credit_balance: credit,
4604                        }
4605                    })
4606                    .collect();
4607
4608                let mut cons_stmts = cons_gen.generate(
4609                    "GROUP",
4610                    group_currency,
4611                    &cons_tb,
4612                    period_start,
4613                    period_end,
4614                    fiscal_year,
4615                    fiscal_period,
4616                    None,
4617                    "SYS-AUTOCLOSE",
4618                );
4619
4620                // Split consolidated line items by statement type.
4621                // The consolidation generator returns BS items first, then IS items,
4622                // identified by their CONS- prefix and category.
4623                let bs_categories: &[&str] = &[
4624                    "CASH",
4625                    "RECEIVABLES",
4626                    "INVENTORY",
4627                    "FIXEDASSETS",
4628                    "PAYABLES",
4629                    "ACCRUEDLIABILITIES",
4630                    "LONGTERMDEBT",
4631                    "EQUITY",
4632                ];
4633                let (bs_items, is_items): (Vec<_>, Vec<_>) =
4634                    cons_line_items.into_iter().partition(|li| {
4635                        let upper = li.label.to_uppercase();
4636                        bs_categories.iter().any(|c| upper == *c)
4637                    });
4638
4639                for stmt in &mut cons_stmts {
4640                    stmt.is_consolidated = true;
4641                    match stmt.statement_type {
4642                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
4643                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
4644                        _ => {} // CF and equity change statements keep generator output
4645                    }
4646                }
4647
4648                consolidated_statements.extend(cons_stmts);
4649                consolidation_schedules.push(schedule);
4650            }
4651
4652            // Backward compat: if only 1 company, use existing code path logic
4653            // (prior_cumulative_tb for comparative amounts). Already handled above;
4654            // the prior_ref is omitted to keep this change minimal.
4655            let _ = &mut fs_gen; // suppress unused warning
4656
4657            stats.financial_statement_count = financial_statements.len();
4658            info!(
4659                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
4660                stats.financial_statement_count,
4661                consolidated_statements.len(),
4662                has_journal_entries
4663            );
4664
4665            // ----------------------------------------------------------------
4666            // IFRS 8 / ASC 280: Operating Segment Reporting
4667            // ----------------------------------------------------------------
4668            // Build entity seeds from the company configuration.
4669            let entity_seeds: Vec<SegmentSeed> = self
4670                .config
4671                .companies
4672                .iter()
4673                .map(|c| SegmentSeed {
4674                    code: c.code.clone(),
4675                    name: c.name.clone(),
4676                    currency: c.currency.clone(),
4677                })
4678                .collect();
4679
4680            let mut seg_gen = SegmentGenerator::new(seed + 30);
4681
4682            // Generate one set of segment reports per period.
4683            // We extract consolidated revenue / profit / assets from the consolidated
4684            // financial statements produced above, falling back to simple sums when
4685            // no consolidated statements were generated (single-entity path).
4686            for period in 0..self.config.global.period_months {
4687                let period_end =
4688                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4689                let fiscal_year = period_end.year() as u16;
4690                let fiscal_period = period_end.month() as u8;
4691                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4692
4693                use datasynth_core::models::StatementType;
4694
4695                // Try to find consolidated income statement for this period
4696                let cons_is = consolidated_statements.iter().find(|s| {
4697                    s.fiscal_year == fiscal_year
4698                        && s.fiscal_period == fiscal_period
4699                        && s.statement_type == StatementType::IncomeStatement
4700                });
4701                let cons_bs = consolidated_statements.iter().find(|s| {
4702                    s.fiscal_year == fiscal_year
4703                        && s.fiscal_period == fiscal_period
4704                        && s.statement_type == StatementType::BalanceSheet
4705                });
4706
4707                // If consolidated statements not available fall back to the flat list
4708                let is_stmt = cons_is.or_else(|| {
4709                    financial_statements.iter().find(|s| {
4710                        s.fiscal_year == fiscal_year
4711                            && s.fiscal_period == fiscal_period
4712                            && s.statement_type == StatementType::IncomeStatement
4713                    })
4714                });
4715                let bs_stmt = cons_bs.or_else(|| {
4716                    financial_statements.iter().find(|s| {
4717                        s.fiscal_year == fiscal_year
4718                            && s.fiscal_period == fiscal_period
4719                            && s.statement_type == StatementType::BalanceSheet
4720                    })
4721                });
4722
4723                let consolidated_revenue = is_stmt
4724                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
4725                    .map(|li| -li.amount) // revenue is stored as negative in IS
4726                    .unwrap_or(rust_decimal::Decimal::ZERO);
4727
4728                let consolidated_profit = is_stmt
4729                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
4730                    .map(|li| li.amount)
4731                    .unwrap_or(rust_decimal::Decimal::ZERO);
4732
4733                let consolidated_assets = bs_stmt
4734                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
4735                    .map(|li| li.amount)
4736                    .unwrap_or(rust_decimal::Decimal::ZERO);
4737
4738                // Skip periods where we have no financial data
4739                if consolidated_revenue == rust_decimal::Decimal::ZERO
4740                    && consolidated_assets == rust_decimal::Decimal::ZERO
4741                {
4742                    continue;
4743                }
4744
4745                let group_code = self
4746                    .config
4747                    .companies
4748                    .first()
4749                    .map(|c| c.code.as_str())
4750                    .unwrap_or("GROUP");
4751
4752                // Compute period depreciation from JEs with document type "CL" hitting account
4753                // 6000 (depreciation expense).  These are generated by phase_period_close.
4754                let total_depr: rust_decimal::Decimal = journal_entries
4755                    .iter()
4756                    .filter(|je| je.header.document_type == "CL")
4757                    .flat_map(|je| je.lines.iter())
4758                    .filter(|l| l.gl_account.starts_with("6000"))
4759                    .map(|l| l.debit_amount)
4760                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
4761                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
4762                    Some(total_depr)
4763                } else {
4764                    None
4765                };
4766
4767                let (segs, recon) = seg_gen.generate(
4768                    group_code,
4769                    &period_label,
4770                    consolidated_revenue,
4771                    consolidated_profit,
4772                    consolidated_assets,
4773                    &entity_seeds,
4774                    depr_param,
4775                );
4776                segment_reports.extend(segs);
4777                segment_reconciliations.push(recon);
4778            }
4779
4780            info!(
4781                "Segment reports generated: {} segments, {} reconciliations",
4782                segment_reports.len(),
4783                segment_reconciliations.len()
4784            );
4785        }
4786
4787        // Generate bank reconciliations from payment data
4788        if br_enabled && !document_flows.payments.is_empty() {
4789            let employee_ids: Vec<String> = self
4790                .master_data
4791                .employees
4792                .iter()
4793                .map(|e| e.employee_id.clone())
4794                .collect();
4795            let mut br_gen =
4796                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
4797
4798            // Group payments by company code and period
4799            for company in &self.config.companies {
4800                let company_payments: Vec<PaymentReference> = document_flows
4801                    .payments
4802                    .iter()
4803                    .filter(|p| p.header.company_code == company.code)
4804                    .map(|p| PaymentReference {
4805                        id: p.header.document_id.clone(),
4806                        amount: if p.is_vendor { p.amount } else { -p.amount },
4807                        date: p.header.document_date,
4808                        reference: p
4809                            .check_number
4810                            .clone()
4811                            .or_else(|| p.wire_reference.clone())
4812                            .unwrap_or_else(|| p.header.document_id.clone()),
4813                    })
4814                    .collect();
4815
4816                if company_payments.is_empty() {
4817                    continue;
4818                }
4819
4820                let bank_account_id = format!("{}-MAIN", company.code);
4821
4822                // Generate one reconciliation per period
4823                for period in 0..self.config.global.period_months {
4824                    let period_start = start_date + chrono::Months::new(period);
4825                    let period_end =
4826                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4827
4828                    let period_payments: Vec<PaymentReference> = company_payments
4829                        .iter()
4830                        .filter(|p| p.date >= period_start && p.date <= period_end)
4831                        .cloned()
4832                        .collect();
4833
4834                    let recon = br_gen.generate(
4835                        &company.code,
4836                        &bank_account_id,
4837                        period_start,
4838                        period_end,
4839                        &company.currency,
4840                        &period_payments,
4841                    );
4842                    bank_reconciliations.push(recon);
4843                }
4844            }
4845            info!(
4846                "Bank reconciliations generated: {} reconciliations",
4847                bank_reconciliations.len()
4848            );
4849        }
4850
4851        stats.bank_reconciliation_count = bank_reconciliations.len();
4852        self.check_resources_with_log("post-financial-reporting")?;
4853
4854        if !trial_balances.is_empty() {
4855            info!(
4856                "Period-close trial balances captured: {} periods",
4857                trial_balances.len()
4858            );
4859        }
4860
4861        // Notes to financial statements are generated in a separate post-processing step
4862        // (generate_notes_to_financial_statements) called after accounting_standards and tax
4863        // phases have completed, so that deferred tax and provision data can be wired in.
4864        let notes_to_financial_statements = Vec::new();
4865
4866        Ok(FinancialReportingSnapshot {
4867            financial_statements,
4868            standalone_statements,
4869            consolidated_statements,
4870            consolidation_schedules,
4871            bank_reconciliations,
4872            trial_balances,
4873            segment_reports,
4874            segment_reconciliations,
4875            notes_to_financial_statements,
4876        })
4877    }
4878
4879    /// Populate notes to financial statements using fully-resolved snapshots.
4880    ///
4881    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
4882    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
4883    /// can be wired into the notes context.  The method mutates
4884    /// `financial_reporting.notes_to_financial_statements` in-place.
4885    fn generate_notes_to_financial_statements(
4886        &self,
4887        financial_reporting: &mut FinancialReportingSnapshot,
4888        accounting_standards: &AccountingStandardsSnapshot,
4889        tax: &TaxSnapshot,
4890        hr: &HrSnapshot,
4891        audit: &AuditSnapshot,
4892    ) {
4893        use datasynth_config::schema::AccountingFrameworkConfig;
4894        use datasynth_core::models::StatementType;
4895        use datasynth_generators::period_close::notes_generator::{
4896            NotesGenerator, NotesGeneratorContext,
4897        };
4898
4899        let seed = self.seed;
4900        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4901        {
4902            Ok(d) => d,
4903            Err(_) => return,
4904        };
4905
4906        let mut notes_gen = NotesGenerator::new(seed + 4235);
4907
4908        for company in &self.config.companies {
4909            let last_period_end = start_date
4910                + chrono::Months::new(self.config.global.period_months)
4911                - chrono::Days::new(1);
4912            let fiscal_year = last_period_end.year() as u16;
4913
4914            // Extract relevant amounts from the already-generated financial statements
4915            let entity_is = financial_reporting
4916                .standalone_statements
4917                .get(&company.code)
4918                .and_then(|stmts| {
4919                    stmts.iter().find(|s| {
4920                        s.fiscal_year == fiscal_year
4921                            && s.statement_type == StatementType::IncomeStatement
4922                    })
4923                });
4924            let entity_bs = financial_reporting
4925                .standalone_statements
4926                .get(&company.code)
4927                .and_then(|stmts| {
4928                    stmts.iter().find(|s| {
4929                        s.fiscal_year == fiscal_year
4930                            && s.statement_type == StatementType::BalanceSheet
4931                    })
4932                });
4933
4934            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
4935            let revenue_amount = entity_is
4936                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
4937                .map(|li| li.amount);
4938            let ppe_gross = entity_bs
4939                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
4940                .map(|li| li.amount);
4941
4942            let framework = match self
4943                .config
4944                .accounting_standards
4945                .framework
4946                .unwrap_or_default()
4947            {
4948                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
4949                    "IFRS".to_string()
4950                }
4951                _ => "US GAAP".to_string(),
4952            };
4953
4954            // ---- Deferred tax (IAS 12 / ASC 740) ----
4955            // Sum closing DTA and DTL from rollforward entries for this entity.
4956            let (entity_dta, entity_dtl) = {
4957                let mut dta = rust_decimal::Decimal::ZERO;
4958                let mut dtl = rust_decimal::Decimal::ZERO;
4959                for rf in &tax.deferred_tax.rollforwards {
4960                    if rf.entity_code == company.code {
4961                        dta += rf.closing_dta;
4962                        dtl += rf.closing_dtl;
4963                    }
4964                }
4965                (
4966                    if dta > rust_decimal::Decimal::ZERO {
4967                        Some(dta)
4968                    } else {
4969                        None
4970                    },
4971                    if dtl > rust_decimal::Decimal::ZERO {
4972                        Some(dtl)
4973                    } else {
4974                        None
4975                    },
4976                )
4977            };
4978
4979            // ---- Provisions (IAS 37 / ASC 450) ----
4980            // Filter provisions to this entity; sum best_estimate amounts.
4981            let entity_provisions: Vec<_> = accounting_standards
4982                .provisions
4983                .iter()
4984                .filter(|p| p.entity_code == company.code)
4985                .collect();
4986            let provision_count = entity_provisions.len();
4987            let total_provisions = if provision_count > 0 {
4988                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
4989            } else {
4990                None
4991            };
4992
4993            // ---- Pension data from HR snapshot ----
4994            let entity_pension_plan_count = hr
4995                .pension_plans
4996                .iter()
4997                .filter(|p| p.entity_code == company.code)
4998                .count();
4999            let entity_total_dbo: Option<rust_decimal::Decimal> = {
5000                let sum: rust_decimal::Decimal = hr
5001                    .pension_disclosures
5002                    .iter()
5003                    .filter(|d| {
5004                        hr.pension_plans
5005                            .iter()
5006                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5007                    })
5008                    .map(|d| d.net_pension_liability)
5009                    .sum();
5010                let plan_assets_sum: rust_decimal::Decimal = hr
5011                    .pension_plan_assets
5012                    .iter()
5013                    .filter(|a| {
5014                        hr.pension_plans
5015                            .iter()
5016                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5017                    })
5018                    .map(|a| a.fair_value_closing)
5019                    .sum();
5020                if entity_pension_plan_count > 0 {
5021                    Some(sum + plan_assets_sum)
5022                } else {
5023                    None
5024                }
5025            };
5026            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5027                let sum: rust_decimal::Decimal = hr
5028                    .pension_plan_assets
5029                    .iter()
5030                    .filter(|a| {
5031                        hr.pension_plans
5032                            .iter()
5033                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5034                    })
5035                    .map(|a| a.fair_value_closing)
5036                    .sum();
5037                if entity_pension_plan_count > 0 {
5038                    Some(sum)
5039                } else {
5040                    None
5041                }
5042            };
5043
5044            // ---- Audit data: related parties + subsequent events ----
5045            // Audit snapshot covers all entities; use total counts (common case = single entity).
5046            let rp_count = audit.related_party_transactions.len();
5047            let se_count = audit.subsequent_events.len();
5048            let adjusting_count = audit
5049                .subsequent_events
5050                .iter()
5051                .filter(|e| {
5052                    matches!(
5053                        e.classification,
5054                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5055                    )
5056                })
5057                .count();
5058
5059            let ctx = NotesGeneratorContext {
5060                entity_code: company.code.clone(),
5061                framework,
5062                period: format!("FY{}", fiscal_year),
5063                period_end: last_period_end,
5064                currency: company.currency.clone(),
5065                revenue_amount,
5066                total_ppe_gross: ppe_gross,
5067                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5068                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
5069                deferred_tax_asset: entity_dta,
5070                deferred_tax_liability: entity_dtl,
5071                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
5072                provision_count,
5073                total_provisions,
5074                // Pension data from HR snapshot
5075                pension_plan_count: entity_pension_plan_count,
5076                total_dbo: entity_total_dbo,
5077                total_plan_assets: entity_total_plan_assets,
5078                // Audit data
5079                related_party_transaction_count: rp_count,
5080                subsequent_event_count: se_count,
5081                adjusting_event_count: adjusting_count,
5082                ..NotesGeneratorContext::default()
5083            };
5084
5085            let entity_notes = notes_gen.generate(&ctx);
5086            info!(
5087                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5088                company.code,
5089                entity_notes.len(),
5090                entity_dta,
5091                entity_dtl,
5092                provision_count,
5093            );
5094            financial_reporting
5095                .notes_to_financial_statements
5096                .extend(entity_notes);
5097        }
5098    }
5099
5100    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
5101    ///
5102    /// This ensures the trial balance is coherent with the JEs: every debit and credit
5103    /// posted in the journal entries flows through to the trial balance, using the real
5104    /// GL account numbers from the CoA.
5105    fn build_trial_balance_from_entries(
5106        journal_entries: &[JournalEntry],
5107        coa: &ChartOfAccounts,
5108        company_code: &str,
5109        fiscal_year: u16,
5110        fiscal_period: u8,
5111    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5112        use rust_decimal::Decimal;
5113
5114        // Accumulate total debits and credits per GL account
5115        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5116        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5117
5118        for je in journal_entries {
5119            // Filter to matching company, fiscal year, and period
5120            if je.header.company_code != company_code
5121                || je.header.fiscal_year != fiscal_year
5122                || je.header.fiscal_period != fiscal_period
5123            {
5124                continue;
5125            }
5126
5127            for line in &je.lines {
5128                let acct = &line.gl_account;
5129                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5130                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5131            }
5132        }
5133
5134        // Build a TrialBalanceEntry for each account that had activity
5135        let mut all_accounts: Vec<&String> = account_debits
5136            .keys()
5137            .chain(account_credits.keys())
5138            .collect::<std::collections::HashSet<_>>()
5139            .into_iter()
5140            .collect();
5141        all_accounts.sort();
5142
5143        let mut entries = Vec::new();
5144
5145        for acct_number in all_accounts {
5146            let debit = account_debits
5147                .get(acct_number)
5148                .copied()
5149                .unwrap_or(Decimal::ZERO);
5150            let credit = account_credits
5151                .get(acct_number)
5152                .copied()
5153                .unwrap_or(Decimal::ZERO);
5154
5155            if debit.is_zero() && credit.is_zero() {
5156                continue;
5157            }
5158
5159            // Look up account name from CoA, fall back to "Account {code}"
5160            let account_name = coa
5161                .get_account(acct_number)
5162                .map(|gl| gl.short_description.clone())
5163                .unwrap_or_else(|| format!("Account {acct_number}"));
5164
5165            // Map account code prefix to the category strings expected by
5166            // FinancialStatementGenerator (Cash, Receivables, Inventory,
5167            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
5168            // OperatingExpenses).
5169            let category = Self::category_from_account_code(acct_number);
5170
5171            entries.push(datasynth_generators::TrialBalanceEntry {
5172                account_code: acct_number.clone(),
5173                account_name,
5174                category,
5175                debit_balance: debit,
5176                credit_balance: credit,
5177            });
5178        }
5179
5180        entries
5181    }
5182
5183    /// Build a cumulative trial balance by aggregating all JEs from the start up to
5184    /// (and including) the given period end date.
5185    ///
5186    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
5187    /// while income statement accounts (revenue, expenses) show only the current period.
5188    /// The two are merged into a single Vec for the FinancialStatementGenerator.
5189    fn build_cumulative_trial_balance(
5190        journal_entries: &[JournalEntry],
5191        coa: &ChartOfAccounts,
5192        company_code: &str,
5193        start_date: NaiveDate,
5194        period_end: NaiveDate,
5195        fiscal_year: u16,
5196        fiscal_period: u8,
5197    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5198        use rust_decimal::Decimal;
5199
5200        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
5201        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5202        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5203
5204        // Accumulate debits/credits for income statement accounts (current period only)
5205        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5206        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5207
5208        for je in journal_entries {
5209            if je.header.company_code != company_code {
5210                continue;
5211            }
5212
5213            for line in &je.lines {
5214                let acct = &line.gl_account;
5215                let category = Self::category_from_account_code(acct);
5216                let is_bs_account = matches!(
5217                    category.as_str(),
5218                    "Cash"
5219                        | "Receivables"
5220                        | "Inventory"
5221                        | "FixedAssets"
5222                        | "Payables"
5223                        | "AccruedLiabilities"
5224                        | "LongTermDebt"
5225                        | "Equity"
5226                );
5227
5228                if is_bs_account {
5229                    // Balance sheet: accumulate from start through period_end
5230                    if je.header.document_date <= period_end
5231                        && je.header.document_date >= start_date
5232                    {
5233                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5234                            line.debit_amount;
5235                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5236                            line.credit_amount;
5237                    }
5238                } else {
5239                    // Income statement: current period only
5240                    if je.header.fiscal_year == fiscal_year
5241                        && je.header.fiscal_period == fiscal_period
5242                    {
5243                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5244                            line.debit_amount;
5245                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5246                            line.credit_amount;
5247                    }
5248                }
5249            }
5250        }
5251
5252        // Merge all accounts
5253        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5254        all_accounts.extend(bs_debits.keys().cloned());
5255        all_accounts.extend(bs_credits.keys().cloned());
5256        all_accounts.extend(is_debits.keys().cloned());
5257        all_accounts.extend(is_credits.keys().cloned());
5258
5259        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5260        sorted_accounts.sort();
5261
5262        let mut entries = Vec::new();
5263
5264        for acct_number in &sorted_accounts {
5265            let category = Self::category_from_account_code(acct_number);
5266            let is_bs_account = matches!(
5267                category.as_str(),
5268                "Cash"
5269                    | "Receivables"
5270                    | "Inventory"
5271                    | "FixedAssets"
5272                    | "Payables"
5273                    | "AccruedLiabilities"
5274                    | "LongTermDebt"
5275                    | "Equity"
5276            );
5277
5278            let (debit, credit) = if is_bs_account {
5279                (
5280                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5281                    bs_credits
5282                        .get(acct_number)
5283                        .copied()
5284                        .unwrap_or(Decimal::ZERO),
5285                )
5286            } else {
5287                (
5288                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5289                    is_credits
5290                        .get(acct_number)
5291                        .copied()
5292                        .unwrap_or(Decimal::ZERO),
5293                )
5294            };
5295
5296            if debit.is_zero() && credit.is_zero() {
5297                continue;
5298            }
5299
5300            let account_name = coa
5301                .get_account(acct_number)
5302                .map(|gl| gl.short_description.clone())
5303                .unwrap_or_else(|| format!("Account {acct_number}"));
5304
5305            entries.push(datasynth_generators::TrialBalanceEntry {
5306                account_code: acct_number.clone(),
5307                account_name,
5308                category,
5309                debit_balance: debit,
5310                credit_balance: credit,
5311            });
5312        }
5313
5314        entries
5315    }
5316
5317    /// Build a JE-derived cash flow statement using the indirect method.
5318    ///
5319    /// Compares current and prior cumulative trial balances to derive working capital
5320    /// changes, producing a coherent cash flow statement tied to actual journal entries.
5321    fn build_cash_flow_from_trial_balances(
5322        current_tb: &[datasynth_generators::TrialBalanceEntry],
5323        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
5324        net_income: rust_decimal::Decimal,
5325    ) -> Vec<CashFlowItem> {
5326        use rust_decimal::Decimal;
5327
5328        // Helper: aggregate a TB by category and return net (debit - credit)
5329        let aggregate =
5330            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
5331                let mut map: HashMap<String, Decimal> = HashMap::new();
5332                for entry in tb {
5333                    let net = entry.debit_balance - entry.credit_balance;
5334                    *map.entry(entry.category.clone()).or_default() += net;
5335                }
5336                map
5337            };
5338
5339        let current = aggregate(current_tb);
5340        let prior = prior_tb.map(aggregate);
5341
5342        // Get balance for a category, defaulting to zero
5343        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
5344            *map.get(key).unwrap_or(&Decimal::ZERO)
5345        };
5346
5347        // Compute change: current - prior (or current if no prior)
5348        let change = |key: &str| -> Decimal {
5349            let curr = get(&current, key);
5350            match &prior {
5351                Some(p) => curr - get(p, key),
5352                None => curr,
5353            }
5354        };
5355
5356        // Operating activities (indirect method)
5357        // Depreciation add-back: approximate from FixedAssets decrease
5358        let fixed_asset_change = change("FixedAssets");
5359        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
5360            -fixed_asset_change
5361        } else {
5362            Decimal::ZERO
5363        };
5364
5365        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
5366        let ar_change = change("Receivables");
5367        let inventory_change = change("Inventory");
5368        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
5369        let ap_change = change("Payables");
5370        let accrued_change = change("AccruedLiabilities");
5371
5372        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
5373            + (-ap_change)
5374            + (-accrued_change);
5375
5376        // Investing activities
5377        let capex = if fixed_asset_change > Decimal::ZERO {
5378            -fixed_asset_change
5379        } else {
5380            Decimal::ZERO
5381        };
5382        let investing_cf = capex;
5383
5384        // Financing activities
5385        let debt_change = -change("LongTermDebt");
5386        let equity_change = -change("Equity");
5387        let financing_cf = debt_change + equity_change;
5388
5389        let net_change = operating_cf + investing_cf + financing_cf;
5390
5391        vec![
5392            CashFlowItem {
5393                item_code: "CF-NI".to_string(),
5394                label: "Net Income".to_string(),
5395                category: CashFlowCategory::Operating,
5396                amount: net_income,
5397                amount_prior: None,
5398                sort_order: 1,
5399                is_total: false,
5400            },
5401            CashFlowItem {
5402                item_code: "CF-DEP".to_string(),
5403                label: "Depreciation & Amortization".to_string(),
5404                category: CashFlowCategory::Operating,
5405                amount: depreciation_addback,
5406                amount_prior: None,
5407                sort_order: 2,
5408                is_total: false,
5409            },
5410            CashFlowItem {
5411                item_code: "CF-AR".to_string(),
5412                label: "Change in Accounts Receivable".to_string(),
5413                category: CashFlowCategory::Operating,
5414                amount: -ar_change,
5415                amount_prior: None,
5416                sort_order: 3,
5417                is_total: false,
5418            },
5419            CashFlowItem {
5420                item_code: "CF-AP".to_string(),
5421                label: "Change in Accounts Payable".to_string(),
5422                category: CashFlowCategory::Operating,
5423                amount: -ap_change,
5424                amount_prior: None,
5425                sort_order: 4,
5426                is_total: false,
5427            },
5428            CashFlowItem {
5429                item_code: "CF-INV".to_string(),
5430                label: "Change in Inventory".to_string(),
5431                category: CashFlowCategory::Operating,
5432                amount: -inventory_change,
5433                amount_prior: None,
5434                sort_order: 5,
5435                is_total: false,
5436            },
5437            CashFlowItem {
5438                item_code: "CF-OP".to_string(),
5439                label: "Net Cash from Operating Activities".to_string(),
5440                category: CashFlowCategory::Operating,
5441                amount: operating_cf,
5442                amount_prior: None,
5443                sort_order: 6,
5444                is_total: true,
5445            },
5446            CashFlowItem {
5447                item_code: "CF-CAPEX".to_string(),
5448                label: "Capital Expenditures".to_string(),
5449                category: CashFlowCategory::Investing,
5450                amount: capex,
5451                amount_prior: None,
5452                sort_order: 7,
5453                is_total: false,
5454            },
5455            CashFlowItem {
5456                item_code: "CF-INV-T".to_string(),
5457                label: "Net Cash from Investing Activities".to_string(),
5458                category: CashFlowCategory::Investing,
5459                amount: investing_cf,
5460                amount_prior: None,
5461                sort_order: 8,
5462                is_total: true,
5463            },
5464            CashFlowItem {
5465                item_code: "CF-DEBT".to_string(),
5466                label: "Net Borrowings / (Repayments)".to_string(),
5467                category: CashFlowCategory::Financing,
5468                amount: debt_change,
5469                amount_prior: None,
5470                sort_order: 9,
5471                is_total: false,
5472            },
5473            CashFlowItem {
5474                item_code: "CF-EQ".to_string(),
5475                label: "Equity Changes".to_string(),
5476                category: CashFlowCategory::Financing,
5477                amount: equity_change,
5478                amount_prior: None,
5479                sort_order: 10,
5480                is_total: false,
5481            },
5482            CashFlowItem {
5483                item_code: "CF-FIN-T".to_string(),
5484                label: "Net Cash from Financing Activities".to_string(),
5485                category: CashFlowCategory::Financing,
5486                amount: financing_cf,
5487                amount_prior: None,
5488                sort_order: 11,
5489                is_total: true,
5490            },
5491            CashFlowItem {
5492                item_code: "CF-NET".to_string(),
5493                label: "Net Change in Cash".to_string(),
5494                category: CashFlowCategory::Operating,
5495                amount: net_change,
5496                amount_prior: None,
5497                sort_order: 12,
5498                is_total: true,
5499            },
5500        ]
5501    }
5502
5503    /// Calculate net income from a set of trial balance entries.
5504    ///
5505    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
5506    fn calculate_net_income_from_tb(
5507        tb: &[datasynth_generators::TrialBalanceEntry],
5508    ) -> rust_decimal::Decimal {
5509        use rust_decimal::Decimal;
5510
5511        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
5512        for entry in tb {
5513            let net = entry.debit_balance - entry.credit_balance;
5514            *aggregated.entry(entry.category.clone()).or_default() += net;
5515        }
5516
5517        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
5518        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
5519        let opex = *aggregated
5520            .get("OperatingExpenses")
5521            .unwrap_or(&Decimal::ZERO);
5522        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
5523        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
5524
5525        // revenue is negative (credit-normal), expenses are positive (debit-normal)
5526        // other_income is typically negative (credit), other_expenses is typically positive
5527        let operating_income = revenue - cogs - opex - other_expenses - other_income;
5528        let tax_rate = Decimal::new(25, 2); // 0.25
5529        let tax = operating_income * tax_rate;
5530        operating_income - tax
5531    }
5532
5533    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
5534    ///
5535    /// Uses the first two digits of the account code to classify into the categories
5536    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
5537    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
5538    /// OperatingExpenses, OtherIncome, OtherExpenses.
5539    fn category_from_account_code(code: &str) -> String {
5540        let prefix: String = code.chars().take(2).collect();
5541        match prefix.as_str() {
5542            "10" => "Cash",
5543            "11" => "Receivables",
5544            "12" | "13" | "14" => "Inventory",
5545            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
5546            "20" => "Payables",
5547            "21" | "22" | "23" | "24" => "AccruedLiabilities",
5548            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
5549            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
5550            "40" | "41" | "42" | "43" | "44" => "Revenue",
5551            "50" | "51" | "52" => "CostOfSales",
5552            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
5553                "OperatingExpenses"
5554            }
5555            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
5556            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
5557            _ => "OperatingExpenses",
5558        }
5559        .to_string()
5560    }
5561
5562    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
5563    fn phase_hr_data(
5564        &mut self,
5565        stats: &mut EnhancedGenerationStatistics,
5566    ) -> SynthResult<HrSnapshot> {
5567        if !self.phase_config.generate_hr {
5568            debug!("Phase 16: Skipped (HR generation disabled)");
5569            return Ok(HrSnapshot::default());
5570        }
5571
5572        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
5573
5574        let seed = self.seed;
5575        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5576            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5577        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5578        let company_code = self
5579            .config
5580            .companies
5581            .first()
5582            .map(|c| c.code.as_str())
5583            .unwrap_or("1000");
5584        let currency = self
5585            .config
5586            .companies
5587            .first()
5588            .map(|c| c.currency.as_str())
5589            .unwrap_or("USD");
5590
5591        let employee_ids: Vec<String> = self
5592            .master_data
5593            .employees
5594            .iter()
5595            .map(|e| e.employee_id.clone())
5596            .collect();
5597
5598        if employee_ids.is_empty() {
5599            debug!("Phase 16: Skipped (no employees available)");
5600            return Ok(HrSnapshot::default());
5601        }
5602
5603        // Extract cost-center pool from master data employees for cross-reference
5604        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
5605        let cost_center_ids: Vec<String> = self
5606            .master_data
5607            .employees
5608            .iter()
5609            .filter_map(|e| e.cost_center.clone())
5610            .collect::<std::collections::HashSet<_>>()
5611            .into_iter()
5612            .collect();
5613
5614        let mut snapshot = HrSnapshot::default();
5615
5616        // Generate payroll runs (one per month)
5617        if self.config.hr.payroll.enabled {
5618            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
5619                .with_pools(employee_ids.clone(), cost_center_ids.clone());
5620
5621            // Look up country pack for payroll deductions and labels
5622            let payroll_pack = self.primary_pack();
5623
5624            // Store the pack on the generator so generate() resolves
5625            // localized deduction rates and labels from it.
5626            payroll_gen.set_country_pack(payroll_pack.clone());
5627
5628            let employees_with_salary: Vec<(
5629                String,
5630                rust_decimal::Decimal,
5631                Option<String>,
5632                Option<String>,
5633            )> = self
5634                .master_data
5635                .employees
5636                .iter()
5637                .map(|e| {
5638                    // Use the employee's actual annual base salary.
5639                    // Fall back to $60,000 / yr if somehow zero.
5640                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
5641                        e.base_salary
5642                    } else {
5643                        rust_decimal::Decimal::from(60_000)
5644                    };
5645                    (
5646                        e.employee_id.clone(),
5647                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
5648                        e.cost_center.clone(),
5649                        e.department_id.clone(),
5650                    )
5651                })
5652                .collect();
5653
5654            for month in 0..self.config.global.period_months {
5655                let period_start = start_date + chrono::Months::new(month);
5656                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
5657                let (run, items) = payroll_gen.generate(
5658                    company_code,
5659                    &employees_with_salary,
5660                    period_start,
5661                    period_end,
5662                    currency,
5663                );
5664                snapshot.payroll_runs.push(run);
5665                snapshot.payroll_run_count += 1;
5666                snapshot.payroll_line_item_count += items.len();
5667                snapshot.payroll_line_items.extend(items);
5668            }
5669        }
5670
5671        // Generate time entries
5672        if self.config.hr.time_attendance.enabled {
5673            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
5674                .with_pools(employee_ids.clone(), cost_center_ids.clone());
5675            let entries = time_gen.generate(
5676                &employee_ids,
5677                start_date,
5678                end_date,
5679                &self.config.hr.time_attendance,
5680            );
5681            snapshot.time_entry_count = entries.len();
5682            snapshot.time_entries = entries;
5683        }
5684
5685        // Generate expense reports
5686        if self.config.hr.expenses.enabled {
5687            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
5688                .with_pools(employee_ids.clone(), cost_center_ids.clone());
5689            expense_gen.set_country_pack(self.primary_pack().clone());
5690            let company_currency = self
5691                .config
5692                .companies
5693                .first()
5694                .map(|c| c.currency.as_str())
5695                .unwrap_or("USD");
5696            let reports = expense_gen.generate_with_currency(
5697                &employee_ids,
5698                start_date,
5699                end_date,
5700                &self.config.hr.expenses,
5701                company_currency,
5702            );
5703            snapshot.expense_report_count = reports.len();
5704            snapshot.expense_reports = reports;
5705        }
5706
5707        // Generate benefit enrollments (gated on payroll, since benefits require employees)
5708        if self.config.hr.payroll.enabled {
5709            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
5710            let employee_pairs: Vec<(String, String)> = self
5711                .master_data
5712                .employees
5713                .iter()
5714                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
5715                .collect();
5716            let enrollments =
5717                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
5718            snapshot.benefit_enrollment_count = enrollments.len();
5719            snapshot.benefit_enrollments = enrollments;
5720        }
5721
5722        // Generate defined benefit pension plans (IAS 19 / ASC 715)
5723        if self.phase_config.generate_hr {
5724            let entity_name = self
5725                .config
5726                .companies
5727                .first()
5728                .map(|c| c.name.as_str())
5729                .unwrap_or("Entity");
5730            let period_months = self.config.global.period_months;
5731            let period_label = {
5732                let y = start_date.year();
5733                let m = start_date.month();
5734                if period_months >= 12 {
5735                    format!("FY{y}")
5736                } else {
5737                    format!("{y}-{m:02}")
5738                }
5739            };
5740            let reporting_date =
5741                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
5742
5743            // Compute average annual salary from actual payroll data when available.
5744            // PayrollRun.total_gross covers all employees for one pay period; we sum
5745            // across all runs and divide by employee_count to get per-employee total,
5746            // then annualise for sub-annual periods.
5747            let avg_salary: Option<rust_decimal::Decimal> = {
5748                let employee_count = employee_ids.len();
5749                if self.config.hr.payroll.enabled
5750                    && employee_count > 0
5751                    && !snapshot.payroll_runs.is_empty()
5752                {
5753                    // Sum total gross pay across all payroll runs for this company
5754                    let total_gross: rust_decimal::Decimal = snapshot
5755                        .payroll_runs
5756                        .iter()
5757                        .filter(|r| r.company_code == company_code)
5758                        .map(|r| r.total_gross)
5759                        .sum();
5760                    if total_gross > rust_decimal::Decimal::ZERO {
5761                        // Annualise: total_gross covers `period_months` months of pay
5762                        let annual_total = if period_months > 0 && period_months < 12 {
5763                            total_gross * rust_decimal::Decimal::from(12u32)
5764                                / rust_decimal::Decimal::from(period_months)
5765                        } else {
5766                            total_gross
5767                        };
5768                        Some(
5769                            (annual_total / rust_decimal::Decimal::from(employee_count))
5770                                .round_dp(2),
5771                        )
5772                    } else {
5773                        None
5774                    }
5775                } else {
5776                    None
5777                }
5778            };
5779
5780            let mut pension_gen =
5781                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
5782            let pension_snap = pension_gen.generate(
5783                company_code,
5784                entity_name,
5785                &period_label,
5786                reporting_date,
5787                employee_ids.len(),
5788                currency,
5789                avg_salary,
5790                period_months,
5791            );
5792            snapshot.pension_plan_count = pension_snap.plans.len();
5793            snapshot.pension_plans = pension_snap.plans;
5794            snapshot.pension_obligations = pension_snap.obligations;
5795            snapshot.pension_plan_assets = pension_snap.plan_assets;
5796            snapshot.pension_disclosures = pension_snap.disclosures;
5797            // Pension JEs are returned here so they can be added to entries
5798            // in the caller (stored temporarily on snapshot for transfer).
5799            // We embed them in the hr snapshot for simplicity; the orchestrator
5800            // will extract and extend `entries`.
5801            snapshot.pension_journal_entries = pension_snap.journal_entries;
5802        }
5803
5804        // Generate stock-based compensation (ASC 718 / IFRS 2)
5805        if self.phase_config.generate_hr && !employee_ids.is_empty() {
5806            let period_months = self.config.global.period_months;
5807            let period_label = {
5808                let y = start_date.year();
5809                let m = start_date.month();
5810                if period_months >= 12 {
5811                    format!("FY{y}")
5812                } else {
5813                    format!("{y}-{m:02}")
5814                }
5815            };
5816            let reporting_date =
5817                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
5818
5819            let mut stock_comp_gen =
5820                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
5821            let stock_snap = stock_comp_gen.generate(
5822                company_code,
5823                &employee_ids,
5824                start_date,
5825                &period_label,
5826                reporting_date,
5827                currency,
5828            );
5829            snapshot.stock_grant_count = stock_snap.grants.len();
5830            snapshot.stock_grants = stock_snap.grants;
5831            snapshot.stock_comp_expenses = stock_snap.expenses;
5832            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
5833        }
5834
5835        stats.payroll_run_count = snapshot.payroll_run_count;
5836        stats.time_entry_count = snapshot.time_entry_count;
5837        stats.expense_report_count = snapshot.expense_report_count;
5838        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
5839        stats.pension_plan_count = snapshot.pension_plan_count;
5840        stats.stock_grant_count = snapshot.stock_grant_count;
5841
5842        info!(
5843            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
5844            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
5845            snapshot.time_entry_count, snapshot.expense_report_count,
5846            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
5847            snapshot.stock_grant_count
5848        );
5849        self.check_resources_with_log("post-hr")?;
5850
5851        Ok(snapshot)
5852    }
5853
5854    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
5855    fn phase_accounting_standards(
5856        &mut self,
5857        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
5858        journal_entries: &[JournalEntry],
5859        stats: &mut EnhancedGenerationStatistics,
5860    ) -> SynthResult<AccountingStandardsSnapshot> {
5861        if !self.phase_config.generate_accounting_standards {
5862            debug!("Phase 17: Skipped (accounting standards generation disabled)");
5863            return Ok(AccountingStandardsSnapshot::default());
5864        }
5865        info!("Phase 17: Generating Accounting Standards Data");
5866
5867        let seed = self.seed;
5868        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5869            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5870        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5871        let company_code = self
5872            .config
5873            .companies
5874            .first()
5875            .map(|c| c.code.as_str())
5876            .unwrap_or("1000");
5877        let currency = self
5878            .config
5879            .companies
5880            .first()
5881            .map(|c| c.currency.as_str())
5882            .unwrap_or("USD");
5883
5884        // Convert config framework to standards framework.
5885        // If the user explicitly set a framework in the YAML config, use that.
5886        // Otherwise, fall back to the country pack's accounting.framework field,
5887        // and if that is also absent or unrecognised, default to US GAAP.
5888        let framework = match self.config.accounting_standards.framework {
5889            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
5890                datasynth_standards::framework::AccountingFramework::UsGaap
5891            }
5892            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
5893                datasynth_standards::framework::AccountingFramework::Ifrs
5894            }
5895            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
5896                datasynth_standards::framework::AccountingFramework::DualReporting
5897            }
5898            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
5899                datasynth_standards::framework::AccountingFramework::FrenchGaap
5900            }
5901            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
5902                datasynth_standards::framework::AccountingFramework::GermanGaap
5903            }
5904            None => {
5905                // Derive framework from the primary company's country pack
5906                let pack = self.primary_pack();
5907                let pack_fw = pack.accounting.framework.as_str();
5908                match pack_fw {
5909                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
5910                    "dual_reporting" => {
5911                        datasynth_standards::framework::AccountingFramework::DualReporting
5912                    }
5913                    "french_gaap" => {
5914                        datasynth_standards::framework::AccountingFramework::FrenchGaap
5915                    }
5916                    "german_gaap" | "hgb" => {
5917                        datasynth_standards::framework::AccountingFramework::GermanGaap
5918                    }
5919                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
5920                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
5921                }
5922            }
5923        };
5924
5925        let mut snapshot = AccountingStandardsSnapshot::default();
5926
5927        // Revenue recognition
5928        if self.config.accounting_standards.revenue_recognition.enabled {
5929            let customer_ids: Vec<String> = self
5930                .master_data
5931                .customers
5932                .iter()
5933                .map(|c| c.customer_id.clone())
5934                .collect();
5935
5936            if !customer_ids.is_empty() {
5937                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
5938                let contracts = rev_gen.generate(
5939                    company_code,
5940                    &customer_ids,
5941                    start_date,
5942                    end_date,
5943                    currency,
5944                    &self.config.accounting_standards.revenue_recognition,
5945                    framework,
5946                );
5947                snapshot.revenue_contract_count = contracts.len();
5948                snapshot.contracts = contracts;
5949            }
5950        }
5951
5952        // Impairment testing
5953        if self.config.accounting_standards.impairment.enabled {
5954            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
5955                .master_data
5956                .assets
5957                .iter()
5958                .map(|a| {
5959                    (
5960                        a.asset_id.clone(),
5961                        a.description.clone(),
5962                        a.acquisition_cost,
5963                    )
5964                })
5965                .collect();
5966
5967            if !asset_data.is_empty() {
5968                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
5969                let tests = imp_gen.generate(
5970                    company_code,
5971                    &asset_data,
5972                    end_date,
5973                    &self.config.accounting_standards.impairment,
5974                    framework,
5975                );
5976                snapshot.impairment_test_count = tests.len();
5977                snapshot.impairment_tests = tests;
5978            }
5979        }
5980
5981        // Business combinations (IFRS 3 / ASC 805)
5982        if self
5983            .config
5984            .accounting_standards
5985            .business_combinations
5986            .enabled
5987        {
5988            let bc_config = &self.config.accounting_standards.business_combinations;
5989            let framework_str = match framework {
5990                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
5991                _ => "US_GAAP",
5992            };
5993            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
5994            let bc_snap = bc_gen.generate(
5995                company_code,
5996                currency,
5997                start_date,
5998                end_date,
5999                bc_config.acquisition_count,
6000                framework_str,
6001            );
6002            snapshot.business_combination_count = bc_snap.combinations.len();
6003            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6004            snapshot.business_combinations = bc_snap.combinations;
6005        }
6006
6007        // Expected Credit Loss (IFRS 9 / ASC 326)
6008        if self
6009            .config
6010            .accounting_standards
6011            .expected_credit_loss
6012            .enabled
6013        {
6014            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6015            let framework_str = match framework {
6016                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6017                _ => "ASC_326",
6018            };
6019
6020            // Use AR aging data from the subledger snapshot if available;
6021            // otherwise generate synthetic bucket exposures.
6022            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6023
6024            let mut ecl_gen = EclGenerator::new(seed + 43);
6025
6026            // Collect combined bucket totals across all company AR aging reports.
6027            let bucket_exposures: Vec<(
6028                datasynth_core::models::subledger::ar::AgingBucket,
6029                rust_decimal::Decimal,
6030            )> = if ar_aging_reports.is_empty() {
6031                // No AR aging data — synthesise plausible bucket exposures.
6032                use datasynth_core::models::subledger::ar::AgingBucket;
6033                vec![
6034                    (
6035                        AgingBucket::Current,
6036                        rust_decimal::Decimal::from(500_000_u32),
6037                    ),
6038                    (
6039                        AgingBucket::Days1To30,
6040                        rust_decimal::Decimal::from(120_000_u32),
6041                    ),
6042                    (
6043                        AgingBucket::Days31To60,
6044                        rust_decimal::Decimal::from(45_000_u32),
6045                    ),
6046                    (
6047                        AgingBucket::Days61To90,
6048                        rust_decimal::Decimal::from(15_000_u32),
6049                    ),
6050                    (
6051                        AgingBucket::Over90Days,
6052                        rust_decimal::Decimal::from(8_000_u32),
6053                    ),
6054                ]
6055            } else {
6056                use datasynth_core::models::subledger::ar::AgingBucket;
6057                // Sum bucket totals from all reports.
6058                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6059                    std::collections::HashMap::new();
6060                for report in ar_aging_reports {
6061                    for (bucket, amount) in &report.bucket_totals {
6062                        *totals.entry(*bucket).or_default() += amount;
6063                    }
6064                }
6065                AgingBucket::all()
6066                    .into_iter()
6067                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6068                    .collect()
6069            };
6070
6071            let ecl_snap = ecl_gen.generate(
6072                company_code,
6073                end_date,
6074                &bucket_exposures,
6075                ecl_config,
6076                &period_label,
6077                framework_str,
6078            );
6079
6080            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6081            snapshot.ecl_models = ecl_snap.ecl_models;
6082            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6083            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6084        }
6085
6086        // Provisions and contingencies (IAS 37 / ASC 450)
6087        {
6088            let framework_str = match framework {
6089                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6090                _ => "US_GAAP",
6091            };
6092
6093            // Compute actual revenue from the journal entries generated so far.
6094            // The `journal_entries` slice passed to this phase contains all GL entries
6095            // up to and including Period Close. Fall back to a minimum of 100_000 to
6096            // avoid degenerate zero-based provision amounts on first-period datasets.
6097            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6098                .max(rust_decimal::Decimal::from(100_000_u32));
6099
6100            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6101
6102            let mut prov_gen = ProvisionGenerator::new(seed + 44);
6103            let prov_snap = prov_gen.generate(
6104                company_code,
6105                currency,
6106                revenue_proxy,
6107                end_date,
6108                &period_label,
6109                framework_str,
6110                None, // prior_opening: no carry-forward data in single-period runs
6111            );
6112
6113            snapshot.provision_count = prov_snap.provisions.len();
6114            snapshot.provisions = prov_snap.provisions;
6115            snapshot.provision_movements = prov_snap.movements;
6116            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6117            snapshot.provision_journal_entries = prov_snap.journal_entries;
6118        }
6119
6120        // IAS 21 Functional Currency Translation
6121        // For each company whose functional currency differs from the presentation
6122        // currency, generate a CurrencyTranslationResult with CTA (OCI).
6123        {
6124            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6125
6126            let presentation_currency = self
6127                .config
6128                .global
6129                .presentation_currency
6130                .clone()
6131                .unwrap_or_else(|| self.config.global.group_currency.clone());
6132
6133            // Build a minimal rate table populated with approximate rates from
6134            // the FX model base rates (USD-based) so we can do the translation.
6135            let mut rate_table = FxRateTable::new(&presentation_currency);
6136
6137            // Populate with base rates against USD; if presentation_currency is
6138            // not USD we do a best-effort two-step conversion using the table's
6139            // triangulation support.
6140            let base_rates = base_rates_usd();
6141            for (ccy, rate) in &base_rates {
6142                rate_table.add_rate(FxRate::new(
6143                    ccy,
6144                    "USD",
6145                    RateType::Closing,
6146                    end_date,
6147                    *rate,
6148                    "SYNTHETIC",
6149                ));
6150                // Average rate = 98% of closing (approximation).
6151                // 0.98 = 98/100 = Decimal::new(98, 2)
6152                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6153                rate_table.add_rate(FxRate::new(
6154                    ccy,
6155                    "USD",
6156                    RateType::Average,
6157                    end_date,
6158                    avg,
6159                    "SYNTHETIC",
6160                ));
6161            }
6162
6163            let mut translation_results = Vec::new();
6164            for company in &self.config.companies {
6165                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
6166                // to ensure the translation produces non-trivial CTA amounts.
6167                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6168                    .max(rust_decimal::Decimal::from(100_000_u32));
6169
6170                let func_ccy = company
6171                    .functional_currency
6172                    .clone()
6173                    .unwrap_or_else(|| company.currency.clone());
6174
6175                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6176                    &company.code,
6177                    &func_ccy,
6178                    &presentation_currency,
6179                    &ias21_period_label,
6180                    end_date,
6181                    company_revenue,
6182                    &rate_table,
6183                );
6184                translation_results.push(result);
6185            }
6186
6187            snapshot.currency_translation_count = translation_results.len();
6188            snapshot.currency_translation_results = translation_results;
6189        }
6190
6191        stats.revenue_contract_count = snapshot.revenue_contract_count;
6192        stats.impairment_test_count = snapshot.impairment_test_count;
6193        stats.business_combination_count = snapshot.business_combination_count;
6194        stats.ecl_model_count = snapshot.ecl_model_count;
6195        stats.provision_count = snapshot.provision_count;
6196
6197        info!(
6198            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6199            snapshot.revenue_contract_count,
6200            snapshot.impairment_test_count,
6201            snapshot.business_combination_count,
6202            snapshot.ecl_model_count,
6203            snapshot.provision_count,
6204            snapshot.currency_translation_count
6205        );
6206        self.check_resources_with_log("post-accounting-standards")?;
6207
6208        Ok(snapshot)
6209    }
6210
6211    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
6212    fn phase_manufacturing(
6213        &mut self,
6214        stats: &mut EnhancedGenerationStatistics,
6215    ) -> SynthResult<ManufacturingSnapshot> {
6216        if !self.phase_config.generate_manufacturing {
6217            debug!("Phase 18: Skipped (manufacturing generation disabled)");
6218            return Ok(ManufacturingSnapshot::default());
6219        }
6220        info!("Phase 18: Generating Manufacturing Data");
6221
6222        let seed = self.seed;
6223        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6224            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6225        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6226        let company_code = self
6227            .config
6228            .companies
6229            .first()
6230            .map(|c| c.code.as_str())
6231            .unwrap_or("1000");
6232
6233        let material_data: Vec<(String, String)> = self
6234            .master_data
6235            .materials
6236            .iter()
6237            .map(|m| (m.material_id.clone(), m.description.clone()))
6238            .collect();
6239
6240        if material_data.is_empty() {
6241            debug!("Phase 18: Skipped (no materials available)");
6242            return Ok(ManufacturingSnapshot::default());
6243        }
6244
6245        let mut snapshot = ManufacturingSnapshot::default();
6246
6247        // Generate production orders
6248        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
6249        let production_orders = prod_gen.generate(
6250            company_code,
6251            &material_data,
6252            start_date,
6253            end_date,
6254            &self.config.manufacturing.production_orders,
6255            &self.config.manufacturing.costing,
6256            &self.config.manufacturing.routing,
6257        );
6258        snapshot.production_order_count = production_orders.len();
6259
6260        // Generate quality inspections from production orders
6261        let inspection_data: Vec<(String, String, String)> = production_orders
6262            .iter()
6263            .map(|po| {
6264                (
6265                    po.order_id.clone(),
6266                    po.material_id.clone(),
6267                    po.material_description.clone(),
6268                )
6269            })
6270            .collect();
6271
6272        snapshot.production_orders = production_orders;
6273
6274        if !inspection_data.is_empty() {
6275            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
6276            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
6277            snapshot.quality_inspection_count = inspections.len();
6278            snapshot.quality_inspections = inspections;
6279        }
6280
6281        // Generate cycle counts (one per month)
6282        let storage_locations: Vec<(String, String)> = material_data
6283            .iter()
6284            .enumerate()
6285            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
6286            .collect();
6287
6288        let employee_ids: Vec<String> = self
6289            .master_data
6290            .employees
6291            .iter()
6292            .map(|e| e.employee_id.clone())
6293            .collect();
6294        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
6295            .with_employee_pool(employee_ids);
6296        let mut cycle_count_total = 0usize;
6297        for month in 0..self.config.global.period_months {
6298            let count_date = start_date + chrono::Months::new(month);
6299            let items_per_count = storage_locations.len().clamp(10, 50);
6300            let cc = cc_gen.generate(
6301                company_code,
6302                &storage_locations,
6303                count_date,
6304                items_per_count,
6305            );
6306            snapshot.cycle_counts.push(cc);
6307            cycle_count_total += 1;
6308        }
6309        snapshot.cycle_count_count = cycle_count_total;
6310
6311        // Generate BOM components
6312        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 53);
6313        let bom_components = bom_gen.generate(company_code, &material_data);
6314        snapshot.bom_component_count = bom_components.len();
6315        snapshot.bom_components = bom_components;
6316
6317        // Generate inventory movements — link GoodsIssue movements to real production order IDs
6318        let currency = self
6319            .config
6320            .companies
6321            .first()
6322            .map(|c| c.currency.as_str())
6323            .unwrap_or("USD");
6324        let production_order_ids: Vec<String> = snapshot
6325            .production_orders
6326            .iter()
6327            .map(|po| po.order_id.clone())
6328            .collect();
6329        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 54);
6330        let inventory_movements = inv_mov_gen.generate_with_production_orders(
6331            company_code,
6332            &material_data,
6333            start_date,
6334            end_date,
6335            2,
6336            currency,
6337            &production_order_ids,
6338        );
6339        snapshot.inventory_movement_count = inventory_movements.len();
6340        snapshot.inventory_movements = inventory_movements;
6341
6342        stats.production_order_count = snapshot.production_order_count;
6343        stats.quality_inspection_count = snapshot.quality_inspection_count;
6344        stats.cycle_count_count = snapshot.cycle_count_count;
6345        stats.bom_component_count = snapshot.bom_component_count;
6346        stats.inventory_movement_count = snapshot.inventory_movement_count;
6347
6348        info!(
6349            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
6350            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
6351            snapshot.bom_component_count, snapshot.inventory_movement_count
6352        );
6353        self.check_resources_with_log("post-manufacturing")?;
6354
6355        Ok(snapshot)
6356    }
6357
6358    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
6359    fn phase_sales_kpi_budgets(
6360        &mut self,
6361        coa: &Arc<ChartOfAccounts>,
6362        financial_reporting: &FinancialReportingSnapshot,
6363        stats: &mut EnhancedGenerationStatistics,
6364    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
6365        if !self.phase_config.generate_sales_kpi_budgets {
6366            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
6367            return Ok(SalesKpiBudgetsSnapshot::default());
6368        }
6369        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
6370
6371        let seed = self.seed;
6372        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6373            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6374        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6375        let company_code = self
6376            .config
6377            .companies
6378            .first()
6379            .map(|c| c.code.as_str())
6380            .unwrap_or("1000");
6381
6382        let mut snapshot = SalesKpiBudgetsSnapshot::default();
6383
6384        // Sales Quotes
6385        if self.config.sales_quotes.enabled {
6386            let customer_data: Vec<(String, String)> = self
6387                .master_data
6388                .customers
6389                .iter()
6390                .map(|c| (c.customer_id.clone(), c.name.clone()))
6391                .collect();
6392            let material_data: Vec<(String, String)> = self
6393                .master_data
6394                .materials
6395                .iter()
6396                .map(|m| (m.material_id.clone(), m.description.clone()))
6397                .collect();
6398
6399            if !customer_data.is_empty() && !material_data.is_empty() {
6400                let employee_ids: Vec<String> = self
6401                    .master_data
6402                    .employees
6403                    .iter()
6404                    .map(|e| e.employee_id.clone())
6405                    .collect();
6406                let customer_ids: Vec<String> = self
6407                    .master_data
6408                    .customers
6409                    .iter()
6410                    .map(|c| c.customer_id.clone())
6411                    .collect();
6412                let company_currency = self
6413                    .config
6414                    .companies
6415                    .first()
6416                    .map(|c| c.currency.as_str())
6417                    .unwrap_or("USD");
6418
6419                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
6420                    .with_pools(employee_ids, customer_ids);
6421                let quotes = quote_gen.generate_with_currency(
6422                    company_code,
6423                    &customer_data,
6424                    &material_data,
6425                    start_date,
6426                    end_date,
6427                    &self.config.sales_quotes,
6428                    company_currency,
6429                );
6430                snapshot.sales_quote_count = quotes.len();
6431                snapshot.sales_quotes = quotes;
6432            }
6433        }
6434
6435        // Management KPIs
6436        if self.config.financial_reporting.management_kpis.enabled {
6437            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
6438            let mut kpis = kpi_gen.generate(
6439                company_code,
6440                start_date,
6441                end_date,
6442                &self.config.financial_reporting.management_kpis,
6443            );
6444
6445            // Override financial KPIs with actual data from financial statements
6446            {
6447                use rust_decimal::Decimal;
6448
6449                if let Some(income_stmt) =
6450                    financial_reporting.financial_statements.iter().find(|fs| {
6451                        fs.statement_type == StatementType::IncomeStatement
6452                            && fs.company_code == company_code
6453                    })
6454                {
6455                    // Extract revenue and COGS from income statement line items
6456                    let total_revenue: Decimal = income_stmt
6457                        .line_items
6458                        .iter()
6459                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
6460                        .map(|li| li.amount)
6461                        .sum();
6462                    let total_cogs: Decimal = income_stmt
6463                        .line_items
6464                        .iter()
6465                        .filter(|li| {
6466                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
6467                                && !li.is_total
6468                        })
6469                        .map(|li| li.amount.abs())
6470                        .sum();
6471                    let total_opex: Decimal = income_stmt
6472                        .line_items
6473                        .iter()
6474                        .filter(|li| {
6475                            li.section.contains("Expense")
6476                                && !li.is_total
6477                                && !li.section.contains("Cost")
6478                        })
6479                        .map(|li| li.amount.abs())
6480                        .sum();
6481
6482                    if total_revenue > Decimal::ZERO {
6483                        let hundred = Decimal::from(100);
6484                        let gross_margin_pct =
6485                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
6486                        let operating_income = total_revenue - total_cogs - total_opex;
6487                        let op_margin_pct =
6488                            (operating_income * hundred / total_revenue).round_dp(2);
6489
6490                        // Override gross margin and operating margin KPIs
6491                        for kpi in &mut kpis {
6492                            if kpi.name == "Gross Margin" {
6493                                kpi.value = gross_margin_pct;
6494                            } else if kpi.name == "Operating Margin" {
6495                                kpi.value = op_margin_pct;
6496                            }
6497                        }
6498                    }
6499                }
6500
6501                // Override Current Ratio from balance sheet
6502                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
6503                    fs.statement_type == StatementType::BalanceSheet
6504                        && fs.company_code == company_code
6505                }) {
6506                    let current_assets: Decimal = bs
6507                        .line_items
6508                        .iter()
6509                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
6510                        .map(|li| li.amount)
6511                        .sum();
6512                    let current_liabilities: Decimal = bs
6513                        .line_items
6514                        .iter()
6515                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
6516                        .map(|li| li.amount.abs())
6517                        .sum();
6518
6519                    if current_liabilities > Decimal::ZERO {
6520                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
6521                        for kpi in &mut kpis {
6522                            if kpi.name == "Current Ratio" {
6523                                kpi.value = current_ratio;
6524                            }
6525                        }
6526                    }
6527                }
6528            }
6529
6530            snapshot.kpi_count = kpis.len();
6531            snapshot.kpis = kpis;
6532        }
6533
6534        // Budgets
6535        if self.config.financial_reporting.budgets.enabled {
6536            let account_data: Vec<(String, String)> = coa
6537                .accounts
6538                .iter()
6539                .map(|a| (a.account_number.clone(), a.short_description.clone()))
6540                .collect();
6541
6542            if !account_data.is_empty() {
6543                let fiscal_year = start_date.year() as u32;
6544                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
6545                let budget = budget_gen.generate(
6546                    company_code,
6547                    fiscal_year,
6548                    &account_data,
6549                    &self.config.financial_reporting.budgets,
6550                );
6551                snapshot.budget_line_count = budget.line_items.len();
6552                snapshot.budgets.push(budget);
6553            }
6554        }
6555
6556        stats.sales_quote_count = snapshot.sales_quote_count;
6557        stats.kpi_count = snapshot.kpi_count;
6558        stats.budget_line_count = snapshot.budget_line_count;
6559
6560        info!(
6561            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
6562            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
6563        );
6564        self.check_resources_with_log("post-sales-kpi-budgets")?;
6565
6566        Ok(snapshot)
6567    }
6568
6569    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
6570    fn phase_tax_generation(
6571        &mut self,
6572        document_flows: &DocumentFlowSnapshot,
6573        journal_entries: &[JournalEntry],
6574        stats: &mut EnhancedGenerationStatistics,
6575    ) -> SynthResult<TaxSnapshot> {
6576        if !self.phase_config.generate_tax {
6577            debug!("Phase 20: Skipped (tax generation disabled)");
6578            return Ok(TaxSnapshot::default());
6579        }
6580        info!("Phase 20: Generating Tax Data");
6581
6582        let seed = self.seed;
6583        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6584            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6585        let fiscal_year = start_date.year();
6586        let company_code = self
6587            .config
6588            .companies
6589            .first()
6590            .map(|c| c.code.as_str())
6591            .unwrap_or("1000");
6592
6593        let mut gen =
6594            datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
6595
6596        let pack = self.primary_pack().clone();
6597        let (jurisdictions, codes) =
6598            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
6599
6600        // Generate tax provisions for each company
6601        let mut provisions = Vec::new();
6602        if self.config.tax.provisions.enabled {
6603            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
6604            for company in &self.config.companies {
6605                let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
6606                let statutory_rate = rust_decimal::Decimal::new(
6607                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
6608                    2,
6609                );
6610                let provision = provision_gen.generate(
6611                    &company.code,
6612                    start_date,
6613                    pre_tax_income,
6614                    statutory_rate,
6615                );
6616                provisions.push(provision);
6617            }
6618        }
6619
6620        // Generate tax lines from document invoices
6621        let mut tax_lines = Vec::new();
6622        if !codes.is_empty() {
6623            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
6624                datasynth_generators::TaxLineGeneratorConfig::default(),
6625                codes.clone(),
6626                seed + 72,
6627            );
6628
6629            // Tax lines from vendor invoices (input tax)
6630            // Use the first company's country as buyer country
6631            let buyer_country = self
6632                .config
6633                .companies
6634                .first()
6635                .map(|c| c.country.as_str())
6636                .unwrap_or("US");
6637            for vi in &document_flows.vendor_invoices {
6638                let lines = tax_line_gen.generate_for_document(
6639                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
6640                    &vi.header.document_id,
6641                    buyer_country, // seller approx same country
6642                    buyer_country,
6643                    vi.payable_amount,
6644                    vi.header.document_date,
6645                    None,
6646                );
6647                tax_lines.extend(lines);
6648            }
6649
6650            // Tax lines from customer invoices (output tax)
6651            for ci in &document_flows.customer_invoices {
6652                let lines = tax_line_gen.generate_for_document(
6653                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
6654                    &ci.header.document_id,
6655                    buyer_country, // seller is the company
6656                    buyer_country,
6657                    ci.total_gross_amount,
6658                    ci.header.document_date,
6659                    None,
6660                );
6661                tax_lines.extend(lines);
6662            }
6663        }
6664
6665        // Generate deferred tax data (IAS 12 / ASC 740) for each company
6666        let deferred_tax = {
6667            let companies: Vec<(&str, &str)> = self
6668                .config
6669                .companies
6670                .iter()
6671                .map(|c| (c.code.as_str(), c.country.as_str()))
6672                .collect();
6673            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 73);
6674            deferred_gen.generate(&companies, start_date, journal_entries)
6675        };
6676
6677        let snapshot = TaxSnapshot {
6678            jurisdiction_count: jurisdictions.len(),
6679            code_count: codes.len(),
6680            jurisdictions,
6681            codes,
6682            tax_provisions: provisions,
6683            tax_lines,
6684            tax_returns: Vec::new(),
6685            withholding_records: Vec::new(),
6686            tax_anomaly_labels: Vec::new(),
6687            deferred_tax,
6688        };
6689
6690        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
6691        stats.tax_code_count = snapshot.code_count;
6692        stats.tax_provision_count = snapshot.tax_provisions.len();
6693        stats.tax_line_count = snapshot.tax_lines.len();
6694
6695        info!(
6696            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs",
6697            snapshot.jurisdiction_count,
6698            snapshot.code_count,
6699            snapshot.tax_provisions.len(),
6700            snapshot.deferred_tax.temporary_differences.len(),
6701            snapshot.deferred_tax.journal_entries.len(),
6702        );
6703        self.check_resources_with_log("post-tax")?;
6704
6705        Ok(snapshot)
6706    }
6707
6708    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
6709    fn phase_esg_generation(
6710        &mut self,
6711        document_flows: &DocumentFlowSnapshot,
6712        stats: &mut EnhancedGenerationStatistics,
6713    ) -> SynthResult<EsgSnapshot> {
6714        if !self.phase_config.generate_esg {
6715            debug!("Phase 21: Skipped (ESG generation disabled)");
6716            return Ok(EsgSnapshot::default());
6717        }
6718        let degradation = self.check_resources()?;
6719        if degradation >= DegradationLevel::Reduced {
6720            debug!(
6721                "Phase skipped due to resource pressure (degradation: {:?})",
6722                degradation
6723            );
6724            return Ok(EsgSnapshot::default());
6725        }
6726        info!("Phase 21: Generating ESG Data");
6727
6728        let seed = self.seed;
6729        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6730            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6731        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6732        let entity_id = self
6733            .config
6734            .companies
6735            .first()
6736            .map(|c| c.code.as_str())
6737            .unwrap_or("1000");
6738
6739        let esg_cfg = &self.config.esg;
6740        let mut snapshot = EsgSnapshot::default();
6741
6742        // Energy consumption (feeds into scope 1 & 2 emissions)
6743        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
6744            esg_cfg.environmental.energy.clone(),
6745            seed + 80,
6746        );
6747        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
6748
6749        // Water usage
6750        let facility_count = esg_cfg.environmental.energy.facility_count;
6751        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
6752        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
6753
6754        // Waste
6755        let mut waste_gen = datasynth_generators::WasteGenerator::new(
6756            seed + 82,
6757            esg_cfg.environmental.waste.diversion_target,
6758            facility_count,
6759        );
6760        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
6761
6762        // Emissions (scope 1, 2, 3)
6763        let mut emission_gen =
6764            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
6765
6766        // Build EnergyInput from energy_records
6767        let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
6768            .iter()
6769            .map(|e| datasynth_generators::EnergyInput {
6770                facility_id: e.facility_id.clone(),
6771                energy_type: match e.energy_source {
6772                    EnergySourceType::NaturalGas => {
6773                        datasynth_generators::EnergyInputType::NaturalGas
6774                    }
6775                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
6776                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
6777                    _ => datasynth_generators::EnergyInputType::Electricity,
6778                },
6779                consumption_kwh: e.consumption_kwh,
6780                period: e.period,
6781            })
6782            .collect();
6783
6784        let mut emissions = Vec::new();
6785        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
6786        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
6787
6788        // Scope 3: use vendor spend data from actual payments
6789        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
6790            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
6791            for payment in &document_flows.payments {
6792                if payment.is_vendor {
6793                    *totals
6794                        .entry(payment.business_partner_id.clone())
6795                        .or_default() += payment.amount;
6796                }
6797            }
6798            totals
6799        };
6800        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
6801            .master_data
6802            .vendors
6803            .iter()
6804            .map(|v| {
6805                let spend = vendor_payment_totals
6806                    .get(&v.vendor_id)
6807                    .copied()
6808                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
6809                datasynth_generators::VendorSpendInput {
6810                    vendor_id: v.vendor_id.clone(),
6811                    category: format!("{:?}", v.vendor_type).to_lowercase(),
6812                    spend,
6813                    country: v.country.clone(),
6814                }
6815            })
6816            .collect();
6817        if !vendor_spend.is_empty() {
6818            emissions.extend(emission_gen.generate_scope3_purchased_goods(
6819                entity_id,
6820                &vendor_spend,
6821                start_date,
6822                end_date,
6823            ));
6824        }
6825
6826        // Business travel & commuting (scope 3)
6827        let headcount = self.master_data.employees.len() as u32;
6828        if headcount > 0 {
6829            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
6830            emissions.extend(emission_gen.generate_scope3_business_travel(
6831                entity_id,
6832                travel_spend,
6833                start_date,
6834            ));
6835            emissions
6836                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
6837        }
6838
6839        snapshot.emission_count = emissions.len();
6840        snapshot.emissions = emissions;
6841        snapshot.energy = energy_records;
6842
6843        // Social: Workforce diversity, pay equity, safety
6844        let mut workforce_gen =
6845            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
6846        let total_headcount = headcount.max(100);
6847        snapshot.diversity =
6848            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
6849        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
6850        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
6851            entity_id,
6852            facility_count,
6853            start_date,
6854            end_date,
6855        );
6856
6857        // Compute safety metrics
6858        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
6859        let safety_metric = workforce_gen.compute_safety_metrics(
6860            entity_id,
6861            &snapshot.safety_incidents,
6862            total_hours,
6863            start_date,
6864        );
6865        snapshot.safety_metrics = vec![safety_metric];
6866
6867        // Governance
6868        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
6869            seed + 85,
6870            esg_cfg.governance.board_size,
6871            esg_cfg.governance.independence_target,
6872        );
6873        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
6874
6875        // Supplier ESG assessments
6876        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
6877            esg_cfg.supply_chain_esg.clone(),
6878            seed + 86,
6879        );
6880        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
6881            .master_data
6882            .vendors
6883            .iter()
6884            .map(|v| datasynth_generators::VendorInput {
6885                vendor_id: v.vendor_id.clone(),
6886                country: v.country.clone(),
6887                industry: format!("{:?}", v.vendor_type).to_lowercase(),
6888                quality_score: None,
6889            })
6890            .collect();
6891        snapshot.supplier_assessments =
6892            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
6893
6894        // Disclosures
6895        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
6896            seed + 87,
6897            esg_cfg.reporting.clone(),
6898            esg_cfg.climate_scenarios.clone(),
6899        );
6900        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
6901        snapshot.disclosures = disclosure_gen.generate_disclosures(
6902            entity_id,
6903            &snapshot.materiality,
6904            start_date,
6905            end_date,
6906        );
6907        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
6908        snapshot.disclosure_count = snapshot.disclosures.len();
6909
6910        // Anomaly injection
6911        if esg_cfg.anomaly_rate > 0.0 {
6912            let mut anomaly_injector =
6913                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
6914            let mut labels = Vec::new();
6915            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
6916            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
6917            labels.extend(
6918                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
6919            );
6920            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
6921            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
6922            snapshot.anomaly_labels = labels;
6923        }
6924
6925        stats.esg_emission_count = snapshot.emission_count;
6926        stats.esg_disclosure_count = snapshot.disclosure_count;
6927
6928        info!(
6929            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
6930            snapshot.emission_count,
6931            snapshot.disclosure_count,
6932            snapshot.supplier_assessments.len()
6933        );
6934        self.check_resources_with_log("post-esg")?;
6935
6936        Ok(snapshot)
6937    }
6938
6939    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
6940    fn phase_treasury_data(
6941        &mut self,
6942        document_flows: &DocumentFlowSnapshot,
6943        subledger: &SubledgerSnapshot,
6944        intercompany: &IntercompanySnapshot,
6945        stats: &mut EnhancedGenerationStatistics,
6946    ) -> SynthResult<TreasurySnapshot> {
6947        if !self.phase_config.generate_treasury {
6948            debug!("Phase 22: Skipped (treasury generation disabled)");
6949            return Ok(TreasurySnapshot::default());
6950        }
6951        let degradation = self.check_resources()?;
6952        if degradation >= DegradationLevel::Reduced {
6953            debug!(
6954                "Phase skipped due to resource pressure (degradation: {:?})",
6955                degradation
6956            );
6957            return Ok(TreasurySnapshot::default());
6958        }
6959        info!("Phase 22: Generating Treasury Data");
6960
6961        let seed = self.seed;
6962        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6963            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6964        let currency = self
6965            .config
6966            .companies
6967            .first()
6968            .map(|c| c.currency.as_str())
6969            .unwrap_or("USD");
6970        let entity_id = self
6971            .config
6972            .companies
6973            .first()
6974            .map(|c| c.code.as_str())
6975            .unwrap_or("1000");
6976
6977        let mut snapshot = TreasurySnapshot::default();
6978
6979        // Generate debt instruments
6980        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
6981            self.config.treasury.debt.clone(),
6982            seed + 90,
6983        );
6984        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
6985
6986        // Generate hedging instruments (IR swaps for floating-rate debt)
6987        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
6988            self.config.treasury.hedging.clone(),
6989            seed + 91,
6990        );
6991        for debt in &snapshot.debt_instruments {
6992            if debt.rate_type == InterestRateType::Variable {
6993                let swap = hedge_gen.generate_ir_swap(
6994                    currency,
6995                    debt.principal,
6996                    debt.origination_date,
6997                    debt.maturity_date,
6998                );
6999                snapshot.hedging_instruments.push(swap);
7000            }
7001        }
7002
7003        // Build FX exposures from foreign-currency payments and generate
7004        // FX forwards + hedge relationship designations via generate() API.
7005        {
7006            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7007            for payment in &document_flows.payments {
7008                if payment.currency != currency {
7009                    let entry = fx_map
7010                        .entry(payment.currency.clone())
7011                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7012                    entry.0 += payment.amount;
7013                    // Use the latest settlement date among grouped payments
7014                    if payment.header.document_date > entry.1 {
7015                        entry.1 = payment.header.document_date;
7016                    }
7017                }
7018            }
7019            if !fx_map.is_empty() {
7020                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7021                    .into_iter()
7022                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
7023                        datasynth_generators::treasury::FxExposure {
7024                            currency_pair: format!("{foreign_ccy}/{currency}"),
7025                            foreign_currency: foreign_ccy,
7026                            net_amount,
7027                            settlement_date,
7028                            description: "AP payment FX exposure".to_string(),
7029                        }
7030                    })
7031                    .collect();
7032                let (fx_instruments, fx_relationships) =
7033                    hedge_gen.generate(start_date, &fx_exposures);
7034                snapshot.hedging_instruments.extend(fx_instruments);
7035                snapshot.hedge_relationships.extend(fx_relationships);
7036            }
7037        }
7038
7039        // Inject anomalies if configured
7040        if self.config.treasury.anomaly_rate > 0.0 {
7041            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7042                seed + 92,
7043                self.config.treasury.anomaly_rate,
7044            );
7045            let mut labels = Vec::new();
7046            labels.extend(
7047                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7048            );
7049            snapshot.treasury_anomaly_labels = labels;
7050        }
7051
7052        // Generate cash positions from payment flows
7053        if self.config.treasury.cash_positioning.enabled {
7054            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7055
7056            // AP payments as outflows
7057            for payment in &document_flows.payments {
7058                cash_flows.push(datasynth_generators::treasury::CashFlow {
7059                    date: payment.header.document_date,
7060                    account_id: format!("{entity_id}-MAIN"),
7061                    amount: payment.amount,
7062                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7063                });
7064            }
7065
7066            // Customer receipts (from O2C chains) as inflows
7067            for chain in &document_flows.o2c_chains {
7068                if let Some(ref receipt) = chain.customer_receipt {
7069                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7070                        date: receipt.header.document_date,
7071                        account_id: format!("{entity_id}-MAIN"),
7072                        amount: receipt.amount,
7073                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7074                    });
7075                }
7076                // Remainder receipts (follow-up to partial payments)
7077                for receipt in &chain.remainder_receipts {
7078                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7079                        date: receipt.header.document_date,
7080                        account_id: format!("{entity_id}-MAIN"),
7081                        amount: receipt.amount,
7082                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7083                    });
7084                }
7085            }
7086
7087            if !cash_flows.is_empty() {
7088                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7089                    self.config.treasury.cash_positioning.clone(),
7090                    seed + 93,
7091                );
7092                let account_id = format!("{entity_id}-MAIN");
7093                snapshot.cash_positions = cash_gen.generate(
7094                    entity_id,
7095                    &account_id,
7096                    currency,
7097                    &cash_flows,
7098                    start_date,
7099                    start_date + chrono::Months::new(self.config.global.period_months),
7100                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
7101                );
7102            }
7103        }
7104
7105        // Generate cash forecasts from AR/AP aging
7106        if self.config.treasury.cash_forecasting.enabled {
7107            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7108
7109            // Build AR aging items from subledger AR invoices
7110            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7111                .ar_invoices
7112                .iter()
7113                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7114                .map(|inv| {
7115                    let days_past_due = if inv.due_date < end_date {
7116                        (end_date - inv.due_date).num_days().max(0) as u32
7117                    } else {
7118                        0
7119                    };
7120                    datasynth_generators::treasury::ArAgingItem {
7121                        expected_date: inv.due_date,
7122                        amount: inv.amount_remaining,
7123                        days_past_due,
7124                        document_id: inv.invoice_number.clone(),
7125                    }
7126                })
7127                .collect();
7128
7129            // Build AP aging items from subledger AP invoices
7130            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7131                .ap_invoices
7132                .iter()
7133                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7134                .map(|inv| datasynth_generators::treasury::ApAgingItem {
7135                    payment_date: inv.due_date,
7136                    amount: inv.amount_remaining,
7137                    document_id: inv.invoice_number.clone(),
7138                })
7139                .collect();
7140
7141            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7142                self.config.treasury.cash_forecasting.clone(),
7143                seed + 94,
7144            );
7145            let forecast = forecast_gen.generate(
7146                entity_id,
7147                currency,
7148                end_date,
7149                &ar_items,
7150                &ap_items,
7151                &[], // scheduled disbursements - empty for now
7152            );
7153            snapshot.cash_forecasts.push(forecast);
7154        }
7155
7156        // Generate cash pools and sweeps
7157        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
7158            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7159            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
7160                self.config.treasury.cash_pooling.clone(),
7161                seed + 95,
7162            );
7163
7164            // Create a pool from available accounts
7165            let account_ids: Vec<String> = snapshot
7166                .cash_positions
7167                .iter()
7168                .map(|cp| cp.bank_account_id.clone())
7169                .collect::<std::collections::HashSet<_>>()
7170                .into_iter()
7171                .collect();
7172
7173            if let Some(pool) =
7174                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
7175            {
7176                // Generate sweeps - build participant balances from last cash position per account
7177                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7178                for cp in &snapshot.cash_positions {
7179                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
7180                }
7181
7182                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
7183                    latest_balances
7184                        .into_iter()
7185                        .filter(|(id, _)| pool.participant_accounts.contains(id))
7186                        .map(
7187                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
7188                                account_id: id,
7189                                balance,
7190                            },
7191                        )
7192                        .collect();
7193
7194                let sweeps =
7195                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
7196                snapshot.cash_pool_sweeps = sweeps;
7197                snapshot.cash_pools.push(pool);
7198            }
7199        }
7200
7201        // Generate bank guarantees
7202        if self.config.treasury.bank_guarantees.enabled {
7203            let vendor_names: Vec<String> = self
7204                .master_data
7205                .vendors
7206                .iter()
7207                .map(|v| v.name.clone())
7208                .collect();
7209            if !vendor_names.is_empty() {
7210                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
7211                    self.config.treasury.bank_guarantees.clone(),
7212                    seed + 96,
7213                );
7214                snapshot.bank_guarantees =
7215                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
7216            }
7217        }
7218
7219        // Generate netting runs from intercompany matched pairs
7220        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
7221            let entity_ids: Vec<String> = self
7222                .config
7223                .companies
7224                .iter()
7225                .map(|c| c.code.clone())
7226                .collect();
7227            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
7228                .matched_pairs
7229                .iter()
7230                .map(|mp| {
7231                    (
7232                        mp.seller_company.clone(),
7233                        mp.buyer_company.clone(),
7234                        mp.amount,
7235                    )
7236                })
7237                .collect();
7238            if entity_ids.len() >= 2 {
7239                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
7240                    self.config.treasury.netting.clone(),
7241                    seed + 97,
7242                );
7243                snapshot.netting_runs = netting_gen.generate(
7244                    &entity_ids,
7245                    currency,
7246                    start_date,
7247                    self.config.global.period_months,
7248                    &ic_amounts,
7249                );
7250            }
7251        }
7252
7253        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
7254        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
7255        stats.cash_position_count = snapshot.cash_positions.len();
7256        stats.cash_forecast_count = snapshot.cash_forecasts.len();
7257        stats.cash_pool_count = snapshot.cash_pools.len();
7258
7259        info!(
7260            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs",
7261            snapshot.debt_instruments.len(),
7262            snapshot.hedging_instruments.len(),
7263            snapshot.cash_positions.len(),
7264            snapshot.cash_forecasts.len(),
7265            snapshot.cash_pools.len(),
7266            snapshot.bank_guarantees.len(),
7267            snapshot.netting_runs.len(),
7268        );
7269        self.check_resources_with_log("post-treasury")?;
7270
7271        Ok(snapshot)
7272    }
7273
7274    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
7275    fn phase_project_accounting(
7276        &mut self,
7277        document_flows: &DocumentFlowSnapshot,
7278        hr: &HrSnapshot,
7279        stats: &mut EnhancedGenerationStatistics,
7280    ) -> SynthResult<ProjectAccountingSnapshot> {
7281        if !self.phase_config.generate_project_accounting {
7282            debug!("Phase 23: Skipped (project accounting disabled)");
7283            return Ok(ProjectAccountingSnapshot::default());
7284        }
7285        let degradation = self.check_resources()?;
7286        if degradation >= DegradationLevel::Reduced {
7287            debug!(
7288                "Phase skipped due to resource pressure (degradation: {:?})",
7289                degradation
7290            );
7291            return Ok(ProjectAccountingSnapshot::default());
7292        }
7293        info!("Phase 23: Generating Project Accounting Data");
7294
7295        let seed = self.seed;
7296        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7297            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7298        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7299        let company_code = self
7300            .config
7301            .companies
7302            .first()
7303            .map(|c| c.code.as_str())
7304            .unwrap_or("1000");
7305
7306        let mut snapshot = ProjectAccountingSnapshot::default();
7307
7308        // Generate projects with WBS hierarchies
7309        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
7310            self.config.project_accounting.clone(),
7311            seed + 95,
7312        );
7313        let pool = project_gen.generate(company_code, start_date, end_date);
7314        snapshot.projects = pool.projects.clone();
7315
7316        // Link source documents to projects for cost allocation
7317        {
7318            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
7319                Vec::new();
7320
7321            // Time entries
7322            for te in &hr.time_entries {
7323                let total_hours = te.hours_regular + te.hours_overtime;
7324                if total_hours > 0.0 {
7325                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7326                        id: te.entry_id.clone(),
7327                        entity_id: company_code.to_string(),
7328                        date: te.date,
7329                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
7330                            .unwrap_or(rust_decimal::Decimal::ZERO),
7331                        source_type: CostSourceType::TimeEntry,
7332                        hours: Some(
7333                            rust_decimal::Decimal::from_f64_retain(total_hours)
7334                                .unwrap_or(rust_decimal::Decimal::ZERO),
7335                        ),
7336                    });
7337                }
7338            }
7339
7340            // Expense reports
7341            for er in &hr.expense_reports {
7342                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7343                    id: er.report_id.clone(),
7344                    entity_id: company_code.to_string(),
7345                    date: er.submission_date,
7346                    amount: er.total_amount,
7347                    source_type: CostSourceType::ExpenseReport,
7348                    hours: None,
7349                });
7350            }
7351
7352            // Purchase orders
7353            for po in &document_flows.purchase_orders {
7354                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7355                    id: po.header.document_id.clone(),
7356                    entity_id: company_code.to_string(),
7357                    date: po.header.document_date,
7358                    amount: po.total_net_amount,
7359                    source_type: CostSourceType::PurchaseOrder,
7360                    hours: None,
7361                });
7362            }
7363
7364            // Vendor invoices
7365            for vi in &document_flows.vendor_invoices {
7366                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7367                    id: vi.header.document_id.clone(),
7368                    entity_id: company_code.to_string(),
7369                    date: vi.header.document_date,
7370                    amount: vi.payable_amount,
7371                    source_type: CostSourceType::VendorInvoice,
7372                    hours: None,
7373                });
7374            }
7375
7376            if !source_docs.is_empty() && !pool.projects.is_empty() {
7377                let mut cost_gen =
7378                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
7379                        self.config.project_accounting.cost_allocation.clone(),
7380                        seed + 99,
7381                    );
7382                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
7383            }
7384        }
7385
7386        // Generate change orders
7387        if self.config.project_accounting.change_orders.enabled {
7388            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
7389                self.config.project_accounting.change_orders.clone(),
7390                seed + 96,
7391            );
7392            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
7393        }
7394
7395        // Generate milestones
7396        if self.config.project_accounting.milestones.enabled {
7397            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
7398                self.config.project_accounting.milestones.clone(),
7399                seed + 97,
7400            );
7401            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
7402        }
7403
7404        // Generate earned value metrics (needs cost lines, so only if we have projects)
7405        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
7406            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
7407                self.config.project_accounting.earned_value.clone(),
7408                seed + 98,
7409            );
7410            snapshot.earned_value_metrics =
7411                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
7412        }
7413
7414        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
7415        if self.config.project_accounting.revenue_recognition.enabled
7416            && !snapshot.projects.is_empty()
7417            && !snapshot.cost_lines.is_empty()
7418        {
7419            use datasynth_generators::project_accounting::RevenueGenerator;
7420            let rev_config = self.config.project_accounting.revenue_recognition.clone();
7421            let avg_contract_value =
7422                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
7423                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
7424
7425            // Build contract value tuples: only customer-type projects get revenue recognition.
7426            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
7427            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
7428                snapshot
7429                    .projects
7430                    .iter()
7431                    .filter(|p| {
7432                        matches!(
7433                            p.project_type,
7434                            datasynth_core::models::ProjectType::Customer
7435                        )
7436                    })
7437                    .map(|p| {
7438                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
7439                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
7440                        // budget × 1.25 → contract value
7441                        } else {
7442                            avg_contract_value
7443                        };
7444                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
7445                        (p.project_id.clone(), cv, etc)
7446                    })
7447                    .collect();
7448
7449            if !contract_values.is_empty() {
7450                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
7451                snapshot.revenue_records = rev_gen.generate(
7452                    &snapshot.projects,
7453                    &snapshot.cost_lines,
7454                    &contract_values,
7455                    start_date,
7456                    end_date,
7457                );
7458                debug!(
7459                    "Generated {} revenue recognition records for {} customer projects",
7460                    snapshot.revenue_records.len(),
7461                    contract_values.len()
7462                );
7463            }
7464        }
7465
7466        stats.project_count = snapshot.projects.len();
7467        stats.project_change_order_count = snapshot.change_orders.len();
7468        stats.project_cost_line_count = snapshot.cost_lines.len();
7469
7470        info!(
7471            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
7472            snapshot.projects.len(),
7473            snapshot.change_orders.len(),
7474            snapshot.milestones.len(),
7475            snapshot.earned_value_metrics.len()
7476        );
7477        self.check_resources_with_log("post-project-accounting")?;
7478
7479        Ok(snapshot)
7480    }
7481
7482    /// Phase 24: Generate process evolution and organizational events.
7483    fn phase_evolution_events(
7484        &mut self,
7485        stats: &mut EnhancedGenerationStatistics,
7486    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
7487        if !self.phase_config.generate_evolution_events {
7488            debug!("Phase 24: Skipped (evolution events disabled)");
7489            return Ok((Vec::new(), Vec::new()));
7490        }
7491        info!("Phase 24: Generating Process Evolution + Organizational Events");
7492
7493        let seed = self.seed;
7494        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7495            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7496        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7497
7498        // Process evolution events
7499        let mut proc_gen =
7500            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
7501                seed + 100,
7502            );
7503        let process_events = proc_gen.generate_events(start_date, end_date);
7504
7505        // Organizational events
7506        let company_codes: Vec<String> = self
7507            .config
7508            .companies
7509            .iter()
7510            .map(|c| c.code.clone())
7511            .collect();
7512        let mut org_gen =
7513            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
7514                seed + 101,
7515            );
7516        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
7517
7518        stats.process_evolution_event_count = process_events.len();
7519        stats.organizational_event_count = org_events.len();
7520
7521        info!(
7522            "Evolution events generated: {} process evolution, {} organizational",
7523            process_events.len(),
7524            org_events.len()
7525        );
7526        self.check_resources_with_log("post-evolution-events")?;
7527
7528        Ok((process_events, org_events))
7529    }
7530
7531    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
7532    /// data recovery, and regulatory changes).
7533    fn phase_disruption_events(
7534        &self,
7535        stats: &mut EnhancedGenerationStatistics,
7536    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
7537        if !self.config.organizational_events.enabled {
7538            debug!("Phase 24b: Skipped (organizational events disabled)");
7539            return Ok(Vec::new());
7540        }
7541        info!("Phase 24b: Generating Disruption Events");
7542
7543        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7544            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7545        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7546
7547        let company_codes: Vec<String> = self
7548            .config
7549            .companies
7550            .iter()
7551            .map(|c| c.code.clone())
7552            .collect();
7553
7554        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
7555        let events = gen.generate(start_date, end_date, &company_codes);
7556
7557        stats.disruption_event_count = events.len();
7558        info!("Disruption events generated: {} events", events.len());
7559        self.check_resources_with_log("post-disruption-events")?;
7560
7561        Ok(events)
7562    }
7563
7564    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
7565    ///
7566    /// Produces paired examples where each pair contains the original clean JE
7567    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
7568    /// split transaction). Useful for training anomaly detection models with
7569    /// known ground truth.
7570    fn phase_counterfactuals(
7571        &self,
7572        journal_entries: &[JournalEntry],
7573        stats: &mut EnhancedGenerationStatistics,
7574    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
7575        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
7576            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
7577            return Ok(Vec::new());
7578        }
7579        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
7580
7581        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
7582
7583        let mut gen = CounterfactualGenerator::new(self.seed + 110);
7584
7585        // Rotating set of specs to produce diverse mutation types
7586        let specs = [
7587            CounterfactualSpec::ScaleAmount { factor: 2.5 },
7588            CounterfactualSpec::ShiftDate { days: -14 },
7589            CounterfactualSpec::SelfApprove,
7590            CounterfactualSpec::SplitTransaction { split_count: 3 },
7591        ];
7592
7593        let pairs: Vec<_> = journal_entries
7594            .iter()
7595            .enumerate()
7596            .map(|(i, je)| {
7597                let spec = &specs[i % specs.len()];
7598                gen.generate(je, spec)
7599            })
7600            .collect();
7601
7602        stats.counterfactual_pair_count = pairs.len();
7603        info!(
7604            "Counterfactual pairs generated: {} pairs from {} journal entries",
7605            pairs.len(),
7606            journal_entries.len()
7607        );
7608        self.check_resources_with_log("post-counterfactuals")?;
7609
7610        Ok(pairs)
7611    }
7612
7613    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
7614    ///
7615    /// Uses the anomaly labels (from Phase 8) to determine which documents are
7616    /// fraudulent, then generates probabilistic red flags on all chain documents.
7617    /// Non-fraud documents also receive red flags at a lower rate (false positives)
7618    /// to produce realistic ML training data.
7619    fn phase_red_flags(
7620        &self,
7621        anomaly_labels: &AnomalyLabels,
7622        document_flows: &DocumentFlowSnapshot,
7623        stats: &mut EnhancedGenerationStatistics,
7624    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
7625        if !self.config.fraud.enabled {
7626            debug!("Phase 26: Skipped (fraud generation disabled)");
7627            return Ok(Vec::new());
7628        }
7629        info!("Phase 26: Generating Fraud Red-Flag Indicators");
7630
7631        use datasynth_generators::fraud::RedFlagGenerator;
7632
7633        let generator = RedFlagGenerator::new();
7634        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
7635
7636        // Build a set of document IDs that are known-fraudulent from anomaly labels.
7637        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
7638            .labels
7639            .iter()
7640            .filter(|label| label.anomaly_type.is_intentional())
7641            .map(|label| label.document_id.as_str())
7642            .collect();
7643
7644        let mut flags = Vec::new();
7645
7646        // Iterate P2P chains: use the purchase order document ID as the chain key.
7647        for chain in &document_flows.p2p_chains {
7648            let doc_id = &chain.purchase_order.header.document_id;
7649            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
7650            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
7651        }
7652
7653        // Iterate O2C chains: use the sales order document ID as the chain key.
7654        for chain in &document_flows.o2c_chains {
7655            let doc_id = &chain.sales_order.header.document_id;
7656            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
7657            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
7658        }
7659
7660        stats.red_flag_count = flags.len();
7661        info!(
7662            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
7663            flags.len(),
7664            document_flows.p2p_chains.len(),
7665            document_flows.o2c_chains.len(),
7666            fraud_doc_ids.len()
7667        );
7668        self.check_resources_with_log("post-red-flags")?;
7669
7670        Ok(flags)
7671    }
7672
7673    /// Phase 26b: Generate collusion rings from employee/vendor pools.
7674    ///
7675    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
7676    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
7677    /// advance them over the simulation period.
7678    fn phase_collusion_rings(
7679        &mut self,
7680        stats: &mut EnhancedGenerationStatistics,
7681    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
7682        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
7683            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
7684            return Ok(Vec::new());
7685        }
7686        info!("Phase 26b: Generating Collusion Rings");
7687
7688        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7689            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7690        let months = self.config.global.period_months;
7691
7692        let employee_ids: Vec<String> = self
7693            .master_data
7694            .employees
7695            .iter()
7696            .map(|e| e.employee_id.clone())
7697            .collect();
7698        let vendor_ids: Vec<String> = self
7699            .master_data
7700            .vendors
7701            .iter()
7702            .map(|v| v.vendor_id.clone())
7703            .collect();
7704
7705        let mut generator =
7706            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
7707        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
7708
7709        stats.collusion_ring_count = rings.len();
7710        info!(
7711            "Collusion rings generated: {} rings, total members: {}",
7712            rings.len(),
7713            rings
7714                .iter()
7715                .map(datasynth_generators::fraud::CollusionRing::size)
7716                .sum::<usize>()
7717        );
7718        self.check_resources_with_log("post-collusion-rings")?;
7719
7720        Ok(rings)
7721    }
7722
7723    /// Phase 27: Generate bi-temporal version chains for vendor entities.
7724    ///
7725    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
7726    /// master data changes over time, supporting bi-temporal audit queries.
7727    fn phase_temporal_attributes(
7728        &mut self,
7729        stats: &mut EnhancedGenerationStatistics,
7730    ) -> SynthResult<
7731        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
7732    > {
7733        if !self.config.temporal_attributes.enabled {
7734            debug!("Phase 27: Skipped (temporal attributes disabled)");
7735            return Ok(Vec::new());
7736        }
7737        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
7738
7739        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7740            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7741
7742        // Build a TemporalAttributeConfig from the user's config.
7743        // Since Phase 27 is already gated on temporal_attributes.enabled,
7744        // default to enabling version chains so users get actual mutations.
7745        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
7746            || self.config.temporal_attributes.enabled;
7747        let temporal_config = {
7748            let ta = &self.config.temporal_attributes;
7749            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
7750                .enabled(ta.enabled)
7751                .closed_probability(ta.valid_time.closed_probability)
7752                .avg_validity_days(ta.valid_time.avg_validity_days)
7753                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
7754                .with_version_chains(if generate_version_chains {
7755                    ta.avg_versions_per_entity
7756                } else {
7757                    1.0
7758                })
7759                .build()
7760        };
7761        // Apply backdating settings if configured
7762        let temporal_config = if self
7763            .config
7764            .temporal_attributes
7765            .transaction_time
7766            .allow_backdating
7767        {
7768            let mut c = temporal_config;
7769            c.transaction_time.allow_backdating = true;
7770            c.transaction_time.backdating_probability = self
7771                .config
7772                .temporal_attributes
7773                .transaction_time
7774                .backdating_probability;
7775            c.transaction_time.max_backdate_days = self
7776                .config
7777                .temporal_attributes
7778                .transaction_time
7779                .max_backdate_days;
7780            c
7781        } else {
7782            temporal_config
7783        };
7784        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
7785            temporal_config,
7786            self.seed + 130,
7787            start_date,
7788        );
7789
7790        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
7791            self.seed + 130,
7792            datasynth_core::GeneratorType::Vendor,
7793        );
7794
7795        let chains: Vec<_> = self
7796            .master_data
7797            .vendors
7798            .iter()
7799            .map(|vendor| {
7800                let id = uuid_factory.next();
7801                gen.generate_version_chain(vendor.clone(), id)
7802            })
7803            .collect();
7804
7805        stats.temporal_version_chain_count = chains.len();
7806        info!("Temporal version chains generated: {} chains", chains.len());
7807        self.check_resources_with_log("post-temporal-attributes")?;
7808
7809        Ok(chains)
7810    }
7811
7812    /// Phase 28: Build entity relationship graph and cross-process links.
7813    ///
7814    /// Part 1 (gated on `relationship_strength.enabled`): builds an
7815    /// `EntityGraph` from master-data vendor/customer entities and
7816    /// journal-entry-derived transaction summaries.
7817    ///
7818    /// Part 2 (gated on `cross_process_links.enabled`): extracts
7819    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
7820    /// generates inventory-movement cross-process links.
7821    fn phase_entity_relationships(
7822        &self,
7823        journal_entries: &[JournalEntry],
7824        document_flows: &DocumentFlowSnapshot,
7825        stats: &mut EnhancedGenerationStatistics,
7826    ) -> SynthResult<(
7827        Option<datasynth_core::models::EntityGraph>,
7828        Vec<datasynth_core::models::CrossProcessLink>,
7829    )> {
7830        use datasynth_generators::relationships::{
7831            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
7832            TransactionSummary,
7833        };
7834
7835        let rs_enabled = self.config.relationship_strength.enabled;
7836        let cpl_enabled = self.config.cross_process_links.enabled
7837            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
7838
7839        if !rs_enabled && !cpl_enabled {
7840            debug!(
7841                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
7842            );
7843            return Ok((None, Vec::new()));
7844        }
7845
7846        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
7847
7848        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7849            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7850
7851        let company_code = self
7852            .config
7853            .companies
7854            .first()
7855            .map(|c| c.code.as_str())
7856            .unwrap_or("1000");
7857
7858        // Build the generator with matching config flags
7859        let gen_config = EntityGraphConfig {
7860            enabled: rs_enabled,
7861            cross_process: datasynth_generators::relationships::CrossProcessConfig {
7862                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
7863                enable_return_flows: false,
7864                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
7865                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
7866                // Use higher link rate for small datasets to avoid probabilistic empty results
7867                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
7868                    1.0
7869                } else {
7870                    0.30
7871                },
7872                ..Default::default()
7873            },
7874            strength_config: datasynth_generators::relationships::StrengthConfig {
7875                transaction_volume_weight: self
7876                    .config
7877                    .relationship_strength
7878                    .calculation
7879                    .transaction_volume_weight,
7880                transaction_count_weight: self
7881                    .config
7882                    .relationship_strength
7883                    .calculation
7884                    .transaction_count_weight,
7885                duration_weight: self
7886                    .config
7887                    .relationship_strength
7888                    .calculation
7889                    .relationship_duration_weight,
7890                recency_weight: self.config.relationship_strength.calculation.recency_weight,
7891                mutual_connections_weight: self
7892                    .config
7893                    .relationship_strength
7894                    .calculation
7895                    .mutual_connections_weight,
7896                recency_half_life_days: self
7897                    .config
7898                    .relationship_strength
7899                    .calculation
7900                    .recency_half_life_days,
7901            },
7902            ..Default::default()
7903        };
7904
7905        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
7906
7907        // --- Part 1: Entity Relationship Graph ---
7908        let entity_graph = if rs_enabled {
7909            // Build EntitySummary lists from master data
7910            let vendor_summaries: Vec<EntitySummary> = self
7911                .master_data
7912                .vendors
7913                .iter()
7914                .map(|v| {
7915                    EntitySummary::new(
7916                        &v.vendor_id,
7917                        &v.name,
7918                        datasynth_core::models::GraphEntityType::Vendor,
7919                        start_date,
7920                    )
7921                })
7922                .collect();
7923
7924            let customer_summaries: Vec<EntitySummary> = self
7925                .master_data
7926                .customers
7927                .iter()
7928                .map(|c| {
7929                    EntitySummary::new(
7930                        &c.customer_id,
7931                        &c.name,
7932                        datasynth_core::models::GraphEntityType::Customer,
7933                        start_date,
7934                    )
7935                })
7936                .collect();
7937
7938            // Build transaction summaries from journal entries.
7939            // Key = (company_code, trading_partner) for entries that have a
7940            // trading partner.  This captures intercompany flows and any JE
7941            // whose line items carry a trading_partner reference.
7942            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
7943                std::collections::HashMap::new();
7944
7945            for je in journal_entries {
7946                let cc = je.header.company_code.clone();
7947                let posting_date = je.header.posting_date;
7948                for line in &je.lines {
7949                    if let Some(ref tp) = line.trading_partner {
7950                        let amount = if line.debit_amount > line.credit_amount {
7951                            line.debit_amount
7952                        } else {
7953                            line.credit_amount
7954                        };
7955                        let entry = txn_summaries
7956                            .entry((cc.clone(), tp.clone()))
7957                            .or_insert_with(|| TransactionSummary {
7958                                total_volume: rust_decimal::Decimal::ZERO,
7959                                transaction_count: 0,
7960                                first_transaction_date: posting_date,
7961                                last_transaction_date: posting_date,
7962                                related_entities: std::collections::HashSet::new(),
7963                            });
7964                        entry.total_volume += amount;
7965                        entry.transaction_count += 1;
7966                        if posting_date < entry.first_transaction_date {
7967                            entry.first_transaction_date = posting_date;
7968                        }
7969                        if posting_date > entry.last_transaction_date {
7970                            entry.last_transaction_date = posting_date;
7971                        }
7972                        entry.related_entities.insert(cc.clone());
7973                    }
7974                }
7975            }
7976
7977            // Also extract transaction relationships from document flow chains.
7978            // P2P chains: Company → Vendor relationships
7979            for chain in &document_flows.p2p_chains {
7980                let cc = chain.purchase_order.header.company_code.clone();
7981                let vendor_id = chain.purchase_order.vendor_id.clone();
7982                let po_date = chain.purchase_order.header.document_date;
7983                let amount = chain.purchase_order.total_net_amount;
7984
7985                let entry = txn_summaries
7986                    .entry((cc.clone(), vendor_id))
7987                    .or_insert_with(|| TransactionSummary {
7988                        total_volume: rust_decimal::Decimal::ZERO,
7989                        transaction_count: 0,
7990                        first_transaction_date: po_date,
7991                        last_transaction_date: po_date,
7992                        related_entities: std::collections::HashSet::new(),
7993                    });
7994                entry.total_volume += amount;
7995                entry.transaction_count += 1;
7996                if po_date < entry.first_transaction_date {
7997                    entry.first_transaction_date = po_date;
7998                }
7999                if po_date > entry.last_transaction_date {
8000                    entry.last_transaction_date = po_date;
8001                }
8002                entry.related_entities.insert(cc);
8003            }
8004
8005            // O2C chains: Company → Customer relationships
8006            for chain in &document_flows.o2c_chains {
8007                let cc = chain.sales_order.header.company_code.clone();
8008                let customer_id = chain.sales_order.customer_id.clone();
8009                let so_date = chain.sales_order.header.document_date;
8010                let amount = chain.sales_order.total_net_amount;
8011
8012                let entry = txn_summaries
8013                    .entry((cc.clone(), customer_id))
8014                    .or_insert_with(|| TransactionSummary {
8015                        total_volume: rust_decimal::Decimal::ZERO,
8016                        transaction_count: 0,
8017                        first_transaction_date: so_date,
8018                        last_transaction_date: so_date,
8019                        related_entities: std::collections::HashSet::new(),
8020                    });
8021                entry.total_volume += amount;
8022                entry.transaction_count += 1;
8023                if so_date < entry.first_transaction_date {
8024                    entry.first_transaction_date = so_date;
8025                }
8026                if so_date > entry.last_transaction_date {
8027                    entry.last_transaction_date = so_date;
8028                }
8029                entry.related_entities.insert(cc);
8030            }
8031
8032            let as_of_date = journal_entries
8033                .last()
8034                .map(|je| je.header.posting_date)
8035                .unwrap_or(start_date);
8036
8037            let graph = gen.generate_entity_graph(
8038                company_code,
8039                as_of_date,
8040                &vendor_summaries,
8041                &customer_summaries,
8042                &txn_summaries,
8043            );
8044
8045            info!(
8046                "Entity relationship graph: {} nodes, {} edges",
8047                graph.nodes.len(),
8048                graph.edges.len()
8049            );
8050            stats.entity_relationship_node_count = graph.nodes.len();
8051            stats.entity_relationship_edge_count = graph.edges.len();
8052            Some(graph)
8053        } else {
8054            None
8055        };
8056
8057        // --- Part 2: Cross-Process Links ---
8058        let cross_process_links = if cpl_enabled {
8059            // Build GoodsReceiptRef from P2P chains
8060            let gr_refs: Vec<GoodsReceiptRef> = document_flows
8061                .p2p_chains
8062                .iter()
8063                .flat_map(|chain| {
8064                    let vendor_id = chain.purchase_order.vendor_id.clone();
8065                    let cc = chain.purchase_order.header.company_code.clone();
8066                    chain.goods_receipts.iter().flat_map(move |gr| {
8067                        gr.items.iter().filter_map({
8068                            let doc_id = gr.header.document_id.clone();
8069                            let v_id = vendor_id.clone();
8070                            let company = cc.clone();
8071                            let receipt_date = gr.header.document_date;
8072                            move |item| {
8073                                item.base
8074                                    .material_id
8075                                    .as_ref()
8076                                    .map(|mat_id| GoodsReceiptRef {
8077                                        document_id: doc_id.clone(),
8078                                        material_id: mat_id.clone(),
8079                                        quantity: item.base.quantity,
8080                                        receipt_date,
8081                                        vendor_id: v_id.clone(),
8082                                        company_code: company.clone(),
8083                                    })
8084                            }
8085                        })
8086                    })
8087                })
8088                .collect();
8089
8090            // Build DeliveryRef from O2C chains
8091            let del_refs: Vec<DeliveryRef> = document_flows
8092                .o2c_chains
8093                .iter()
8094                .flat_map(|chain| {
8095                    let customer_id = chain.sales_order.customer_id.clone();
8096                    let cc = chain.sales_order.header.company_code.clone();
8097                    chain.deliveries.iter().flat_map(move |del| {
8098                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8099                        del.items.iter().filter_map({
8100                            let doc_id = del.header.document_id.clone();
8101                            let c_id = customer_id.clone();
8102                            let company = cc.clone();
8103                            move |item| {
8104                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8105                                    document_id: doc_id.clone(),
8106                                    material_id: mat_id.clone(),
8107                                    quantity: item.base.quantity,
8108                                    delivery_date,
8109                                    customer_id: c_id.clone(),
8110                                    company_code: company.clone(),
8111                                })
8112                            }
8113                        })
8114                    })
8115                })
8116                .collect();
8117
8118            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
8119            info!("Cross-process links generated: {} links", links.len());
8120            stats.cross_process_link_count = links.len();
8121            links
8122        } else {
8123            Vec::new()
8124        };
8125
8126        self.check_resources_with_log("post-entity-relationships")?;
8127        Ok((entity_graph, cross_process_links))
8128    }
8129
8130    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
8131    fn phase_industry_data(
8132        &self,
8133        stats: &mut EnhancedGenerationStatistics,
8134    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
8135        if !self.config.industry_specific.enabled {
8136            return None;
8137        }
8138        info!("Phase 29: Generating industry-specific data");
8139        let output = datasynth_generators::industry::factory::generate_industry_output(
8140            self.config.global.industry,
8141        );
8142        stats.industry_gl_account_count = output.gl_accounts.len();
8143        info!(
8144            "Industry data generated: {} GL accounts for {:?}",
8145            output.gl_accounts.len(),
8146            self.config.global.industry
8147        );
8148        Some(output)
8149    }
8150
8151    /// Phase 3b: Generate opening balances for each company.
8152    fn phase_opening_balances(
8153        &mut self,
8154        coa: &Arc<ChartOfAccounts>,
8155        stats: &mut EnhancedGenerationStatistics,
8156    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
8157        if !self.config.balance.generate_opening_balances {
8158            debug!("Phase 3b: Skipped (opening balance generation disabled)");
8159            return Ok(Vec::new());
8160        }
8161        info!("Phase 3b: Generating Opening Balances");
8162
8163        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8164            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8165        let fiscal_year = start_date.year();
8166
8167        let industry = match self.config.global.industry {
8168            IndustrySector::Manufacturing => IndustryType::Manufacturing,
8169            IndustrySector::Retail => IndustryType::Retail,
8170            IndustrySector::FinancialServices => IndustryType::Financial,
8171            IndustrySector::Healthcare => IndustryType::Healthcare,
8172            IndustrySector::Technology => IndustryType::Technology,
8173            _ => IndustryType::Manufacturing,
8174        };
8175
8176        let config = datasynth_generators::OpeningBalanceConfig {
8177            industry,
8178            ..Default::default()
8179        };
8180        let mut gen =
8181            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
8182
8183        let mut results = Vec::new();
8184        for company in &self.config.companies {
8185            let spec = OpeningBalanceSpec::new(
8186                company.code.clone(),
8187                start_date,
8188                fiscal_year,
8189                company.currency.clone(),
8190                rust_decimal::Decimal::new(10_000_000, 0),
8191                industry,
8192            );
8193            let ob = gen.generate(&spec, coa, start_date, &company.code);
8194            results.push(ob);
8195        }
8196
8197        stats.opening_balance_count = results.len();
8198        info!("Opening balances generated: {} companies", results.len());
8199        self.check_resources_with_log("post-opening-balances")?;
8200
8201        Ok(results)
8202    }
8203
8204    /// Phase 9b: Reconcile GL control accounts to subledger balances.
8205    fn phase_subledger_reconciliation(
8206        &mut self,
8207        subledger: &SubledgerSnapshot,
8208        entries: &[JournalEntry],
8209        stats: &mut EnhancedGenerationStatistics,
8210    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
8211        if !self.config.balance.reconcile_subledgers {
8212            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
8213            return Ok(Vec::new());
8214        }
8215        info!("Phase 9b: Reconciling GL to subledger balances");
8216
8217        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8218            .map(|d| d + chrono::Months::new(self.config.global.period_months))
8219            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8220
8221        // Build GL balance map from journal entries using a balance tracker
8222        let tracker_config = BalanceTrackerConfig {
8223            validate_on_each_entry: false,
8224            track_history: false,
8225            fail_on_validation_error: false,
8226            ..Default::default()
8227        };
8228        let recon_currency = self
8229            .config
8230            .companies
8231            .first()
8232            .map(|c| c.currency.clone())
8233            .unwrap_or_else(|| "USD".to_string());
8234        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
8235        let validation_errors = tracker.apply_entries(entries);
8236        if !validation_errors.is_empty() {
8237            warn!(
8238                error_count = validation_errors.len(),
8239                "Balance tracker encountered validation errors during subledger reconciliation"
8240            );
8241            for err in &validation_errors {
8242                debug!("Balance validation error: {:?}", err);
8243            }
8244        }
8245
8246        let mut engine = datasynth_generators::ReconciliationEngine::new(
8247            datasynth_generators::ReconciliationConfig::default(),
8248        );
8249
8250        let mut results = Vec::new();
8251        let company_code = self
8252            .config
8253            .companies
8254            .first()
8255            .map(|c| c.code.as_str())
8256            .unwrap_or("1000");
8257
8258        // Reconcile AR
8259        if !subledger.ar_invoices.is_empty() {
8260            let gl_balance = tracker
8261                .get_account_balance(
8262                    company_code,
8263                    datasynth_core::accounts::control_accounts::AR_CONTROL,
8264                )
8265                .map(|b| b.closing_balance)
8266                .unwrap_or_default();
8267            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
8268            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
8269        }
8270
8271        // Reconcile AP
8272        if !subledger.ap_invoices.is_empty() {
8273            let gl_balance = tracker
8274                .get_account_balance(
8275                    company_code,
8276                    datasynth_core::accounts::control_accounts::AP_CONTROL,
8277                )
8278                .map(|b| b.closing_balance)
8279                .unwrap_or_default();
8280            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
8281            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
8282        }
8283
8284        // Reconcile FA
8285        if !subledger.fa_records.is_empty() {
8286            let gl_asset_balance = tracker
8287                .get_account_balance(
8288                    company_code,
8289                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
8290                )
8291                .map(|b| b.closing_balance)
8292                .unwrap_or_default();
8293            let gl_accum_depr_balance = tracker
8294                .get_account_balance(
8295                    company_code,
8296                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
8297                )
8298                .map(|b| b.closing_balance)
8299                .unwrap_or_default();
8300            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
8301                subledger.fa_records.iter().collect();
8302            let (asset_recon, depr_recon) = engine.reconcile_fa(
8303                company_code,
8304                end_date,
8305                gl_asset_balance,
8306                gl_accum_depr_balance,
8307                &fa_refs,
8308            );
8309            results.push(asset_recon);
8310            results.push(depr_recon);
8311        }
8312
8313        // Reconcile Inventory
8314        if !subledger.inventory_positions.is_empty() {
8315            let gl_balance = tracker
8316                .get_account_balance(
8317                    company_code,
8318                    datasynth_core::accounts::control_accounts::INVENTORY,
8319                )
8320                .map(|b| b.closing_balance)
8321                .unwrap_or_default();
8322            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
8323                subledger.inventory_positions.iter().collect();
8324            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
8325        }
8326
8327        stats.subledger_reconciliation_count = results.len();
8328        let passed = results.iter().filter(|r| r.is_balanced()).count();
8329        let failed = results.len() - passed;
8330        info!(
8331            "Subledger reconciliation: {} checks, {} passed, {} failed",
8332            results.len(),
8333            passed,
8334            failed
8335        );
8336        self.check_resources_with_log("post-subledger-reconciliation")?;
8337
8338        Ok(results)
8339    }
8340
8341    /// Generate the chart of accounts.
8342    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
8343        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
8344
8345        let coa_framework = self.resolve_coa_framework();
8346
8347        let mut gen = ChartOfAccountsGenerator::new(
8348            self.config.chart_of_accounts.complexity,
8349            self.config.global.industry,
8350            self.seed,
8351        )
8352        .with_coa_framework(coa_framework);
8353
8354        let coa = Arc::new(gen.generate());
8355        self.coa = Some(Arc::clone(&coa));
8356
8357        if let Some(pb) = pb {
8358            pb.finish_with_message("Chart of Accounts complete");
8359        }
8360
8361        Ok(coa)
8362    }
8363
8364    /// Generate master data entities.
8365    fn generate_master_data(&mut self) -> SynthResult<()> {
8366        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8367            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8368        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8369
8370        let total = self.config.companies.len() as u64 * 5; // 5 entity types
8371        let pb = self.create_progress_bar(total, "Generating Master Data");
8372
8373        // Resolve country pack once for all companies (uses primary company's country)
8374        let pack = self.primary_pack().clone();
8375
8376        // Capture config values needed inside the parallel closure
8377        let vendors_per_company = self.phase_config.vendors_per_company;
8378        let customers_per_company = self.phase_config.customers_per_company;
8379        let materials_per_company = self.phase_config.materials_per_company;
8380        let assets_per_company = self.phase_config.assets_per_company;
8381        let coa_framework = self.resolve_coa_framework();
8382
8383        // Generate all master data in parallel across companies.
8384        // Each company's data is independent, making this embarrassingly parallel.
8385        let per_company_results: Vec<_> = self
8386            .config
8387            .companies
8388            .par_iter()
8389            .enumerate()
8390            .map(|(i, company)| {
8391                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
8392                let pack = pack.clone();
8393
8394                // Generate vendors (offset counter so IDs are globally unique across companies)
8395                let mut vendor_gen = VendorGenerator::new(company_seed);
8396                vendor_gen.set_country_pack(pack.clone());
8397                vendor_gen.set_coa_framework(coa_framework);
8398                vendor_gen.set_counter_offset(i * vendors_per_company);
8399                // Wire vendor network config when enabled
8400                if self.config.vendor_network.enabled {
8401                    let vn = &self.config.vendor_network;
8402                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
8403                        enabled: true,
8404                        depth: vn.depth,
8405                        tier1_count: datasynth_generators::TierCountConfig::new(
8406                            vn.tier1.min,
8407                            vn.tier1.max,
8408                        ),
8409                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
8410                            vn.tier2_per_parent.min,
8411                            vn.tier2_per_parent.max,
8412                        ),
8413                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
8414                            vn.tier3_per_parent.min,
8415                            vn.tier3_per_parent.max,
8416                        ),
8417                        cluster_distribution: datasynth_generators::ClusterDistribution {
8418                            reliable_strategic: vn.clusters.reliable_strategic,
8419                            standard_operational: vn.clusters.standard_operational,
8420                            transactional: vn.clusters.transactional,
8421                            problematic: vn.clusters.problematic,
8422                        },
8423                        concentration_limits: datasynth_generators::ConcentrationLimits {
8424                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
8425                            max_top5: vn.dependencies.top_5_concentration,
8426                        },
8427                        ..datasynth_generators::VendorNetworkConfig::default()
8428                    });
8429                }
8430                let vendor_pool =
8431                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
8432
8433                // Generate customers (offset counter so IDs are globally unique across companies)
8434                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
8435                customer_gen.set_country_pack(pack.clone());
8436                customer_gen.set_coa_framework(coa_framework);
8437                customer_gen.set_counter_offset(i * customers_per_company);
8438                // Wire customer segmentation config when enabled
8439                if self.config.customer_segmentation.enabled {
8440                    let cs = &self.config.customer_segmentation;
8441                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
8442                        enabled: true,
8443                        segment_distribution: datasynth_generators::SegmentDistribution {
8444                            enterprise: cs.value_segments.enterprise.customer_share,
8445                            mid_market: cs.value_segments.mid_market.customer_share,
8446                            smb: cs.value_segments.smb.customer_share,
8447                            consumer: cs.value_segments.consumer.customer_share,
8448                        },
8449                        referral_config: datasynth_generators::ReferralConfig {
8450                            enabled: cs.networks.referrals.enabled,
8451                            referral_rate: cs.networks.referrals.referral_rate,
8452                            ..Default::default()
8453                        },
8454                        hierarchy_config: datasynth_generators::HierarchyConfig {
8455                            enabled: cs.networks.corporate_hierarchies.enabled,
8456                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
8457                            ..Default::default()
8458                        },
8459                        ..Default::default()
8460                    };
8461                    customer_gen.set_segmentation_config(seg_cfg);
8462                }
8463                let customer_pool = customer_gen.generate_customer_pool(
8464                    customers_per_company,
8465                    &company.code,
8466                    start_date,
8467                );
8468
8469                // Generate materials (offset counter so IDs are globally unique across companies)
8470                let mut material_gen = MaterialGenerator::new(company_seed + 200);
8471                material_gen.set_country_pack(pack.clone());
8472                material_gen.set_counter_offset(i * materials_per_company);
8473                let material_pool = material_gen.generate_material_pool(
8474                    materials_per_company,
8475                    &company.code,
8476                    start_date,
8477                );
8478
8479                // Generate fixed assets
8480                let mut asset_gen = AssetGenerator::new(company_seed + 300);
8481                let asset_pool = asset_gen.generate_asset_pool(
8482                    assets_per_company,
8483                    &company.code,
8484                    (start_date, end_date),
8485                );
8486
8487                // Generate employees
8488                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
8489                employee_gen.set_country_pack(pack);
8490                let employee_pool =
8491                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
8492
8493                // Generate employee change history (2-5 events per employee)
8494                let employee_change_history =
8495                    employee_gen.generate_all_change_history(&employee_pool, end_date);
8496
8497                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
8498                let employee_ids: Vec<String> = employee_pool
8499                    .employees
8500                    .iter()
8501                    .map(|e| e.employee_id.clone())
8502                    .collect();
8503                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
8504                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
8505
8506                (
8507                    vendor_pool.vendors,
8508                    customer_pool.customers,
8509                    material_pool.materials,
8510                    asset_pool.assets,
8511                    employee_pool.employees,
8512                    employee_change_history,
8513                    cost_centers,
8514                )
8515            })
8516            .collect();
8517
8518        // Aggregate results from all companies
8519        for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
8520            per_company_results
8521        {
8522            self.master_data.vendors.extend(vendors);
8523            self.master_data.customers.extend(customers);
8524            self.master_data.materials.extend(materials);
8525            self.master_data.assets.extend(assets);
8526            self.master_data.employees.extend(employees);
8527            self.master_data.cost_centers.extend(cost_centers);
8528            self.master_data
8529                .employee_change_history
8530                .extend(change_history);
8531        }
8532
8533        if let Some(pb) = &pb {
8534            pb.inc(total);
8535        }
8536        if let Some(pb) = pb {
8537            pb.finish_with_message("Master data generation complete");
8538        }
8539
8540        Ok(())
8541    }
8542
8543    /// Generate document flows (P2P and O2C).
8544    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
8545        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8546            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8547
8548        // Generate P2P chains
8549        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
8550        let months = (self.config.global.period_months as usize).max(1);
8551        let p2p_count = self
8552            .phase_config
8553            .p2p_chains
8554            .min(self.master_data.vendors.len() * 2 * months);
8555        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
8556
8557        // Convert P2P config from schema to generator config
8558        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
8559        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
8560        p2p_gen.set_country_pack(self.primary_pack().clone());
8561
8562        for i in 0..p2p_count {
8563            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
8564            let materials: Vec<&Material> = self
8565                .master_data
8566                .materials
8567                .iter()
8568                .skip(i % self.master_data.materials.len().max(1))
8569                .take(2.min(self.master_data.materials.len()))
8570                .collect();
8571
8572            if materials.is_empty() {
8573                continue;
8574            }
8575
8576            let company = &self.config.companies[i % self.config.companies.len()];
8577            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
8578            let fiscal_period = po_date.month() as u8;
8579            let created_by = if self.master_data.employees.is_empty() {
8580                "SYSTEM"
8581            } else {
8582                self.master_data.employees[i % self.master_data.employees.len()]
8583                    .user_id
8584                    .as_str()
8585            };
8586
8587            let chain = p2p_gen.generate_chain(
8588                &company.code,
8589                vendor,
8590                &materials,
8591                po_date,
8592                start_date.year() as u16,
8593                fiscal_period,
8594                created_by,
8595            );
8596
8597            // Flatten documents
8598            flows.purchase_orders.push(chain.purchase_order.clone());
8599            flows.goods_receipts.extend(chain.goods_receipts.clone());
8600            if let Some(vi) = &chain.vendor_invoice {
8601                flows.vendor_invoices.push(vi.clone());
8602            }
8603            if let Some(payment) = &chain.payment {
8604                flows.payments.push(payment.clone());
8605            }
8606            for remainder in &chain.remainder_payments {
8607                flows.payments.push(remainder.clone());
8608            }
8609            flows.p2p_chains.push(chain);
8610
8611            if let Some(pb) = &pb {
8612                pb.inc(1);
8613            }
8614        }
8615
8616        if let Some(pb) = pb {
8617            pb.finish_with_message("P2P document flows complete");
8618        }
8619
8620        // Generate O2C chains
8621        // Cap at ~2 SOs per customer per month to keep order volume realistic
8622        let o2c_count = self
8623            .phase_config
8624            .o2c_chains
8625            .min(self.master_data.customers.len() * 2 * months);
8626        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
8627
8628        // Convert O2C config from schema to generator config
8629        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
8630        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
8631        o2c_gen.set_country_pack(self.primary_pack().clone());
8632
8633        for i in 0..o2c_count {
8634            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
8635            let materials: Vec<&Material> = self
8636                .master_data
8637                .materials
8638                .iter()
8639                .skip(i % self.master_data.materials.len().max(1))
8640                .take(2.min(self.master_data.materials.len()))
8641                .collect();
8642
8643            if materials.is_empty() {
8644                continue;
8645            }
8646
8647            let company = &self.config.companies[i % self.config.companies.len()];
8648            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
8649            let fiscal_period = so_date.month() as u8;
8650            let created_by = if self.master_data.employees.is_empty() {
8651                "SYSTEM"
8652            } else {
8653                self.master_data.employees[i % self.master_data.employees.len()]
8654                    .user_id
8655                    .as_str()
8656            };
8657
8658            let chain = o2c_gen.generate_chain(
8659                &company.code,
8660                customer,
8661                &materials,
8662                so_date,
8663                start_date.year() as u16,
8664                fiscal_period,
8665                created_by,
8666            );
8667
8668            // Flatten documents
8669            flows.sales_orders.push(chain.sales_order.clone());
8670            flows.deliveries.extend(chain.deliveries.clone());
8671            if let Some(ci) = &chain.customer_invoice {
8672                flows.customer_invoices.push(ci.clone());
8673            }
8674            if let Some(receipt) = &chain.customer_receipt {
8675                flows.payments.push(receipt.clone());
8676            }
8677            // Extract remainder receipts (follow-up to partial payments)
8678            for receipt in &chain.remainder_receipts {
8679                flows.payments.push(receipt.clone());
8680            }
8681            flows.o2c_chains.push(chain);
8682
8683            if let Some(pb) = &pb {
8684                pb.inc(1);
8685            }
8686        }
8687
8688        if let Some(pb) = pb {
8689            pb.finish_with_message("O2C document flows complete");
8690        }
8691
8692        // Collect all document cross-references from document headers.
8693        // Each document embeds references to its predecessor(s) via add_reference(); here we
8694        // denormalise them into a flat list for the document_references.json output file.
8695        {
8696            let mut refs = Vec::new();
8697            for doc in &flows.purchase_orders {
8698                refs.extend(doc.header.document_references.iter().cloned());
8699            }
8700            for doc in &flows.goods_receipts {
8701                refs.extend(doc.header.document_references.iter().cloned());
8702            }
8703            for doc in &flows.vendor_invoices {
8704                refs.extend(doc.header.document_references.iter().cloned());
8705            }
8706            for doc in &flows.sales_orders {
8707                refs.extend(doc.header.document_references.iter().cloned());
8708            }
8709            for doc in &flows.deliveries {
8710                refs.extend(doc.header.document_references.iter().cloned());
8711            }
8712            for doc in &flows.customer_invoices {
8713                refs.extend(doc.header.document_references.iter().cloned());
8714            }
8715            for doc in &flows.payments {
8716                refs.extend(doc.header.document_references.iter().cloned());
8717            }
8718            debug!(
8719                "Collected {} document cross-references from document headers",
8720                refs.len()
8721            );
8722            flows.document_references = refs;
8723        }
8724
8725        Ok(())
8726    }
8727
8728    /// Generate journal entries using parallel generation across multiple cores.
8729    fn generate_journal_entries(
8730        &mut self,
8731        coa: &Arc<ChartOfAccounts>,
8732    ) -> SynthResult<Vec<JournalEntry>> {
8733        use datasynth_core::traits::ParallelGenerator;
8734
8735        let total = self.calculate_total_transactions();
8736        let pb = self.create_progress_bar(total, "Generating Journal Entries");
8737
8738        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8739            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8740        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8741
8742        let company_codes: Vec<String> = self
8743            .config
8744            .companies
8745            .iter()
8746            .map(|c| c.code.clone())
8747            .collect();
8748
8749        let generator = JournalEntryGenerator::new_with_params(
8750            self.config.transactions.clone(),
8751            Arc::clone(coa),
8752            company_codes,
8753            start_date,
8754            end_date,
8755            self.seed,
8756        );
8757
8758        // Connect generated master data to ensure JEs reference real entities
8759        // Enable persona-based error injection for realistic human behavior
8760        // Pass fraud configuration for fraud injection
8761        let je_pack = self.primary_pack();
8762
8763        let mut generator = generator
8764            .with_master_data(
8765                &self.master_data.vendors,
8766                &self.master_data.customers,
8767                &self.master_data.materials,
8768            )
8769            .with_country_pack_names(je_pack)
8770            .with_country_pack_temporal(
8771                self.config.temporal_patterns.clone(),
8772                self.seed + 200,
8773                je_pack,
8774            )
8775            .with_persona_errors(true)
8776            .with_fraud_config(self.config.fraud.clone());
8777
8778        // Apply temporal drift if configured
8779        if self.config.temporal.enabled {
8780            let drift_config = self.config.temporal.to_core_config();
8781            generator = generator.with_drift_config(drift_config, self.seed + 100);
8782        }
8783
8784        // Check memory limit at start
8785        self.check_memory_limit()?;
8786
8787        // Determine parallelism: use available cores, but cap at total entries
8788        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
8789
8790        // Use parallel generation for datasets with 10K+ entries.
8791        // Below this threshold, the statistical properties of a single-seeded
8792        // generator (e.g. Benford compliance) are better preserved.
8793        let entries = if total >= 10_000 && num_threads > 1 {
8794            // Parallel path: split the generator across cores and generate in parallel.
8795            // Each sub-generator gets a unique seed for deterministic, independent generation.
8796            let sub_generators = generator.split(num_threads);
8797            let entries_per_thread = total as usize / num_threads;
8798            let remainder = total as usize % num_threads;
8799
8800            let batches: Vec<Vec<JournalEntry>> = sub_generators
8801                .into_par_iter()
8802                .enumerate()
8803                .map(|(i, mut gen)| {
8804                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
8805                    gen.generate_batch(count)
8806                })
8807                .collect();
8808
8809            // Merge all batches into a single Vec
8810            let entries = JournalEntryGenerator::merge_results(batches);
8811
8812            if let Some(pb) = &pb {
8813                pb.inc(total);
8814            }
8815            entries
8816        } else {
8817            // Sequential path for small datasets (< 1000 entries)
8818            let mut entries = Vec::with_capacity(total as usize);
8819            for _ in 0..total {
8820                let entry = generator.generate();
8821                entries.push(entry);
8822                if let Some(pb) = &pb {
8823                    pb.inc(1);
8824                }
8825            }
8826            entries
8827        };
8828
8829        if let Some(pb) = pb {
8830            pb.finish_with_message("Journal entries complete");
8831        }
8832
8833        Ok(entries)
8834    }
8835
8836    /// Generate journal entries from document flows.
8837    ///
8838    /// This creates proper GL entries for each document in the P2P and O2C flows,
8839    /// ensuring that document activity is reflected in the general ledger.
8840    fn generate_jes_from_document_flows(
8841        &mut self,
8842        flows: &DocumentFlowSnapshot,
8843    ) -> SynthResult<Vec<JournalEntry>> {
8844        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
8845        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
8846
8847        let je_config = match self.resolve_coa_framework() {
8848            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
8849            CoAFramework::GermanSkr04 => {
8850                let fa = datasynth_core::FrameworkAccounts::german_gaap();
8851                DocumentFlowJeConfig::from(&fa)
8852            }
8853            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
8854        };
8855
8856        let populate_fec = je_config.populate_fec_fields;
8857        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
8858
8859        // Build auxiliary account lookup from vendor/customer master data so that
8860        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
8861        // PCG "4010001") instead of raw partner IDs.
8862        if populate_fec {
8863            let mut aux_lookup = std::collections::HashMap::new();
8864            for vendor in &self.master_data.vendors {
8865                if let Some(ref aux) = vendor.auxiliary_gl_account {
8866                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
8867                }
8868            }
8869            for customer in &self.master_data.customers {
8870                if let Some(ref aux) = customer.auxiliary_gl_account {
8871                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
8872                }
8873            }
8874            if !aux_lookup.is_empty() {
8875                generator.set_auxiliary_account_lookup(aux_lookup);
8876            }
8877        }
8878
8879        let mut entries = Vec::new();
8880
8881        // Generate JEs from P2P chains
8882        for chain in &flows.p2p_chains {
8883            let chain_entries = generator.generate_from_p2p_chain(chain);
8884            entries.extend(chain_entries);
8885            if let Some(pb) = &pb {
8886                pb.inc(1);
8887            }
8888        }
8889
8890        // Generate JEs from O2C chains
8891        for chain in &flows.o2c_chains {
8892            let chain_entries = generator.generate_from_o2c_chain(chain);
8893            entries.extend(chain_entries);
8894            if let Some(pb) = &pb {
8895                pb.inc(1);
8896            }
8897        }
8898
8899        if let Some(pb) = pb {
8900            pb.finish_with_message(format!(
8901                "Generated {} JEs from document flows",
8902                entries.len()
8903            ));
8904        }
8905
8906        Ok(entries)
8907    }
8908
8909    /// Generate journal entries from payroll runs.
8910    ///
8911    /// Creates one JE per payroll run:
8912    /// - DR Salaries & Wages (6100) for gross pay
8913    /// - CR Payroll Clearing (9100) for gross pay
8914    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
8915        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
8916
8917        let mut jes = Vec::with_capacity(payroll_runs.len());
8918
8919        for run in payroll_runs {
8920            let mut je = JournalEntry::new_simple(
8921                format!("JE-PAYROLL-{}", run.payroll_id),
8922                run.company_code.clone(),
8923                run.run_date,
8924                format!("Payroll {}", run.payroll_id),
8925            );
8926
8927            // Debit Salaries & Wages for gross pay
8928            je.add_line(JournalEntryLine {
8929                line_number: 1,
8930                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
8931                debit_amount: run.total_gross,
8932                reference: Some(run.payroll_id.clone()),
8933                text: Some(format!(
8934                    "Payroll {} ({} employees)",
8935                    run.payroll_id, run.employee_count
8936                )),
8937                ..Default::default()
8938            });
8939
8940            // Credit Payroll Clearing for gross pay
8941            je.add_line(JournalEntryLine {
8942                line_number: 2,
8943                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
8944                credit_amount: run.total_gross,
8945                reference: Some(run.payroll_id.clone()),
8946                ..Default::default()
8947            });
8948
8949            jes.push(je);
8950        }
8951
8952        jes
8953    }
8954
8955    /// Generate journal entries from production orders.
8956    ///
8957    /// Creates one JE per completed production order:
8958    /// - DR Raw Materials (5100) for material consumption (actual_cost)
8959    /// - CR Inventory (1200) for material consumption
8960    fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
8961        use datasynth_core::accounts::{control_accounts, expense_accounts};
8962        use datasynth_core::models::ProductionOrderStatus;
8963
8964        let mut jes = Vec::new();
8965
8966        for order in production_orders {
8967            // Only generate JEs for completed or closed orders
8968            if !matches!(
8969                order.status,
8970                ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
8971            ) {
8972                continue;
8973            }
8974
8975            let mut je = JournalEntry::new_simple(
8976                format!("JE-MFG-{}", order.order_id),
8977                order.company_code.clone(),
8978                order.actual_end.unwrap_or(order.planned_end),
8979                format!(
8980                    "Production Order {} - {}",
8981                    order.order_id, order.material_description
8982                ),
8983            );
8984
8985            // Debit Raw Materials / Manufacturing expense for actual cost
8986            je.add_line(JournalEntryLine {
8987                line_number: 1,
8988                gl_account: expense_accounts::RAW_MATERIALS.to_string(),
8989                debit_amount: order.actual_cost,
8990                reference: Some(order.order_id.clone()),
8991                text: Some(format!(
8992                    "Material consumption for {}",
8993                    order.material_description
8994                )),
8995                quantity: Some(order.actual_quantity),
8996                unit: Some("EA".to_string()),
8997                ..Default::default()
8998            });
8999
9000            // Credit Inventory for material consumption
9001            je.add_line(JournalEntryLine {
9002                line_number: 2,
9003                gl_account: control_accounts::INVENTORY.to_string(),
9004                credit_amount: order.actual_cost,
9005                reference: Some(order.order_id.clone()),
9006                ..Default::default()
9007            });
9008
9009            jes.push(je);
9010        }
9011
9012        jes
9013    }
9014
9015    /// Link document flows to subledger records.
9016    ///
9017    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
9018    /// ensuring subledger data is coherent with document flow data.
9019    fn link_document_flows_to_subledgers(
9020        &mut self,
9021        flows: &DocumentFlowSnapshot,
9022    ) -> SynthResult<SubledgerSnapshot> {
9023        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9024        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9025
9026        // Build vendor/customer name maps from master data for realistic subledger names
9027        let vendor_names: std::collections::HashMap<String, String> = self
9028            .master_data
9029            .vendors
9030            .iter()
9031            .map(|v| (v.vendor_id.clone(), v.name.clone()))
9032            .collect();
9033        let customer_names: std::collections::HashMap<String, String> = self
9034            .master_data
9035            .customers
9036            .iter()
9037            .map(|c| (c.customer_id.clone(), c.name.clone()))
9038            .collect();
9039
9040        let mut linker = DocumentFlowLinker::new()
9041            .with_vendor_names(vendor_names)
9042            .with_customer_names(customer_names);
9043
9044        // Convert vendor invoices to AP invoices
9045        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9046        if let Some(pb) = &pb {
9047            pb.inc(flows.vendor_invoices.len() as u64);
9048        }
9049
9050        // Convert customer invoices to AR invoices
9051        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9052        if let Some(pb) = &pb {
9053            pb.inc(flows.customer_invoices.len() as u64);
9054        }
9055
9056        if let Some(pb) = pb {
9057            pb.finish_with_message(format!(
9058                "Linked {} AP and {} AR invoices",
9059                ap_invoices.len(),
9060                ar_invoices.len()
9061            ));
9062        }
9063
9064        Ok(SubledgerSnapshot {
9065            ap_invoices,
9066            ar_invoices,
9067            fa_records: Vec::new(),
9068            inventory_positions: Vec::new(),
9069            inventory_movements: Vec::new(),
9070            // Aging reports are computed after payment settlement in phase_document_flows.
9071            ar_aging_reports: Vec::new(),
9072            ap_aging_reports: Vec::new(),
9073            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
9074            depreciation_runs: Vec::new(),
9075            inventory_valuations: Vec::new(),
9076            // Dunning runs and letters are populated in phase_document_flows after AR aging.
9077            dunning_runs: Vec::new(),
9078            dunning_letters: Vec::new(),
9079        })
9080    }
9081
9082    /// Generate OCPM events from document flows.
9083    ///
9084    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
9085    /// capturing the object-centric process perspective.
9086    #[allow(clippy::too_many_arguments)]
9087    fn generate_ocpm_events(
9088        &mut self,
9089        flows: &DocumentFlowSnapshot,
9090        sourcing: &SourcingSnapshot,
9091        hr: &HrSnapshot,
9092        manufacturing: &ManufacturingSnapshot,
9093        banking: &BankingSnapshot,
9094        audit: &AuditSnapshot,
9095        financial_reporting: &FinancialReportingSnapshot,
9096    ) -> SynthResult<OcpmSnapshot> {
9097        let total_chains = flows.p2p_chains.len()
9098            + flows.o2c_chains.len()
9099            + sourcing.sourcing_projects.len()
9100            + hr.payroll_runs.len()
9101            + manufacturing.production_orders.len()
9102            + banking.customers.len()
9103            + audit.engagements.len()
9104            + financial_reporting.bank_reconciliations.len();
9105        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9106
9107        // Create OCPM event log with standard types
9108        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9109        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9110
9111        // Configure the OCPM generator
9112        let ocpm_config = OcpmGeneratorConfig {
9113            generate_p2p: true,
9114            generate_o2c: true,
9115            generate_s2c: !sourcing.sourcing_projects.is_empty(),
9116            generate_h2r: !hr.payroll_runs.is_empty(),
9117            generate_mfg: !manufacturing.production_orders.is_empty(),
9118            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9119            generate_bank: !banking.customers.is_empty(),
9120            generate_audit: !audit.engagements.is_empty(),
9121            happy_path_rate: 0.75,
9122            exception_path_rate: 0.20,
9123            error_path_rate: 0.05,
9124            add_duration_variability: true,
9125            duration_std_dev_factor: 0.3,
9126        };
9127        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9128        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9129
9130        // Get available users for resource assignment
9131        let available_users: Vec<String> = self
9132            .master_data
9133            .employees
9134            .iter()
9135            .take(20)
9136            .map(|e| e.user_id.clone())
9137            .collect();
9138
9139        // Deterministic base date from config (avoids Utc::now() non-determinism)
9140        let fallback_date =
9141            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9142        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9143            .unwrap_or(fallback_date);
9144        let base_midnight = base_date
9145            .and_hms_opt(0, 0, 0)
9146            .expect("midnight is always valid");
9147        let base_datetime =
9148            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9149
9150        // Helper closure to add case results to event log
9151        let add_result = |event_log: &mut OcpmEventLog,
9152                          result: datasynth_ocpm::CaseGenerationResult| {
9153            for event in result.events {
9154                event_log.add_event(event);
9155            }
9156            for object in result.objects {
9157                event_log.add_object(object);
9158            }
9159            for relationship in result.relationships {
9160                event_log.add_relationship(relationship);
9161            }
9162            for corr in result.correlation_events {
9163                event_log.add_correlation_event(corr);
9164            }
9165            event_log.add_case(result.case_trace);
9166        };
9167
9168        // Generate events from P2P chains
9169        for chain in &flows.p2p_chains {
9170            let po = &chain.purchase_order;
9171            let documents = P2pDocuments::new(
9172                &po.header.document_id,
9173                &po.vendor_id,
9174                &po.header.company_code,
9175                po.total_net_amount,
9176                &po.header.currency,
9177                &ocpm_uuid_factory,
9178            )
9179            .with_goods_receipt(
9180                chain
9181                    .goods_receipts
9182                    .first()
9183                    .map(|gr| gr.header.document_id.as_str())
9184                    .unwrap_or(""),
9185                &ocpm_uuid_factory,
9186            )
9187            .with_invoice(
9188                chain
9189                    .vendor_invoice
9190                    .as_ref()
9191                    .map(|vi| vi.header.document_id.as_str())
9192                    .unwrap_or(""),
9193                &ocpm_uuid_factory,
9194            )
9195            .with_payment(
9196                chain
9197                    .payment
9198                    .as_ref()
9199                    .map(|p| p.header.document_id.as_str())
9200                    .unwrap_or(""),
9201                &ocpm_uuid_factory,
9202            );
9203
9204            let start_time =
9205                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
9206            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
9207            add_result(&mut event_log, result);
9208
9209            if let Some(pb) = &pb {
9210                pb.inc(1);
9211            }
9212        }
9213
9214        // Generate events from O2C chains
9215        for chain in &flows.o2c_chains {
9216            let so = &chain.sales_order;
9217            let documents = O2cDocuments::new(
9218                &so.header.document_id,
9219                &so.customer_id,
9220                &so.header.company_code,
9221                so.total_net_amount,
9222                &so.header.currency,
9223                &ocpm_uuid_factory,
9224            )
9225            .with_delivery(
9226                chain
9227                    .deliveries
9228                    .first()
9229                    .map(|d| d.header.document_id.as_str())
9230                    .unwrap_or(""),
9231                &ocpm_uuid_factory,
9232            )
9233            .with_invoice(
9234                chain
9235                    .customer_invoice
9236                    .as_ref()
9237                    .map(|ci| ci.header.document_id.as_str())
9238                    .unwrap_or(""),
9239                &ocpm_uuid_factory,
9240            )
9241            .with_receipt(
9242                chain
9243                    .customer_receipt
9244                    .as_ref()
9245                    .map(|r| r.header.document_id.as_str())
9246                    .unwrap_or(""),
9247                &ocpm_uuid_factory,
9248            );
9249
9250            let start_time =
9251                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
9252            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
9253            add_result(&mut event_log, result);
9254
9255            if let Some(pb) = &pb {
9256                pb.inc(1);
9257            }
9258        }
9259
9260        // Generate events from S2C sourcing projects
9261        for project in &sourcing.sourcing_projects {
9262            // Find vendor from contracts or qualifications
9263            let vendor_id = sourcing
9264                .contracts
9265                .iter()
9266                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9267                .map(|c| c.vendor_id.clone())
9268                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
9269                .or_else(|| {
9270                    self.master_data
9271                        .vendors
9272                        .first()
9273                        .map(|v| v.vendor_id.clone())
9274                })
9275                .unwrap_or_else(|| "V000".to_string());
9276            let mut docs = S2cDocuments::new(
9277                &project.project_id,
9278                &vendor_id,
9279                &project.company_code,
9280                project.estimated_annual_spend,
9281                &ocpm_uuid_factory,
9282            );
9283            // Link RFx if available
9284            if let Some(rfx) = sourcing
9285                .rfx_events
9286                .iter()
9287                .find(|r| r.sourcing_project_id == project.project_id)
9288            {
9289                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
9290                // Link winning bid (status == Accepted)
9291                if let Some(bid) = sourcing.bids.iter().find(|b| {
9292                    b.rfx_id == rfx.rfx_id
9293                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
9294                }) {
9295                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
9296                }
9297            }
9298            // Link contract
9299            if let Some(contract) = sourcing
9300                .contracts
9301                .iter()
9302                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9303            {
9304                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
9305            }
9306            let start_time = base_datetime - chrono::Duration::days(90);
9307            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
9308            add_result(&mut event_log, result);
9309
9310            if let Some(pb) = &pb {
9311                pb.inc(1);
9312            }
9313        }
9314
9315        // Generate events from H2R payroll runs
9316        for run in &hr.payroll_runs {
9317            // Use first matching payroll line item's employee, or fallback
9318            let employee_id = hr
9319                .payroll_line_items
9320                .iter()
9321                .find(|li| li.payroll_id == run.payroll_id)
9322                .map(|li| li.employee_id.as_str())
9323                .unwrap_or("EMP000");
9324            let docs = H2rDocuments::new(
9325                &run.payroll_id,
9326                employee_id,
9327                &run.company_code,
9328                run.total_gross,
9329                &ocpm_uuid_factory,
9330            )
9331            .with_time_entries(
9332                hr.time_entries
9333                    .iter()
9334                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
9335                    .take(5)
9336                    .map(|t| t.entry_id.as_str())
9337                    .collect(),
9338            );
9339            let start_time = base_datetime - chrono::Duration::days(30);
9340            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
9341            add_result(&mut event_log, result);
9342
9343            if let Some(pb) = &pb {
9344                pb.inc(1);
9345            }
9346        }
9347
9348        // Generate events from MFG production orders
9349        for order in &manufacturing.production_orders {
9350            let mut docs = MfgDocuments::new(
9351                &order.order_id,
9352                &order.material_id,
9353                &order.company_code,
9354                order.planned_quantity,
9355                &ocpm_uuid_factory,
9356            )
9357            .with_operations(
9358                order
9359                    .operations
9360                    .iter()
9361                    .map(|o| format!("OP-{:04}", o.operation_number))
9362                    .collect::<Vec<_>>()
9363                    .iter()
9364                    .map(std::string::String::as_str)
9365                    .collect(),
9366            );
9367            // Link quality inspection if available (via reference_id matching order_id)
9368            if let Some(insp) = manufacturing
9369                .quality_inspections
9370                .iter()
9371                .find(|i| i.reference_id == order.order_id)
9372            {
9373                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
9374            }
9375            // Link cycle count if available (match by material_id in items)
9376            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
9377                cc.items
9378                    .iter()
9379                    .any(|item| item.material_id == order.material_id)
9380            }) {
9381                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
9382            }
9383            let start_time = base_datetime - chrono::Duration::days(60);
9384            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
9385            add_result(&mut event_log, result);
9386
9387            if let Some(pb) = &pb {
9388                pb.inc(1);
9389            }
9390        }
9391
9392        // Generate events from Banking customers
9393        for customer in &banking.customers {
9394            let customer_id_str = customer.customer_id.to_string();
9395            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
9396            // Link accounts (primary_owner_id matches customer_id)
9397            if let Some(account) = banking
9398                .accounts
9399                .iter()
9400                .find(|a| a.primary_owner_id == customer.customer_id)
9401            {
9402                let account_id_str = account.account_id.to_string();
9403                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
9404                // Link transactions for this account
9405                let txn_strs: Vec<String> = banking
9406                    .transactions
9407                    .iter()
9408                    .filter(|t| t.account_id == account.account_id)
9409                    .take(10)
9410                    .map(|t| t.transaction_id.to_string())
9411                    .collect();
9412                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
9413                let txn_amounts: Vec<rust_decimal::Decimal> = banking
9414                    .transactions
9415                    .iter()
9416                    .filter(|t| t.account_id == account.account_id)
9417                    .take(10)
9418                    .map(|t| t.amount)
9419                    .collect();
9420                if !txn_ids.is_empty() {
9421                    docs = docs.with_transactions(txn_ids, txn_amounts);
9422                }
9423            }
9424            let start_time = base_datetime - chrono::Duration::days(180);
9425            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
9426            add_result(&mut event_log, result);
9427
9428            if let Some(pb) = &pb {
9429                pb.inc(1);
9430            }
9431        }
9432
9433        // Generate events from Audit engagements
9434        for engagement in &audit.engagements {
9435            let engagement_id_str = engagement.engagement_id.to_string();
9436            let docs = AuditDocuments::new(
9437                &engagement_id_str,
9438                &engagement.client_entity_id,
9439                &ocpm_uuid_factory,
9440            )
9441            .with_workpapers(
9442                audit
9443                    .workpapers
9444                    .iter()
9445                    .filter(|w| w.engagement_id == engagement.engagement_id)
9446                    .take(10)
9447                    .map(|w| w.workpaper_id.to_string())
9448                    .collect::<Vec<_>>()
9449                    .iter()
9450                    .map(std::string::String::as_str)
9451                    .collect(),
9452            )
9453            .with_evidence(
9454                audit
9455                    .evidence
9456                    .iter()
9457                    .filter(|e| e.engagement_id == engagement.engagement_id)
9458                    .take(10)
9459                    .map(|e| e.evidence_id.to_string())
9460                    .collect::<Vec<_>>()
9461                    .iter()
9462                    .map(std::string::String::as_str)
9463                    .collect(),
9464            )
9465            .with_risks(
9466                audit
9467                    .risk_assessments
9468                    .iter()
9469                    .filter(|r| r.engagement_id == engagement.engagement_id)
9470                    .take(5)
9471                    .map(|r| r.risk_id.to_string())
9472                    .collect::<Vec<_>>()
9473                    .iter()
9474                    .map(std::string::String::as_str)
9475                    .collect(),
9476            )
9477            .with_findings(
9478                audit
9479                    .findings
9480                    .iter()
9481                    .filter(|f| f.engagement_id == engagement.engagement_id)
9482                    .take(5)
9483                    .map(|f| f.finding_id.to_string())
9484                    .collect::<Vec<_>>()
9485                    .iter()
9486                    .map(std::string::String::as_str)
9487                    .collect(),
9488            )
9489            .with_judgments(
9490                audit
9491                    .judgments
9492                    .iter()
9493                    .filter(|j| j.engagement_id == engagement.engagement_id)
9494                    .take(5)
9495                    .map(|j| j.judgment_id.to_string())
9496                    .collect::<Vec<_>>()
9497                    .iter()
9498                    .map(std::string::String::as_str)
9499                    .collect(),
9500            );
9501            let start_time = base_datetime - chrono::Duration::days(120);
9502            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
9503            add_result(&mut event_log, result);
9504
9505            if let Some(pb) = &pb {
9506                pb.inc(1);
9507            }
9508        }
9509
9510        // Generate events from Bank Reconciliations
9511        for recon in &financial_reporting.bank_reconciliations {
9512            let docs = BankReconDocuments::new(
9513                &recon.reconciliation_id,
9514                &recon.bank_account_id,
9515                &recon.company_code,
9516                recon.bank_ending_balance,
9517                &ocpm_uuid_factory,
9518            )
9519            .with_statement_lines(
9520                recon
9521                    .statement_lines
9522                    .iter()
9523                    .take(20)
9524                    .map(|l| l.line_id.as_str())
9525                    .collect(),
9526            )
9527            .with_reconciling_items(
9528                recon
9529                    .reconciling_items
9530                    .iter()
9531                    .take(10)
9532                    .map(|i| i.item_id.as_str())
9533                    .collect(),
9534            );
9535            let start_time = base_datetime - chrono::Duration::days(30);
9536            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
9537            add_result(&mut event_log, result);
9538
9539            if let Some(pb) = &pb {
9540                pb.inc(1);
9541            }
9542        }
9543
9544        // Compute process variants
9545        event_log.compute_variants();
9546
9547        let summary = event_log.summary();
9548
9549        if let Some(pb) = pb {
9550            pb.finish_with_message(format!(
9551                "Generated {} OCPM events, {} objects",
9552                summary.event_count, summary.object_count
9553            ));
9554        }
9555
9556        Ok(OcpmSnapshot {
9557            event_count: summary.event_count,
9558            object_count: summary.object_count,
9559            case_count: summary.case_count,
9560            event_log: Some(event_log),
9561        })
9562    }
9563
9564    /// Inject anomalies into journal entries.
9565    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
9566        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
9567
9568        // Read anomaly rates from config instead of using hardcoded values.
9569        // Priority: anomaly_injection config > fraud config > default 0.02
9570        let total_rate = if self.config.anomaly_injection.enabled {
9571            self.config.anomaly_injection.rates.total_rate
9572        } else if self.config.fraud.enabled {
9573            self.config.fraud.fraud_rate
9574        } else {
9575            0.02
9576        };
9577
9578        let fraud_rate = if self.config.anomaly_injection.enabled {
9579            self.config.anomaly_injection.rates.fraud_rate
9580        } else {
9581            AnomalyRateConfig::default().fraud_rate
9582        };
9583
9584        let error_rate = if self.config.anomaly_injection.enabled {
9585            self.config.anomaly_injection.rates.error_rate
9586        } else {
9587            AnomalyRateConfig::default().error_rate
9588        };
9589
9590        let process_issue_rate = if self.config.anomaly_injection.enabled {
9591            self.config.anomaly_injection.rates.process_rate
9592        } else {
9593            AnomalyRateConfig::default().process_issue_rate
9594        };
9595
9596        let anomaly_config = AnomalyInjectorConfig {
9597            rates: AnomalyRateConfig {
9598                total_rate,
9599                fraud_rate,
9600                error_rate,
9601                process_issue_rate,
9602                ..Default::default()
9603            },
9604            seed: self.seed + 5000,
9605            ..Default::default()
9606        };
9607
9608        let mut injector = AnomalyInjector::new(anomaly_config);
9609        let result = injector.process_entries(entries);
9610
9611        if let Some(pb) = &pb {
9612            pb.inc(entries.len() as u64);
9613            pb.finish_with_message("Anomaly injection complete");
9614        }
9615
9616        let mut by_type = HashMap::new();
9617        for label in &result.labels {
9618            *by_type
9619                .entry(format!("{:?}", label.anomaly_type))
9620                .or_insert(0) += 1;
9621        }
9622
9623        Ok(AnomalyLabels {
9624            labels: result.labels,
9625            summary: Some(result.summary),
9626            by_type,
9627        })
9628    }
9629
9630    /// Validate journal entries using running balance tracker.
9631    ///
9632    /// Applies all entries to the balance tracker and validates:
9633    /// - Each entry is internally balanced (debits = credits)
9634    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
9635    ///
9636    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
9637    /// excluded from balance validation as they may be intentionally unbalanced.
9638    fn validate_journal_entries(
9639        &mut self,
9640        entries: &[JournalEntry],
9641    ) -> SynthResult<BalanceValidationResult> {
9642        // Filter out entries with human errors as they may be intentionally unbalanced
9643        let clean_entries: Vec<&JournalEntry> = entries
9644            .iter()
9645            .filter(|e| {
9646                e.header
9647                    .header_text
9648                    .as_ref()
9649                    .map(|t| !t.contains("[HUMAN_ERROR:"))
9650                    .unwrap_or(true)
9651            })
9652            .collect();
9653
9654        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
9655
9656        // Configure tracker to not fail on errors (collect them instead)
9657        let config = BalanceTrackerConfig {
9658            validate_on_each_entry: false,   // We'll validate at the end
9659            track_history: false,            // Skip history for performance
9660            fail_on_validation_error: false, // Collect errors, don't fail
9661            ..Default::default()
9662        };
9663        let validation_currency = self
9664            .config
9665            .companies
9666            .first()
9667            .map(|c| c.currency.clone())
9668            .unwrap_or_else(|| "USD".to_string());
9669
9670        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
9671
9672        // Apply clean entries (without human errors)
9673        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
9674        let errors = tracker.apply_entries(&clean_refs);
9675
9676        if let Some(pb) = &pb {
9677            pb.inc(entries.len() as u64);
9678        }
9679
9680        // Check if any entries were unbalanced
9681        // Note: When fail_on_validation_error is false, errors are stored in tracker
9682        let has_unbalanced = tracker
9683            .get_validation_errors()
9684            .iter()
9685            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
9686
9687        // Validate balance sheet for each company
9688        // Include both returned errors and collected validation errors
9689        let mut all_errors = errors;
9690        all_errors.extend(tracker.get_validation_errors().iter().cloned());
9691        let company_codes: Vec<String> = self
9692            .config
9693            .companies
9694            .iter()
9695            .map(|c| c.code.clone())
9696            .collect();
9697
9698        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9699            .map(|d| d + chrono::Months::new(self.config.global.period_months))
9700            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9701
9702        for company_code in &company_codes {
9703            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
9704                all_errors.push(e);
9705            }
9706        }
9707
9708        // Get statistics after all mutable operations are done
9709        let stats = tracker.get_statistics();
9710
9711        // Determine if balanced overall
9712        let is_balanced = all_errors.is_empty();
9713
9714        if let Some(pb) = pb {
9715            let msg = if is_balanced {
9716                "Balance validation passed"
9717            } else {
9718                "Balance validation completed with errors"
9719            };
9720            pb.finish_with_message(msg);
9721        }
9722
9723        Ok(BalanceValidationResult {
9724            validated: true,
9725            is_balanced,
9726            entries_processed: stats.entries_processed,
9727            total_debits: stats.total_debits,
9728            total_credits: stats.total_credits,
9729            accounts_tracked: stats.accounts_tracked,
9730            companies_tracked: stats.companies_tracked,
9731            validation_errors: all_errors,
9732            has_unbalanced_entries: has_unbalanced,
9733        })
9734    }
9735
9736    /// Inject data quality variations into journal entries.
9737    ///
9738    /// Applies typos, missing values, and format variations to make
9739    /// the synthetic data more realistic for testing data cleaning pipelines.
9740    fn inject_data_quality(
9741        &mut self,
9742        entries: &mut [JournalEntry],
9743    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
9744        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
9745
9746        // Build config from user-specified schema settings when data_quality is enabled;
9747        // otherwise fall back to the low-rate minimal() preset.
9748        let config = if self.config.data_quality.enabled {
9749            let dq = &self.config.data_quality;
9750            DataQualityConfig {
9751                enable_missing_values: dq.missing_values.enabled,
9752                missing_values: datasynth_generators::MissingValueConfig {
9753                    global_rate: dq.effective_missing_rate(),
9754                    ..Default::default()
9755                },
9756                enable_format_variations: dq.format_variations.enabled,
9757                format_variations: datasynth_generators::FormatVariationConfig {
9758                    date_variation_rate: dq.format_variations.dates.rate,
9759                    amount_variation_rate: dq.format_variations.amounts.rate,
9760                    identifier_variation_rate: dq.format_variations.identifiers.rate,
9761                    ..Default::default()
9762                },
9763                enable_duplicates: dq.duplicates.enabled,
9764                duplicates: datasynth_generators::DuplicateConfig {
9765                    duplicate_rate: dq.effective_duplicate_rate(),
9766                    ..Default::default()
9767                },
9768                enable_typos: dq.typos.enabled,
9769                typos: datasynth_generators::TypoConfig {
9770                    char_error_rate: dq.effective_typo_rate(),
9771                    ..Default::default()
9772                },
9773                enable_encoding_issues: dq.encoding_issues.enabled,
9774                encoding_issue_rate: dq.encoding_issues.rate,
9775                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
9776                track_statistics: true,
9777            }
9778        } else {
9779            DataQualityConfig::minimal()
9780        };
9781        let mut injector = DataQualityInjector::new(config);
9782
9783        // Wire country pack for locale-aware format baselines
9784        injector.set_country_pack(self.primary_pack().clone());
9785
9786        // Build context for missing value decisions
9787        let context = HashMap::new();
9788
9789        for entry in entries.iter_mut() {
9790            // Process header_text field (common target for typos)
9791            if let Some(text) = &entry.header.header_text {
9792                let processed = injector.process_text_field(
9793                    "header_text",
9794                    text,
9795                    &entry.header.document_id.to_string(),
9796                    &context,
9797                );
9798                match processed {
9799                    Some(new_text) if new_text != *text => {
9800                        entry.header.header_text = Some(new_text);
9801                    }
9802                    None => {
9803                        entry.header.header_text = None; // Missing value
9804                    }
9805                    _ => {}
9806                }
9807            }
9808
9809            // Process reference field
9810            if let Some(ref_text) = &entry.header.reference {
9811                let processed = injector.process_text_field(
9812                    "reference",
9813                    ref_text,
9814                    &entry.header.document_id.to_string(),
9815                    &context,
9816                );
9817                match processed {
9818                    Some(new_text) if new_text != *ref_text => {
9819                        entry.header.reference = Some(new_text);
9820                    }
9821                    None => {
9822                        entry.header.reference = None;
9823                    }
9824                    _ => {}
9825                }
9826            }
9827
9828            // Process user_persona field (potential for typos in user IDs)
9829            let user_persona = entry.header.user_persona.clone();
9830            if let Some(processed) = injector.process_text_field(
9831                "user_persona",
9832                &user_persona,
9833                &entry.header.document_id.to_string(),
9834                &context,
9835            ) {
9836                if processed != user_persona {
9837                    entry.header.user_persona = processed;
9838                }
9839            }
9840
9841            // Process line items
9842            for line in &mut entry.lines {
9843                // Process line description if present
9844                if let Some(ref text) = line.line_text {
9845                    let processed = injector.process_text_field(
9846                        "line_text",
9847                        text,
9848                        &entry.header.document_id.to_string(),
9849                        &context,
9850                    );
9851                    match processed {
9852                        Some(new_text) if new_text != *text => {
9853                            line.line_text = Some(new_text);
9854                        }
9855                        None => {
9856                            line.line_text = None;
9857                        }
9858                        _ => {}
9859                    }
9860                }
9861
9862                // Process cost_center if present
9863                if let Some(cc) = &line.cost_center {
9864                    let processed = injector.process_text_field(
9865                        "cost_center",
9866                        cc,
9867                        &entry.header.document_id.to_string(),
9868                        &context,
9869                    );
9870                    match processed {
9871                        Some(new_cc) if new_cc != *cc => {
9872                            line.cost_center = Some(new_cc);
9873                        }
9874                        None => {
9875                            line.cost_center = None;
9876                        }
9877                        _ => {}
9878                    }
9879                }
9880            }
9881
9882            if let Some(pb) = &pb {
9883                pb.inc(1);
9884            }
9885        }
9886
9887        if let Some(pb) = pb {
9888            pb.finish_with_message("Data quality injection complete");
9889        }
9890
9891        let quality_issues = injector.issues().to_vec();
9892        Ok((injector.stats().clone(), quality_issues))
9893    }
9894
9895    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
9896    ///
9897    /// Creates complete audit documentation for each company in the configuration,
9898    /// following ISA standards:
9899    /// - ISA 210/220: Engagement acceptance and terms
9900    /// - ISA 230: Audit documentation (workpapers)
9901    /// - ISA 265: Control deficiencies (findings)
9902    /// - ISA 315/330: Risk assessment and response
9903    /// - ISA 500: Audit evidence
9904    /// - ISA 200: Professional judgment
9905    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
9906        // Check if FSM-driven audit generation is enabled
9907        let use_fsm = self
9908            .config
9909            .audit
9910            .fsm
9911            .as_ref()
9912            .map(|f| f.enabled)
9913            .unwrap_or(false);
9914
9915        if use_fsm {
9916            return self.generate_audit_data_with_fsm(entries);
9917        }
9918
9919        // --- Legacy (non-FSM) audit generation follows ---
9920        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9921            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9922        let fiscal_year = start_date.year() as u16;
9923        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
9924
9925        // Calculate rough total revenue from entries for materiality
9926        let total_revenue: rust_decimal::Decimal = entries
9927            .iter()
9928            .flat_map(|e| e.lines.iter())
9929            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
9930            .map(|l| l.credit_amount)
9931            .sum();
9932
9933        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
9934        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
9935
9936        let mut snapshot = AuditSnapshot::default();
9937
9938        // Initialize generators
9939        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
9940        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
9941        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
9942        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
9943        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
9944        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
9945        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
9946        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
9947        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
9948        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
9949        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
9950        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
9951
9952        // Get list of accounts from CoA for risk assessment
9953        let accounts: Vec<String> = self
9954            .coa
9955            .as_ref()
9956            .map(|coa| {
9957                coa.get_postable_accounts()
9958                    .iter()
9959                    .map(|acc| acc.account_code().to_string())
9960                    .collect()
9961            })
9962            .unwrap_or_default();
9963
9964        // Generate engagements for each company
9965        for (i, company) in self.config.companies.iter().enumerate() {
9966            // Calculate company-specific revenue (proportional to volume weight)
9967            let company_revenue = total_revenue
9968                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
9969
9970            // Generate engagements for this company
9971            let engagements_for_company =
9972                self.phase_config.audit_engagements / self.config.companies.len().max(1);
9973            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
9974                1
9975            } else {
9976                0
9977            };
9978
9979            for _eng_idx in 0..(engagements_for_company + extra) {
9980                // Generate the engagement
9981                let mut engagement = engagement_gen.generate_engagement(
9982                    &company.code,
9983                    &company.name,
9984                    fiscal_year,
9985                    period_end,
9986                    company_revenue,
9987                    None, // Use default engagement type
9988                );
9989
9990                // Replace synthetic team IDs with real employee IDs from master data
9991                if !self.master_data.employees.is_empty() {
9992                    let emp_count = self.master_data.employees.len();
9993                    // Use employee IDs deterministically based on engagement index
9994                    let base = (i * 10 + _eng_idx) % emp_count;
9995                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
9996                        .employee_id
9997                        .clone();
9998                    engagement.engagement_manager_id = self.master_data.employees
9999                        [(base + 1) % emp_count]
10000                        .employee_id
10001                        .clone();
10002                    let real_team: Vec<String> = engagement
10003                        .team_member_ids
10004                        .iter()
10005                        .enumerate()
10006                        .map(|(j, _)| {
10007                            self.master_data.employees[(base + 2 + j) % emp_count]
10008                                .employee_id
10009                                .clone()
10010                        })
10011                        .collect();
10012                    engagement.team_member_ids = real_team;
10013                }
10014
10015                if let Some(pb) = &pb {
10016                    pb.inc(1);
10017                }
10018
10019                // Get team members from the engagement
10020                let team_members: Vec<String> = engagement.team_member_ids.clone();
10021
10022                // Generate workpapers for the engagement
10023                let workpapers =
10024                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10025
10026                for wp in &workpapers {
10027                    if let Some(pb) = &pb {
10028                        pb.inc(1);
10029                    }
10030
10031                    // Generate evidence for each workpaper
10032                    let evidence = evidence_gen.generate_evidence_for_workpaper(
10033                        wp,
10034                        &team_members,
10035                        wp.preparer_date,
10036                    );
10037
10038                    for _ in &evidence {
10039                        if let Some(pb) = &pb {
10040                            pb.inc(1);
10041                        }
10042                    }
10043
10044                    snapshot.evidence.extend(evidence);
10045                }
10046
10047                // Generate risk assessments for the engagement
10048                let risks =
10049                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10050
10051                for _ in &risks {
10052                    if let Some(pb) = &pb {
10053                        pb.inc(1);
10054                    }
10055                }
10056                snapshot.risk_assessments.extend(risks);
10057
10058                // Generate findings for the engagement
10059                let findings = finding_gen.generate_findings_for_engagement(
10060                    &engagement,
10061                    &workpapers,
10062                    &team_members,
10063                );
10064
10065                for _ in &findings {
10066                    if let Some(pb) = &pb {
10067                        pb.inc(1);
10068                    }
10069                }
10070                snapshot.findings.extend(findings);
10071
10072                // Generate professional judgments for the engagement
10073                let judgments =
10074                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10075
10076                for _ in &judgments {
10077                    if let Some(pb) = &pb {
10078                        pb.inc(1);
10079                    }
10080                }
10081                snapshot.judgments.extend(judgments);
10082
10083                // ISA 505: External confirmations and responses
10084                let (confs, resps) =
10085                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10086                snapshot.confirmations.extend(confs);
10087                snapshot.confirmation_responses.extend(resps);
10088
10089                // ISA 330: Procedure steps per workpaper
10090                let team_pairs: Vec<(String, String)> = team_members
10091                    .iter()
10092                    .map(|id| {
10093                        let name = self
10094                            .master_data
10095                            .employees
10096                            .iter()
10097                            .find(|e| e.employee_id == *id)
10098                            .map(|e| e.display_name.clone())
10099                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10100                        (id.clone(), name)
10101                    })
10102                    .collect();
10103                for wp in &workpapers {
10104                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10105                    snapshot.procedure_steps.extend(steps);
10106                }
10107
10108                // ISA 530: Samples per workpaper
10109                for wp in &workpapers {
10110                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10111                        snapshot.samples.push(sample);
10112                    }
10113                }
10114
10115                // ISA 520: Analytical procedures
10116                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10117                snapshot.analytical_results.extend(analytical);
10118
10119                // ISA 610: Internal audit function and reports
10120                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10121                snapshot.ia_functions.push(ia_func);
10122                snapshot.ia_reports.extend(ia_reports);
10123
10124                // ISA 550: Related parties and transactions
10125                let vendor_names: Vec<String> = self
10126                    .master_data
10127                    .vendors
10128                    .iter()
10129                    .map(|v| v.name.clone())
10130                    .collect();
10131                let customer_names: Vec<String> = self
10132                    .master_data
10133                    .customers
10134                    .iter()
10135                    .map(|c| c.name.clone())
10136                    .collect();
10137                let (parties, rp_txns) =
10138                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10139                snapshot.related_parties.extend(parties);
10140                snapshot.related_party_transactions.extend(rp_txns);
10141
10142                // Add workpapers after findings since findings need them
10143                snapshot.workpapers.extend(workpapers);
10144
10145                // Generate audit scope record for this engagement (one per engagement)
10146                {
10147                    let scope_id = format!(
10148                        "SCOPE-{}-{}",
10149                        engagement.engagement_id.simple(),
10150                        &engagement.client_entity_id
10151                    );
10152                    let scope = datasynth_core::models::audit::AuditScope::new(
10153                        scope_id.clone(),
10154                        engagement.engagement_id.to_string(),
10155                        engagement.client_entity_id.clone(),
10156                        engagement.materiality,
10157                    );
10158                    // Wire scope_id back to engagement
10159                    let mut eng = engagement;
10160                    eng.scope_id = Some(scope_id);
10161                    snapshot.audit_scopes.push(scope);
10162                    snapshot.engagements.push(eng);
10163                }
10164            }
10165        }
10166
10167        // ----------------------------------------------------------------
10168        // ISA 600: Group audit — component auditors, plan, instructions, reports
10169        // ----------------------------------------------------------------
10170        if self.config.companies.len() > 1 {
10171            // Use materiality from the first engagement if available, otherwise
10172            // derive a reasonable figure from total revenue.
10173            let group_materiality = snapshot
10174                .engagements
10175                .first()
10176                .map(|e| e.materiality)
10177                .unwrap_or_else(|| {
10178                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
10179                    total_revenue * pct
10180                });
10181
10182            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
10183            let group_engagement_id = snapshot
10184                .engagements
10185                .first()
10186                .map(|e| e.engagement_id.to_string())
10187                .unwrap_or_else(|| "GROUP-ENG".to_string());
10188
10189            let component_snapshot = component_gen.generate(
10190                &self.config.companies,
10191                group_materiality,
10192                &group_engagement_id,
10193                period_end,
10194            );
10195
10196            snapshot.component_auditors = component_snapshot.component_auditors;
10197            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
10198            snapshot.component_instructions = component_snapshot.component_instructions;
10199            snapshot.component_reports = component_snapshot.component_reports;
10200
10201            info!(
10202                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
10203                snapshot.component_auditors.len(),
10204                snapshot.component_instructions.len(),
10205                snapshot.component_reports.len(),
10206            );
10207        }
10208
10209        // ----------------------------------------------------------------
10210        // ISA 210: Engagement letters — one per engagement
10211        // ----------------------------------------------------------------
10212        {
10213            let applicable_framework = self
10214                .config
10215                .accounting_standards
10216                .framework
10217                .as_ref()
10218                .map(|f| format!("{f:?}"))
10219                .unwrap_or_else(|| "IFRS".to_string());
10220
10221            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
10222            let entity_count = self.config.companies.len();
10223
10224            for engagement in &snapshot.engagements {
10225                let company = self
10226                    .config
10227                    .companies
10228                    .iter()
10229                    .find(|c| c.code == engagement.client_entity_id);
10230                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
10231                let letter_date = engagement.planning_start;
10232                let letter = letter_gen.generate(
10233                    &engagement.engagement_id.to_string(),
10234                    &engagement.client_name,
10235                    entity_count,
10236                    engagement.period_end_date,
10237                    currency,
10238                    &applicable_framework,
10239                    letter_date,
10240                );
10241                snapshot.engagement_letters.push(letter);
10242            }
10243
10244            info!(
10245                "ISA 210 engagement letters: {} generated",
10246                snapshot.engagement_letters.len()
10247            );
10248        }
10249
10250        // ----------------------------------------------------------------
10251        // ISA 560 / IAS 10: Subsequent events
10252        // ----------------------------------------------------------------
10253        {
10254            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
10255            let entity_codes: Vec<String> = self
10256                .config
10257                .companies
10258                .iter()
10259                .map(|c| c.code.clone())
10260                .collect();
10261            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
10262            info!(
10263                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
10264                subsequent.len(),
10265                subsequent
10266                    .iter()
10267                    .filter(|e| matches!(
10268                        e.classification,
10269                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
10270                    ))
10271                    .count(),
10272                subsequent
10273                    .iter()
10274                    .filter(|e| matches!(
10275                        e.classification,
10276                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
10277                    ))
10278                    .count(),
10279            );
10280            snapshot.subsequent_events = subsequent;
10281        }
10282
10283        // ----------------------------------------------------------------
10284        // ISA 402: Service organization controls
10285        // ----------------------------------------------------------------
10286        {
10287            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
10288            let entity_codes: Vec<String> = self
10289                .config
10290                .companies
10291                .iter()
10292                .map(|c| c.code.clone())
10293                .collect();
10294            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
10295            info!(
10296                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
10297                soc_snapshot.service_organizations.len(),
10298                soc_snapshot.soc_reports.len(),
10299                soc_snapshot.user_entity_controls.len(),
10300            );
10301            snapshot.service_organizations = soc_snapshot.service_organizations;
10302            snapshot.soc_reports = soc_snapshot.soc_reports;
10303            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
10304        }
10305
10306        // ----------------------------------------------------------------
10307        // ISA 570: Going concern assessments
10308        // ----------------------------------------------------------------
10309        {
10310            use datasynth_generators::audit::going_concern_generator::{
10311                GoingConcernGenerator, GoingConcernInput,
10312            };
10313            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
10314            let entity_codes: Vec<String> = self
10315                .config
10316                .companies
10317                .iter()
10318                .map(|c| c.code.clone())
10319                .collect();
10320            // Assessment date = period end + 75 days (typical sign-off window).
10321            let assessment_date = period_end + chrono::Duration::days(75);
10322            let period_label = format!("FY{}", period_end.year());
10323
10324            // Build financial inputs from actual journal entries.
10325            //
10326            // We derive approximate P&L, working capital, and operating cash flow
10327            // by aggregating GL account balances from the journal entry population.
10328            // Account ranges used (standard chart):
10329            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
10330            //   Expenses:        6xxx (debit-normal)
10331            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
10332            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
10333            //   Operating CF:    net income adjusted for D&A (rough proxy)
10334            let gc_inputs: Vec<GoingConcernInput> = self
10335                .config
10336                .companies
10337                .iter()
10338                .map(|company| {
10339                    let code = &company.code;
10340                    let mut revenue = rust_decimal::Decimal::ZERO;
10341                    let mut expenses = rust_decimal::Decimal::ZERO;
10342                    let mut current_assets = rust_decimal::Decimal::ZERO;
10343                    let mut current_liabs = rust_decimal::Decimal::ZERO;
10344                    let mut total_debt = rust_decimal::Decimal::ZERO;
10345
10346                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
10347                        for line in &je.lines {
10348                            let acct = line.gl_account.as_str();
10349                            let net = line.debit_amount - line.credit_amount;
10350                            if acct.starts_with('4') {
10351                                // Revenue accounts: credit-normal, so negative net = revenue earned
10352                                revenue -= net;
10353                            } else if acct.starts_with('6') {
10354                                // Expense accounts: debit-normal
10355                                expenses += net;
10356                            }
10357                            // Balance sheet accounts for working capital
10358                            if acct.starts_with('1') {
10359                                // Current asset accounts (1000–1499)
10360                                if let Ok(n) = acct.parse::<u32>() {
10361                                    if (1000..=1499).contains(&n) {
10362                                        current_assets += net;
10363                                    }
10364                                }
10365                            } else if acct.starts_with('2') {
10366                                if let Ok(n) = acct.parse::<u32>() {
10367                                    if (2000..=2499).contains(&n) {
10368                                        // Current liabilities
10369                                        current_liabs -= net; // credit-normal
10370                                    } else if (2500..=2999).contains(&n) {
10371                                        // Long-term debt
10372                                        total_debt -= net;
10373                                    }
10374                                }
10375                            }
10376                        }
10377                    }
10378
10379                    let net_income = revenue - expenses;
10380                    let working_capital = current_assets - current_liabs;
10381                    // Rough operating CF proxy: net income (full accrual CF calculation
10382                    // is done separately in the cash flow statement generator)
10383                    let operating_cash_flow = net_income;
10384
10385                    GoingConcernInput {
10386                        entity_code: code.clone(),
10387                        net_income,
10388                        working_capital,
10389                        operating_cash_flow,
10390                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
10391                        assessment_date,
10392                    }
10393                })
10394                .collect();
10395
10396            let assessments = if gc_inputs.is_empty() {
10397                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
10398            } else {
10399                gc_gen.generate_for_entities_with_inputs(
10400                    &entity_codes,
10401                    &gc_inputs,
10402                    assessment_date,
10403                    &period_label,
10404                )
10405            };
10406            info!(
10407                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
10408                assessments.len(),
10409                assessments.iter().filter(|a| matches!(
10410                    a.auditor_conclusion,
10411                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
10412                )).count(),
10413                assessments.iter().filter(|a| matches!(
10414                    a.auditor_conclusion,
10415                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
10416                )).count(),
10417                assessments.iter().filter(|a| matches!(
10418                    a.auditor_conclusion,
10419                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
10420                )).count(),
10421            );
10422            snapshot.going_concern_assessments = assessments;
10423        }
10424
10425        // ----------------------------------------------------------------
10426        // ISA 540: Accounting estimates
10427        // ----------------------------------------------------------------
10428        {
10429            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
10430            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
10431            let entity_codes: Vec<String> = self
10432                .config
10433                .companies
10434                .iter()
10435                .map(|c| c.code.clone())
10436                .collect();
10437            let estimates = est_gen.generate_for_entities(&entity_codes);
10438            info!(
10439                "ISA 540 accounting estimates: {} estimates across {} entities \
10440                 ({} with retrospective reviews, {} with auditor point estimates)",
10441                estimates.len(),
10442                entity_codes.len(),
10443                estimates
10444                    .iter()
10445                    .filter(|e| e.retrospective_review.is_some())
10446                    .count(),
10447                estimates
10448                    .iter()
10449                    .filter(|e| e.auditor_point_estimate.is_some())
10450                    .count(),
10451            );
10452            snapshot.accounting_estimates = estimates;
10453        }
10454
10455        // ----------------------------------------------------------------
10456        // ISA 700/701/705/706: Audit opinions (one per engagement)
10457        // ----------------------------------------------------------------
10458        {
10459            use datasynth_generators::audit::audit_opinion_generator::{
10460                AuditOpinionGenerator, AuditOpinionInput,
10461            };
10462
10463            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
10464
10465            // Build inputs — one per engagement, linking findings and going concern.
10466            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
10467                .engagements
10468                .iter()
10469                .map(|eng| {
10470                    // Collect findings for this engagement.
10471                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
10472                        .findings
10473                        .iter()
10474                        .filter(|f| f.engagement_id == eng.engagement_id)
10475                        .cloned()
10476                        .collect();
10477
10478                    // Going concern for this entity.
10479                    let gc = snapshot
10480                        .going_concern_assessments
10481                        .iter()
10482                        .find(|g| g.entity_code == eng.client_entity_id)
10483                        .cloned();
10484
10485                    // Component reports: every engagement receives the full snapshot list (unfiltered).
10486                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
10487                        snapshot.component_reports.clone();
10488
10489                    let auditor = self
10490                        .master_data
10491                        .employees
10492                        .first()
10493                        .map(|e| e.display_name.clone())
10494                        .unwrap_or_else(|| "Global Audit LLP".into());
10495
10496                    let partner = self
10497                        .master_data
10498                        .employees
10499                        .get(1)
10500                        .map(|e| e.display_name.clone())
10501                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
10502
10503                    AuditOpinionInput {
10504                        entity_code: eng.client_entity_id.clone(),
10505                        entity_name: eng.client_name.clone(),
10506                        engagement_id: eng.engagement_id,
10507                        period_end: eng.period_end_date,
10508                        findings: eng_findings,
10509                        going_concern: gc,
10510                        component_reports: comp_reports,
10511                        // US-listed when the ISA compliance framework is "pcaob" or "dual" (case-insensitive).
10512                        is_us_listed: {
10513                            let fw = &self.config.audit_standards.isa_compliance.framework;
10514                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
10515                        },
10516                        auditor_name: auditor,
10517                        engagement_partner: partner,
10518                    }
10519                })
10520                .collect();
10521
10522            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
10523
10524            for go in &generated_opinions {
10525                snapshot
10526                    .key_audit_matters
10527                    .extend(go.key_audit_matters.clone());
10528            }
10529            snapshot.audit_opinions = generated_opinions
10530                .into_iter()
10531                .map(|go| go.opinion)
10532                .collect();
10533
10534            info!(
10535                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
10536                snapshot.audit_opinions.len(),
10537                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
10538                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
10539                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
10540                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
10541            );
10542        }
10543
10544        // ----------------------------------------------------------------
10545        // SOX 302 / 404 assessments
10546        // ----------------------------------------------------------------
10547        {
10548            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
10549
10550            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
10551
10552            for (i, company) in self.config.companies.iter().enumerate() {
10553                // Collect findings for this company's engagements.
10554                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
10555                    .engagements
10556                    .iter()
10557                    .filter(|e| e.client_entity_id == company.code)
10558                    .map(|e| e.engagement_id)
10559                    .collect();
10560
10561                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
10562                    .findings
10563                    .iter()
10564                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
10565                    .cloned()
10566                    .collect();
10567
10568                // Derive executive names from employee list.
10569                let emp_count = self.master_data.employees.len();
10570                let ceo_name = if emp_count > 0 {
10571                    self.master_data.employees[i % emp_count]
10572                        .display_name
10573                        .clone()
10574                } else {
10575                    format!("CEO of {}", company.name)
10576                };
10577                let cfo_name = if emp_count > 1 {
10578                    self.master_data.employees[(i + 1) % emp_count]
10579                        .display_name
10580                        .clone()
10581                } else {
10582                    format!("CFO of {}", company.name)
10583                };
10584
10585                // Use engagement materiality if available.
10586                let materiality = snapshot
10587                    .engagements
10588                    .iter()
10589                    .find(|e| e.client_entity_id == company.code)
10590                    .map(|e| e.materiality)
10591                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
10592
10593                let input = SoxGeneratorInput {
10594                    company_code: company.code.clone(),
10595                    company_name: company.name.clone(),
10596                    fiscal_year,
10597                    period_end,
10598                    findings: company_findings,
10599                    ceo_name,
10600                    cfo_name,
10601                    materiality_threshold: materiality,
10602                    revenue_percent: rust_decimal::Decimal::from(100),
10603                    assets_percent: rust_decimal::Decimal::from(100),
10604                    significant_accounts: vec![
10605                        "Revenue".into(),
10606                        "Accounts Receivable".into(),
10607                        "Inventory".into(),
10608                        "Fixed Assets".into(),
10609                        "Accounts Payable".into(),
10610                    ],
10611                };
10612
10613                let (certs, assessment) = sox_gen.generate(&input);
10614                snapshot.sox_302_certifications.extend(certs);
10615                snapshot.sox_404_assessments.push(assessment);
10616            }
10617
10618            info!(
10619                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
10620                snapshot.sox_302_certifications.len(),
10621                snapshot.sox_404_assessments.len(),
10622                snapshot
10623                    .sox_404_assessments
10624                    .iter()
10625                    .filter(|a| a.icfr_effective)
10626                    .count(),
10627                snapshot
10628                    .sox_404_assessments
10629                    .iter()
10630                    .filter(|a| !a.icfr_effective)
10631                    .count(),
10632            );
10633        }
10634
10635        // ----------------------------------------------------------------
10636        // ISA 320: Materiality calculations (one per entity)
10637        // ----------------------------------------------------------------
10638        {
10639            use datasynth_generators::audit::materiality_generator::{
10640                MaterialityGenerator, MaterialityInput,
10641            };
10642
10643            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
10644
10645            // Compute per-company financials from JEs.
10646            // Asset accounts start with '1', equity with '3', revenue with '4',
10647            // expense accounts with '5' or '6'.
10648            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
10649
10650            for company in &self.config.companies {
10651                let company_code = company.code.clone();
10652
10653                // Revenue: credit-side entries on 4xxx accounts
10654                let company_revenue: rust_decimal::Decimal = entries
10655                    .iter()
10656                    .filter(|e| e.company_code() == company_code)
10657                    .flat_map(|e| e.lines.iter())
10658                    .filter(|l| l.account_code.starts_with('4'))
10659                    .map(|l| l.credit_amount)
10660                    .sum();
10661
10662                // Total assets proxy: gross debit postings on 1xxx accounts (credits are not netted)
10663                let total_assets: rust_decimal::Decimal = entries
10664                    .iter()
10665                    .filter(|e| e.company_code() == company_code)
10666                    .flat_map(|e| e.lines.iter())
10667                    .filter(|l| l.account_code.starts_with('1'))
10668                    .map(|l| l.debit_amount)
10669                    .sum();
10670
10671                // Expenses: debit-side entries on 5xxx/6xxx accounts
10672                let total_expenses: rust_decimal::Decimal = entries
10673                    .iter()
10674                    .filter(|e| e.company_code() == company_code)
10675                    .flat_map(|e| e.lines.iter())
10676                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
10677                    .map(|l| l.debit_amount)
10678                    .sum();
10679
10680                // Equity proxy: gross credit postings on 3xxx accounts (debits are not netted)
10681                let equity: rust_decimal::Decimal = entries
10682                    .iter()
10683                    .filter(|e| e.company_code() == company_code)
10684                    .flat_map(|e| e.lines.iter())
10685                    .filter(|l| l.account_code.starts_with('3'))
10686                    .map(|l| l.credit_amount)
10687                    .sum();
10688
10689                let pretax_income = company_revenue - total_expenses;
10690
10691                // If the company has zero 4xxx revenue, fall back to a volume-weighted share of total_revenue
10692                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
10693                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
10694                        .unwrap_or(rust_decimal::Decimal::ONE);
10695                    (
10696                        total_revenue * w,
10697                        total_revenue * w * rust_decimal::Decimal::from(3),
10698                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
10699                        total_revenue * w * rust_decimal::Decimal::from(2),
10700                    )
10701                } else {
10702                    (company_revenue, total_assets, pretax_income, equity)
10703                };
10704
10705                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
10706
10707                materiality_inputs.push(MaterialityInput {
10708                    entity_code: company_code,
10709                    period: format!("FY{}", fiscal_year),
10710                    revenue: rev,
10711                    pretax_income: pti,
10712                    total_assets: assets,
10713                    equity: eq,
10714                    gross_profit,
10715                });
10716            }
10717
10718            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
10719
10720            info!(
10721                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
10722                 {} total assets, {} equity benchmarks)",
10723                snapshot.materiality_calculations.len(),
10724                snapshot
10725                    .materiality_calculations
10726                    .iter()
10727                    .filter(|m| matches!(
10728                        m.benchmark,
10729                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
10730                    ))
10731                    .count(),
10732                snapshot
10733                    .materiality_calculations
10734                    .iter()
10735                    .filter(|m| matches!(
10736                        m.benchmark,
10737                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
10738                    ))
10739                    .count(),
10740                snapshot
10741                    .materiality_calculations
10742                    .iter()
10743                    .filter(|m| matches!(
10744                        m.benchmark,
10745                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
10746                    ))
10747                    .count(),
10748                snapshot
10749                    .materiality_calculations
10750                    .iter()
10751                    .filter(|m| matches!(
10752                        m.benchmark,
10753                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
10754                    ))
10755                    .count(),
10756            );
10757        }
10758
10759        // ----------------------------------------------------------------
10760        // ISA 315: Combined Risk Assessments (per entity, per account area)
10761        // ----------------------------------------------------------------
10762        {
10763            use datasynth_generators::audit::cra_generator::CraGenerator;
10764
10765            let mut cra_gen = CraGenerator::new(self.seed + 8315);
10766
10767            // Build entity → scope_id map from already-generated scopes
10768            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
10769                .audit_scopes
10770                .iter()
10771                .map(|s| (s.entity_code.clone(), s.id.clone()))
10772                .collect();
10773
10774            for company in &self.config.companies {
10775                let cras = cra_gen.generate_for_entity(&company.code, None);
10776                let scope_id = entity_scope_map.get(&company.code).cloned();
10777                let cras_with_scope: Vec<_> = cras
10778                    .into_iter()
10779                    .map(|mut cra| {
10780                        cra.scope_id = scope_id.clone();
10781                        cra
10782                    })
10783                    .collect();
10784                snapshot.combined_risk_assessments.extend(cras_with_scope);
10785            }
10786
10787            let significant_count = snapshot
10788                .combined_risk_assessments
10789                .iter()
10790                .filter(|c| c.significant_risk)
10791                .count();
10792            let high_cra_count = snapshot
10793                .combined_risk_assessments
10794                .iter()
10795                .filter(|c| {
10796                    matches!(
10797                        c.combined_risk,
10798                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
10799                    )
10800                })
10801                .count();
10802
10803            info!(
10804                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
10805                snapshot.combined_risk_assessments.len(),
10806                significant_count,
10807                high_cra_count,
10808            );
10809        }
10810
10811        // ----------------------------------------------------------------
10812        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
10813        // ----------------------------------------------------------------
10814        {
10815            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
10816
10817            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
10818
10819            // Group CRAs by entity and use per-entity tolerable error from materiality
10820            for company in &self.config.companies {
10821                let entity_code = company.code.clone();
10822
10823                // Find tolerable error for this entity (= performance materiality)
10824                let tolerable_error = snapshot
10825                    .materiality_calculations
10826                    .iter()
10827                    .find(|m| m.entity_code == entity_code)
10828                    .map(|m| m.tolerable_error);
10829
10830                // Collect CRAs for this entity
10831                let entity_cras: Vec<_> = snapshot
10832                    .combined_risk_assessments
10833                    .iter()
10834                    .filter(|c| c.entity_code == entity_code)
10835                    .cloned()
10836                    .collect();
10837
10838                if !entity_cras.is_empty() {
10839                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
10840                    snapshot.sampling_plans.extend(plans);
10841                    snapshot.sampled_items.extend(items);
10842                }
10843            }
10844
10845            let misstatement_count = snapshot
10846                .sampled_items
10847                .iter()
10848                .filter(|i| i.misstatement_found)
10849                .count();
10850
10851            info!(
10852                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
10853                snapshot.sampling_plans.len(),
10854                snapshot.sampled_items.len(),
10855                misstatement_count,
10856            );
10857        }
10858
10859        // ----------------------------------------------------------------
10860        // ISA 315: Significant Classes of Transactions (SCOTS)
10861        // ----------------------------------------------------------------
10862        {
10863            use datasynth_generators::audit::scots_generator::{
10864                ScotsGenerator, ScotsGeneratorConfig,
10865            };
10866
10867            let ic_enabled = self.config.intercompany.enabled;
10868
10869            let config = ScotsGeneratorConfig {
10870                intercompany_enabled: ic_enabled,
10871                ..ScotsGeneratorConfig::default()
10872            };
10873            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
10874
10875            for company in &self.config.companies {
10876                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
10877                snapshot
10878                    .significant_transaction_classes
10879                    .extend(entity_scots);
10880            }
10881
10882            let estimation_count = snapshot
10883                .significant_transaction_classes
10884                .iter()
10885                .filter(|s| {
10886                    matches!(
10887                        s.transaction_type,
10888                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
10889                    )
10890                })
10891                .count();
10892
10893            info!(
10894                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
10895                snapshot.significant_transaction_classes.len(),
10896                estimation_count,
10897            );
10898        }
10899
10900        // ----------------------------------------------------------------
10901        // ISA 520: Unusual Item Markers
10902        // ----------------------------------------------------------------
10903        {
10904            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
10905
10906            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
10907            let entity_codes: Vec<String> = self
10908                .config
10909                .companies
10910                .iter()
10911                .map(|c| c.code.clone())
10912                .collect();
10913            let unusual_flags =
10914                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
10915            info!(
10916                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
10917                unusual_flags.len(),
10918                unusual_flags
10919                    .iter()
10920                    .filter(|f| matches!(
10921                        f.severity,
10922                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
10923                    ))
10924                    .count(),
10925                unusual_flags
10926                    .iter()
10927                    .filter(|f| matches!(
10928                        f.severity,
10929                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
10930                    ))
10931                    .count(),
10932                unusual_flags
10933                    .iter()
10934                    .filter(|f| matches!(
10935                        f.severity,
10936                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
10937                    ))
10938                    .count(),
10939            );
10940            snapshot.unusual_items = unusual_flags;
10941        }
10942
10943        // ----------------------------------------------------------------
10944        // ISA 520: Analytical Relationships
10945        // ----------------------------------------------------------------
10946        {
10947            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
10948
10949            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
10950            let entity_codes: Vec<String> = self
10951                .config
10952                .companies
10953                .iter()
10954                .map(|c| c.code.clone())
10955                .collect();
10956            let current_period_label = format!("FY{fiscal_year}");
10957            let prior_period_label = format!("FY{}", fiscal_year - 1);
10958            let analytical_rels = ar_gen.generate_for_entities(
10959                &entity_codes,
10960                entries,
10961                &current_period_label,
10962                &prior_period_label,
10963            );
10964            let out_of_range = analytical_rels
10965                .iter()
10966                .filter(|r| !r.within_expected_range)
10967                .count();
10968            info!(
10969                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
10970                analytical_rels.len(),
10971                out_of_range,
10972            );
10973            snapshot.analytical_relationships = analytical_rels;
10974        }
10975
10976        if let Some(pb) = pb {
10977            pb.finish_with_message(format!(
10978                "Audit data: {} engagements, {} workpapers, {} evidence, \
10979                 {} confirmations, {} procedure steps, {} samples, \
10980                 {} analytical, {} IA funcs, {} related parties, \
10981                 {} component auditors, {} letters, {} subsequent events, \
10982                 {} service orgs, {} going concern, {} accounting estimates, \
10983                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
10984                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
10985                 {} unusual items, {} analytical relationships",
10986                snapshot.engagements.len(),
10987                snapshot.workpapers.len(),
10988                snapshot.evidence.len(),
10989                snapshot.confirmations.len(),
10990                snapshot.procedure_steps.len(),
10991                snapshot.samples.len(),
10992                snapshot.analytical_results.len(),
10993                snapshot.ia_functions.len(),
10994                snapshot.related_parties.len(),
10995                snapshot.component_auditors.len(),
10996                snapshot.engagement_letters.len(),
10997                snapshot.subsequent_events.len(),
10998                snapshot.service_organizations.len(),
10999                snapshot.going_concern_assessments.len(),
11000                snapshot.accounting_estimates.len(),
11001                snapshot.audit_opinions.len(),
11002                snapshot.key_audit_matters.len(),
11003                snapshot.sox_302_certifications.len(),
11004                snapshot.sox_404_assessments.len(),
11005                snapshot.materiality_calculations.len(),
11006                snapshot.combined_risk_assessments.len(),
11007                snapshot.sampling_plans.len(),
11008                snapshot.significant_transaction_classes.len(),
11009                snapshot.unusual_items.len(),
11010                snapshot.analytical_relationships.len(),
11011            ));
11012        }
11013
11014        // ----------------------------------------------------------------
11015        // PCAOB-ISA cross-reference mappings
11016        // ----------------------------------------------------------------
11017        // Always include the standard PCAOB-ISA mappings when audit generation is
11018        // enabled. These are static reference data (no randomness required) so we
11019        // call standard_mappings() directly.
11020        {
11021            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11022            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11023            debug!(
11024                "PCAOB-ISA mappings generated: {} mappings",
11025                snapshot.isa_pcaob_mappings.len()
11026            );
11027        }
11028
11029        // ----------------------------------------------------------------
11030        // ISA standard reference entries
11031        // ----------------------------------------------------------------
11032        // Emit flat ISA standard reference data (number, title, series) so
11033        // consumers get a machine-readable listing of all 34 ISA standards in
11034        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
11035        {
11036            use datasynth_standards::audit::isa_reference::IsaStandard;
11037            snapshot.isa_mappings = IsaStandard::standard_entries();
11038            debug!(
11039                "ISA standard entries generated: {} standards",
11040                snapshot.isa_mappings.len()
11041            );
11042        }
11043
11044        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
11045        // For each RPT, find the chronologically closest JE for the engagement's entity.
11046        {
11047            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11048                .engagements
11049                .iter()
11050                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11051                .collect();
11052
11053            for rpt in &mut snapshot.related_party_transactions {
11054                if rpt.journal_entry_id.is_some() {
11055                    continue; // already set
11056                }
11057                let entity = engagement_by_id
11058                    .get(&rpt.engagement_id.to_string())
11059                    .copied()
11060                    .unwrap_or("");
11061
11062                // Find closest JE by date in the entity's company
11063                let best_je = entries
11064                    .iter()
11065                    .filter(|je| je.header.company_code == entity)
11066                    .min_by_key(|je| {
11067                        (je.header.posting_date - rpt.transaction_date)
11068                            .num_days()
11069                            .abs()
11070                    });
11071
11072                if let Some(je) = best_je {
11073                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
11074                }
11075            }
11076
11077            let linked = snapshot
11078                .related_party_transactions
11079                .iter()
11080                .filter(|t| t.journal_entry_id.is_some())
11081                .count();
11082            debug!(
11083                "Linked {}/{} related party transactions to journal entries",
11084                linked,
11085                snapshot.related_party_transactions.len()
11086            );
11087        }
11088
11089        Ok(snapshot)
11090    }
11091
11092    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
11093    ///
11094    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
11095    /// from the current orchestrator state, runs the FSM engine, and maps the
11096    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
11097    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
11098    fn generate_audit_data_with_fsm(
11099        &mut self,
11100        entries: &[JournalEntry],
11101    ) -> SynthResult<AuditSnapshot> {
11102        use datasynth_audit_fsm::{
11103            context::EngagementContext,
11104            engine::AuditFsmEngine,
11105            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
11106        };
11107        use rand::SeedableRng;
11108        use rand_chacha::ChaCha8Rng;
11109
11110        info!("Audit FSM: generating audit data via FSM engine");
11111
11112        let fsm_config = self
11113            .config
11114            .audit
11115            .fsm
11116            .as_ref()
11117            .expect("FSM config must be present when FSM is enabled");
11118
11119        // 1. Load blueprint from config string.
11120        let bwp = match fsm_config.blueprint.as_str() {
11121            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
11122            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
11123            _ => {
11124                warn!(
11125                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
11126                    fsm_config.blueprint
11127                );
11128                BlueprintWithPreconditions::load_builtin_fsa()
11129            }
11130        }
11131        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
11132
11133        // 2. Load overlay from config string.
11134        let overlay = match fsm_config.overlay.as_str() {
11135            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
11136            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
11137            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
11138            _ => {
11139                warn!(
11140                    "Unknown FSM overlay '{}', falling back to builtin:default",
11141                    fsm_config.overlay
11142                );
11143                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
11144            }
11145        }
11146        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
11147
11148        // 3. Build EngagementContext from orchestrator state.
11149        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11150            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11151        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11152
11153        // Financial aggregates from journal entries.
11154        let total_revenue: rust_decimal::Decimal = entries
11155            .iter()
11156            .flat_map(|e| e.lines.iter())
11157            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
11158            .map(|l| l.credit_amount)
11159            .sum();
11160
11161        let total_assets: rust_decimal::Decimal = entries
11162            .iter()
11163            .flat_map(|e| e.lines.iter())
11164            .filter(|l| l.debit_amount > rust_decimal::Decimal::ZERO)
11165            .map(|l| l.debit_amount)
11166            .sum();
11167
11168        // GL accounts for reference data.
11169        let accounts: Vec<String> = self
11170            .coa
11171            .as_ref()
11172            .map(|coa| {
11173                coa.get_postable_accounts()
11174                    .iter()
11175                    .map(|acc| acc.account_code().to_string())
11176                    .collect()
11177            })
11178            .unwrap_or_default();
11179
11180        // Team member IDs and display names from master data.
11181        let team_member_ids: Vec<String> = self
11182            .master_data
11183            .employees
11184            .iter()
11185            .take(8) // Cap team size
11186            .map(|e| e.employee_id.clone())
11187            .collect();
11188        let team_member_pairs: Vec<(String, String)> = self
11189            .master_data
11190            .employees
11191            .iter()
11192            .take(8)
11193            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
11194            .collect();
11195
11196        let vendor_names: Vec<String> = self
11197            .master_data
11198            .vendors
11199            .iter()
11200            .map(|v| v.name.clone())
11201            .collect();
11202        let customer_names: Vec<String> = self
11203            .master_data
11204            .customers
11205            .iter()
11206            .map(|c| c.name.clone())
11207            .collect();
11208
11209        let entity_codes: Vec<String> = self
11210            .config
11211            .companies
11212            .iter()
11213            .map(|c| c.code.clone())
11214            .collect();
11215
11216        let company = self.config.companies.first();
11217        let company_code = company
11218            .map(|c| c.code.clone())
11219            .unwrap_or_else(|| "UNKNOWN".to_string());
11220        let company_name = company
11221            .map(|c| c.name.clone())
11222            .unwrap_or_else(|| "Unknown Company".to_string());
11223        let currency = company
11224            .map(|c| c.currency.clone())
11225            .unwrap_or_else(|| "USD".to_string());
11226
11227        // Journal entry IDs for evidence tracing (sample up to 50).
11228        let journal_entry_ids: Vec<String> = entries
11229            .iter()
11230            .take(50)
11231            .map(|e| e.header.document_id.to_string())
11232            .collect();
11233
11234        // Account balances for risk weighting (aggregate debit - credit per account).
11235        let mut account_balances = std::collections::HashMap::<String, f64>::new();
11236        for entry in entries {
11237            for line in &entry.lines {
11238                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
11239                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
11240                *account_balances
11241                    .entry(line.account_code.clone())
11242                    .or_insert(0.0) += debit_f64 - credit_f64;
11243            }
11244        }
11245
11246        // Internal control IDs and anomaly refs are populated by the
11247        // caller when available; here we default to empty because the
11248        // orchestrator state may not have generated controls/anomalies
11249        // yet at this point in the pipeline.
11250        let control_ids: Vec<String> = Vec::new();
11251        let anomaly_refs: Vec<String> = Vec::new();
11252
11253        let context = EngagementContext {
11254            company_code,
11255            company_name,
11256            fiscal_year: start_date.year(),
11257            currency,
11258            total_revenue,
11259            total_assets,
11260            engagement_start: start_date,
11261            report_date: period_end,
11262            pretax_income: rust_decimal::Decimal::ZERO,
11263            equity: rust_decimal::Decimal::ZERO,
11264            gross_profit: rust_decimal::Decimal::ZERO,
11265            working_capital: rust_decimal::Decimal::ZERO,
11266            operating_cash_flow: rust_decimal::Decimal::ZERO,
11267            total_debt: rust_decimal::Decimal::ZERO,
11268            team_member_ids,
11269            team_member_pairs,
11270            accounts,
11271            vendor_names,
11272            customer_names,
11273            journal_entry_ids,
11274            account_balances,
11275            control_ids,
11276            anomaly_refs,
11277            is_us_listed: false,
11278            entity_codes,
11279        };
11280
11281        // 4. Create and run the FSM engine.
11282        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
11283        let rng = ChaCha8Rng::seed_from_u64(seed);
11284        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
11285
11286        let result = engine
11287            .run_engagement(&context)
11288            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
11289
11290        info!(
11291            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
11292             {} phases completed, duration {:.1}h",
11293            result.event_log.len(),
11294            result.artifacts.total_artifacts(),
11295            result.anomalies.len(),
11296            result.phases_completed.len(),
11297            result.total_duration_hours,
11298        );
11299
11300        // 5. Map ArtifactBag fields to AuditSnapshot.
11301        let bag = result.artifacts;
11302        let mut snapshot = AuditSnapshot {
11303            engagements: bag.engagements,
11304            engagement_letters: bag.engagement_letters,
11305            materiality_calculations: bag.materiality_calculations,
11306            risk_assessments: bag.risk_assessments,
11307            combined_risk_assessments: bag.combined_risk_assessments,
11308            workpapers: bag.workpapers,
11309            evidence: bag.evidence,
11310            findings: bag.findings,
11311            judgments: bag.judgments,
11312            sampling_plans: bag.sampling_plans,
11313            sampled_items: bag.sampled_items,
11314            analytical_results: bag.analytical_results,
11315            going_concern_assessments: bag.going_concern_assessments,
11316            subsequent_events: bag.subsequent_events,
11317            audit_opinions: bag.audit_opinions,
11318            key_audit_matters: bag.key_audit_matters,
11319            procedure_steps: bag.procedure_steps,
11320            samples: bag.samples,
11321            confirmations: bag.confirmations,
11322            confirmation_responses: bag.confirmation_responses,
11323            // Store the event trail for downstream export.
11324            fsm_event_trail: Some(result.event_log),
11325            // Fields not produced by the FSM engine remain at their defaults.
11326            ..Default::default()
11327        };
11328
11329        // 6. Add static reference data (same as legacy path).
11330        {
11331            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11332            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11333        }
11334        {
11335            use datasynth_standards::audit::isa_reference::IsaStandard;
11336            snapshot.isa_mappings = IsaStandard::standard_entries();
11337        }
11338
11339        info!(
11340            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
11341             {} risk assessments, {} findings, {} materiality calcs",
11342            snapshot.engagements.len(),
11343            snapshot.workpapers.len(),
11344            snapshot.evidence.len(),
11345            snapshot.risk_assessments.len(),
11346            snapshot.findings.len(),
11347            snapshot.materiality_calculations.len(),
11348        );
11349
11350        Ok(snapshot)
11351    }
11352
11353    /// Export journal entries as graph data for ML training and network reconstruction.
11354    ///
11355    /// Builds a transaction graph where:
11356    /// - Nodes are GL accounts
11357    /// - Edges are money flows from credit to debit accounts
11358    /// - Edge attributes include amount, date, business process, anomaly flags
11359    fn export_graphs(
11360        &mut self,
11361        entries: &[JournalEntry],
11362        _coa: &Arc<ChartOfAccounts>,
11363        stats: &mut EnhancedGenerationStatistics,
11364    ) -> SynthResult<GraphExportSnapshot> {
11365        let pb = self.create_progress_bar(100, "Exporting Graphs");
11366
11367        let mut snapshot = GraphExportSnapshot::default();
11368
11369        // Get output directory
11370        let output_dir = self
11371            .output_path
11372            .clone()
11373            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
11374        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
11375
11376        // Process each graph type configuration
11377        for graph_type in &self.config.graph_export.graph_types {
11378            if let Some(pb) = &pb {
11379                pb.inc(10);
11380            }
11381
11382            // Build transaction graph
11383            let graph_config = TransactionGraphConfig {
11384                include_vendors: false,
11385                include_customers: false,
11386                create_debit_credit_edges: true,
11387                include_document_nodes: graph_type.include_document_nodes,
11388                min_edge_weight: graph_type.min_edge_weight,
11389                aggregate_parallel_edges: graph_type.aggregate_edges,
11390                framework: None,
11391            };
11392
11393            let mut builder = TransactionGraphBuilder::new(graph_config);
11394            builder.add_journal_entries(entries);
11395            let graph = builder.build();
11396
11397            // Update stats
11398            stats.graph_node_count += graph.node_count();
11399            stats.graph_edge_count += graph.edge_count();
11400
11401            if let Some(pb) = &pb {
11402                pb.inc(40);
11403            }
11404
11405            // Export to each configured format
11406            for format in &self.config.graph_export.formats {
11407                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
11408
11409                // Create output directory
11410                if let Err(e) = std::fs::create_dir_all(&format_dir) {
11411                    warn!("Failed to create graph output directory: {}", e);
11412                    continue;
11413                }
11414
11415                match format {
11416                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
11417                        let pyg_config = PyGExportConfig {
11418                            common: datasynth_graph::CommonExportConfig {
11419                                export_node_features: true,
11420                                export_edge_features: true,
11421                                export_node_labels: true,
11422                                export_edge_labels: true,
11423                                export_masks: true,
11424                                train_ratio: self.config.graph_export.train_ratio,
11425                                val_ratio: self.config.graph_export.validation_ratio,
11426                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
11427                            },
11428                            one_hot_categoricals: false,
11429                        };
11430
11431                        let exporter = PyGExporter::new(pyg_config);
11432                        match exporter.export(&graph, &format_dir) {
11433                            Ok(metadata) => {
11434                                snapshot.exports.insert(
11435                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
11436                                    GraphExportInfo {
11437                                        name: graph_type.name.clone(),
11438                                        format: "pytorch_geometric".to_string(),
11439                                        output_path: format_dir.clone(),
11440                                        node_count: metadata.num_nodes,
11441                                        edge_count: metadata.num_edges,
11442                                    },
11443                                );
11444                                snapshot.graph_count += 1;
11445                            }
11446                            Err(e) => {
11447                                warn!("Failed to export PyTorch Geometric graph: {}", e);
11448                            }
11449                        }
11450                    }
11451                    datasynth_config::schema::GraphExportFormat::Neo4j => {
11452                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
11453
11454                        let neo4j_config = Neo4jExportConfig {
11455                            export_node_properties: true,
11456                            export_edge_properties: true,
11457                            export_features: true,
11458                            generate_cypher: true,
11459                            generate_admin_import: true,
11460                            database_name: "synth".to_string(),
11461                            cypher_batch_size: 1000,
11462                        };
11463
11464                        let exporter = Neo4jExporter::new(neo4j_config);
11465                        match exporter.export(&graph, &format_dir) {
11466                            Ok(metadata) => {
11467                                snapshot.exports.insert(
11468                                    format!("{}_{}", graph_type.name, "neo4j"),
11469                                    GraphExportInfo {
11470                                        name: graph_type.name.clone(),
11471                                        format: "neo4j".to_string(),
11472                                        output_path: format_dir.clone(),
11473                                        node_count: metadata.num_nodes,
11474                                        edge_count: metadata.num_edges,
11475                                    },
11476                                );
11477                                snapshot.graph_count += 1;
11478                            }
11479                            Err(e) => {
11480                                warn!("Failed to export Neo4j graph: {}", e);
11481                            }
11482                        }
11483                    }
11484                    datasynth_config::schema::GraphExportFormat::Dgl => {
11485                        use datasynth_graph::{DGLExportConfig, DGLExporter};
11486
11487                        let dgl_config = DGLExportConfig {
11488                            common: datasynth_graph::CommonExportConfig {
11489                                export_node_features: true,
11490                                export_edge_features: true,
11491                                export_node_labels: true,
11492                                export_edge_labels: true,
11493                                export_masks: true,
11494                                train_ratio: self.config.graph_export.train_ratio,
11495                                val_ratio: self.config.graph_export.validation_ratio,
11496                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
11497                            },
11498                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
11499                            include_pickle_script: true, // DGL ecosystem standard helper
11500                        };
11501
11502                        let exporter = DGLExporter::new(dgl_config);
11503                        match exporter.export(&graph, &format_dir) {
11504                            Ok(metadata) => {
11505                                snapshot.exports.insert(
11506                                    format!("{}_{}", graph_type.name, "dgl"),
11507                                    GraphExportInfo {
11508                                        name: graph_type.name.clone(),
11509                                        format: "dgl".to_string(),
11510                                        output_path: format_dir.clone(),
11511                                        node_count: metadata.common.num_nodes,
11512                                        edge_count: metadata.common.num_edges,
11513                                    },
11514                                );
11515                                snapshot.graph_count += 1;
11516                            }
11517                            Err(e) => {
11518                                warn!("Failed to export DGL graph: {}", e);
11519                            }
11520                        }
11521                    }
11522                    datasynth_config::schema::GraphExportFormat::RustGraph => {
11523                        use datasynth_graph::{
11524                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
11525                        };
11526
11527                        let rustgraph_config = RustGraphExportConfig {
11528                            include_features: true,
11529                            include_temporal: true,
11530                            include_labels: true,
11531                            source_name: "datasynth".to_string(),
11532                            batch_id: None,
11533                            output_format: RustGraphOutputFormat::JsonLines,
11534                            export_node_properties: true,
11535                            export_edge_properties: true,
11536                            pretty_print: false,
11537                        };
11538
11539                        let exporter = RustGraphExporter::new(rustgraph_config);
11540                        match exporter.export(&graph, &format_dir) {
11541                            Ok(metadata) => {
11542                                snapshot.exports.insert(
11543                                    format!("{}_{}", graph_type.name, "rustgraph"),
11544                                    GraphExportInfo {
11545                                        name: graph_type.name.clone(),
11546                                        format: "rustgraph".to_string(),
11547                                        output_path: format_dir.clone(),
11548                                        node_count: metadata.num_nodes,
11549                                        edge_count: metadata.num_edges,
11550                                    },
11551                                );
11552                                snapshot.graph_count += 1;
11553                            }
11554                            Err(e) => {
11555                                warn!("Failed to export RustGraph: {}", e);
11556                            }
11557                        }
11558                    }
11559                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
11560                        // Hypergraph export is handled separately in Phase 10b
11561                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
11562                    }
11563                }
11564            }
11565
11566            if let Some(pb) = &pb {
11567                pb.inc(40);
11568            }
11569        }
11570
11571        stats.graph_export_count = snapshot.graph_count;
11572        snapshot.exported = snapshot.graph_count > 0;
11573
11574        if let Some(pb) = pb {
11575            pb.finish_with_message(format!(
11576                "Graphs exported: {} graphs ({} nodes, {} edges)",
11577                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
11578            ));
11579        }
11580
11581        Ok(snapshot)
11582    }
11583
11584    /// Build additional graph types (banking, approval, entity) when relevant data
11585    /// is available. These run as a late phase because the data they need (banking
11586    /// snapshot, intercompany snapshot) is only generated after the main graph
11587    /// export phase.
11588    fn build_additional_graphs(
11589        &self,
11590        banking: &BankingSnapshot,
11591        intercompany: &IntercompanySnapshot,
11592        entries: &[JournalEntry],
11593        stats: &mut EnhancedGenerationStatistics,
11594    ) {
11595        let output_dir = self
11596            .output_path
11597            .clone()
11598            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
11599        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
11600
11601        // Banking graph: build when banking customers and transactions exist
11602        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
11603            info!("Phase 10c: Building banking network graph");
11604            let config = BankingGraphConfig::default();
11605            let mut builder = BankingGraphBuilder::new(config);
11606            builder.add_customers(&banking.customers);
11607            builder.add_accounts(&banking.accounts, &banking.customers);
11608            builder.add_transactions(&banking.transactions);
11609            let graph = builder.build();
11610
11611            let node_count = graph.node_count();
11612            let edge_count = graph.edge_count();
11613            stats.graph_node_count += node_count;
11614            stats.graph_edge_count += edge_count;
11615
11616            // Export as PyG if configured
11617            for format in &self.config.graph_export.formats {
11618                if matches!(
11619                    format,
11620                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
11621                ) {
11622                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
11623                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
11624                        warn!("Failed to create banking graph output dir: {}", e);
11625                        continue;
11626                    }
11627                    let pyg_config = PyGExportConfig::default();
11628                    let exporter = PyGExporter::new(pyg_config);
11629                    if let Err(e) = exporter.export(&graph, &format_dir) {
11630                        warn!("Failed to export banking graph as PyG: {}", e);
11631                    } else {
11632                        info!(
11633                            "Banking network graph exported: {} nodes, {} edges",
11634                            node_count, edge_count
11635                        );
11636                    }
11637                }
11638            }
11639        }
11640
11641        // Approval graph: build from journal entry approval workflows
11642        let approval_entries: Vec<_> = entries
11643            .iter()
11644            .filter(|je| je.header.approval_workflow.is_some())
11645            .collect();
11646
11647        if !approval_entries.is_empty() {
11648            info!(
11649                "Phase 10c: Building approval network graph ({} entries with approvals)",
11650                approval_entries.len()
11651            );
11652            let config = ApprovalGraphConfig::default();
11653            let mut builder = ApprovalGraphBuilder::new(config);
11654
11655            for je in &approval_entries {
11656                if let Some(ref wf) = je.header.approval_workflow {
11657                    for action in &wf.actions {
11658                        let record = datasynth_core::models::ApprovalRecord {
11659                            approval_id: format!(
11660                                "APR-{}-{}",
11661                                je.header.document_id, action.approval_level
11662                            ),
11663                            document_number: je.header.document_id.to_string(),
11664                            document_type: "JE".to_string(),
11665                            company_code: je.company_code().to_string(),
11666                            requester_id: wf.preparer_id.clone(),
11667                            requester_name: Some(wf.preparer_name.clone()),
11668                            approver_id: action.actor_id.clone(),
11669                            approver_name: action.actor_name.clone(),
11670                            approval_date: je.posting_date(),
11671                            action: format!("{:?}", action.action),
11672                            amount: wf.amount,
11673                            approval_limit: None,
11674                            comments: action.comments.clone(),
11675                            delegation_from: None,
11676                            is_auto_approved: false,
11677                        };
11678                        builder.add_approval(&record);
11679                    }
11680                }
11681            }
11682
11683            let graph = builder.build();
11684            let node_count = graph.node_count();
11685            let edge_count = graph.edge_count();
11686            stats.graph_node_count += node_count;
11687            stats.graph_edge_count += edge_count;
11688
11689            // Export as PyG if configured
11690            for format in &self.config.graph_export.formats {
11691                if matches!(
11692                    format,
11693                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
11694                ) {
11695                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
11696                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
11697                        warn!("Failed to create approval graph output dir: {}", e);
11698                        continue;
11699                    }
11700                    let pyg_config = PyGExportConfig::default();
11701                    let exporter = PyGExporter::new(pyg_config);
11702                    if let Err(e) = exporter.export(&graph, &format_dir) {
11703                        warn!("Failed to export approval graph as PyG: {}", e);
11704                    } else {
11705                        info!(
11706                            "Approval network graph exported: {} nodes, {} edges",
11707                            node_count, edge_count
11708                        );
11709                    }
11710                }
11711            }
11712        }
11713
11714        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
11715        if self.config.companies.len() >= 2 {
11716            info!(
11717                "Phase 10c: Building entity relationship graph ({} companies)",
11718                self.config.companies.len()
11719            );
11720
11721            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11722                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
11723
11724            // Map CompanyConfig → Company objects
11725            let parent_code = &self.config.companies[0].code;
11726            let mut companies: Vec<datasynth_core::models::Company> =
11727                Vec::with_capacity(self.config.companies.len());
11728
11729            // First company is the parent
11730            let first = &self.config.companies[0];
11731            companies.push(datasynth_core::models::Company::parent(
11732                &first.code,
11733                &first.name,
11734                &first.country,
11735                &first.currency,
11736            ));
11737
11738            // Remaining companies are subsidiaries (100% owned by parent)
11739            for cc in self.config.companies.iter().skip(1) {
11740                companies.push(datasynth_core::models::Company::subsidiary(
11741                    &cc.code,
11742                    &cc.name,
11743                    &cc.country,
11744                    &cc.currency,
11745                    parent_code,
11746                    rust_decimal::Decimal::from(100),
11747                ));
11748            }
11749
11750            // Build IntercompanyRelationship records (same logic as phase_intercompany)
11751            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
11752                self.config
11753                    .companies
11754                    .iter()
11755                    .skip(1)
11756                    .enumerate()
11757                    .map(|(i, cc)| {
11758                        let mut rel =
11759                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
11760                                format!("REL{:03}", i + 1),
11761                                parent_code.clone(),
11762                                cc.code.clone(),
11763                                rust_decimal::Decimal::from(100),
11764                                start_date,
11765                            );
11766                        rel.functional_currency = cc.currency.clone();
11767                        rel
11768                    })
11769                    .collect();
11770
11771            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
11772            builder.add_companies(&companies);
11773            builder.add_ownership_relationships(&relationships);
11774
11775            // Thread IC matched-pair transaction edges into the entity graph
11776            for pair in &intercompany.matched_pairs {
11777                builder.add_intercompany_edge(
11778                    &pair.seller_company,
11779                    &pair.buyer_company,
11780                    pair.amount,
11781                    &format!("{:?}", pair.transaction_type),
11782                );
11783            }
11784
11785            let graph = builder.build();
11786            let node_count = graph.node_count();
11787            let edge_count = graph.edge_count();
11788            stats.graph_node_count += node_count;
11789            stats.graph_edge_count += edge_count;
11790
11791            // Export as PyG if configured
11792            for format in &self.config.graph_export.formats {
11793                if matches!(
11794                    format,
11795                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
11796                ) {
11797                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
11798                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
11799                        warn!("Failed to create entity graph output dir: {}", e);
11800                        continue;
11801                    }
11802                    let pyg_config = PyGExportConfig::default();
11803                    let exporter = PyGExporter::new(pyg_config);
11804                    if let Err(e) = exporter.export(&graph, &format_dir) {
11805                        warn!("Failed to export entity graph as PyG: {}", e);
11806                    } else {
11807                        info!(
11808                            "Entity relationship graph exported: {} nodes, {} edges",
11809                            node_count, edge_count
11810                        );
11811                    }
11812                }
11813            }
11814        } else {
11815            debug!(
11816                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
11817                self.config.companies.len()
11818            );
11819        }
11820    }
11821
11822    /// Export a multi-layer hypergraph for RustGraph integration.
11823    ///
11824    /// Builds a 3-layer hypergraph:
11825    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
11826    /// - Layer 2: Process Events (all process family document flows + OCPM events)
11827    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
11828    #[allow(clippy::too_many_arguments)]
11829    fn export_hypergraph(
11830        &self,
11831        coa: &Arc<ChartOfAccounts>,
11832        entries: &[JournalEntry],
11833        document_flows: &DocumentFlowSnapshot,
11834        sourcing: &SourcingSnapshot,
11835        hr: &HrSnapshot,
11836        manufacturing: &ManufacturingSnapshot,
11837        banking: &BankingSnapshot,
11838        audit: &AuditSnapshot,
11839        financial_reporting: &FinancialReportingSnapshot,
11840        ocpm: &OcpmSnapshot,
11841        compliance: &ComplianceRegulationsSnapshot,
11842        stats: &mut EnhancedGenerationStatistics,
11843    ) -> SynthResult<HypergraphExportInfo> {
11844        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
11845        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
11846        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
11847        use datasynth_graph::models::hypergraph::AggregationStrategy;
11848
11849        let hg_settings = &self.config.graph_export.hypergraph;
11850
11851        // Parse aggregation strategy from config string
11852        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
11853            "truncate" => AggregationStrategy::Truncate,
11854            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
11855            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
11856            "importance_sample" => AggregationStrategy::ImportanceSample,
11857            _ => AggregationStrategy::PoolByCounterparty,
11858        };
11859
11860        let builder_config = HypergraphConfig {
11861            max_nodes: hg_settings.max_nodes,
11862            aggregation_strategy,
11863            include_coso: hg_settings.governance_layer.include_coso,
11864            include_controls: hg_settings.governance_layer.include_controls,
11865            include_sox: hg_settings.governance_layer.include_sox,
11866            include_vendors: hg_settings.governance_layer.include_vendors,
11867            include_customers: hg_settings.governance_layer.include_customers,
11868            include_employees: hg_settings.governance_layer.include_employees,
11869            include_p2p: hg_settings.process_layer.include_p2p,
11870            include_o2c: hg_settings.process_layer.include_o2c,
11871            include_s2c: hg_settings.process_layer.include_s2c,
11872            include_h2r: hg_settings.process_layer.include_h2r,
11873            include_mfg: hg_settings.process_layer.include_mfg,
11874            include_bank: hg_settings.process_layer.include_bank,
11875            include_audit: hg_settings.process_layer.include_audit,
11876            include_r2r: hg_settings.process_layer.include_r2r,
11877            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
11878            docs_per_counterparty_threshold: hg_settings
11879                .process_layer
11880                .docs_per_counterparty_threshold,
11881            include_accounts: hg_settings.accounting_layer.include_accounts,
11882            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
11883            include_cross_layer_edges: hg_settings.cross_layer.enabled,
11884            include_compliance: self.config.compliance_regulations.enabled,
11885            include_tax: true,
11886            include_treasury: true,
11887            include_esg: true,
11888            include_project: true,
11889            include_intercompany: true,
11890            include_temporal_events: true,
11891        };
11892
11893        let mut builder = HypergraphBuilder::new(builder_config);
11894
11895        // Layer 1: Governance & Controls
11896        builder.add_coso_framework();
11897
11898        // Add controls if available (generated during JE generation)
11899        // Controls are generated per-company; we use the standard set
11900        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
11901            let controls = InternalControl::standard_controls();
11902            builder.add_controls(&controls);
11903        }
11904
11905        // Add master data
11906        builder.add_vendors(&self.master_data.vendors);
11907        builder.add_customers(&self.master_data.customers);
11908        builder.add_employees(&self.master_data.employees);
11909
11910        // Layer 2: Process Events (all process families)
11911        builder.add_p2p_documents(
11912            &document_flows.purchase_orders,
11913            &document_flows.goods_receipts,
11914            &document_flows.vendor_invoices,
11915            &document_flows.payments,
11916        );
11917        builder.add_o2c_documents(
11918            &document_flows.sales_orders,
11919            &document_flows.deliveries,
11920            &document_flows.customer_invoices,
11921        );
11922        builder.add_s2c_documents(
11923            &sourcing.sourcing_projects,
11924            &sourcing.qualifications,
11925            &sourcing.rfx_events,
11926            &sourcing.bids,
11927            &sourcing.bid_evaluations,
11928            &sourcing.contracts,
11929        );
11930        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
11931        builder.add_mfg_documents(
11932            &manufacturing.production_orders,
11933            &manufacturing.quality_inspections,
11934            &manufacturing.cycle_counts,
11935        );
11936        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
11937        builder.add_audit_documents(
11938            &audit.engagements,
11939            &audit.workpapers,
11940            &audit.findings,
11941            &audit.evidence,
11942            &audit.risk_assessments,
11943            &audit.judgments,
11944            &audit.materiality_calculations,
11945            &audit.audit_opinions,
11946            &audit.going_concern_assessments,
11947        );
11948        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
11949
11950        // OCPM events as hyperedges
11951        if let Some(ref event_log) = ocpm.event_log {
11952            builder.add_ocpm_events(event_log);
11953        }
11954
11955        // Compliance regulations as cross-layer nodes
11956        if self.config.compliance_regulations.enabled
11957            && hg_settings.governance_layer.include_controls
11958        {
11959            // Reconstruct ComplianceStandard objects from the registry
11960            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
11961            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
11962                .standard_records
11963                .iter()
11964                .filter_map(|r| {
11965                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
11966                    registry.get(&sid).cloned()
11967                })
11968                .collect();
11969
11970            builder.add_compliance_regulations(
11971                &standards,
11972                &compliance.findings,
11973                &compliance.filings,
11974            );
11975        }
11976
11977        // Layer 3: Accounting Network
11978        builder.add_accounts(coa);
11979        builder.add_journal_entries_as_hyperedges(entries);
11980
11981        // Build the hypergraph
11982        let hypergraph = builder.build();
11983
11984        // Export
11985        let output_dir = self
11986            .output_path
11987            .clone()
11988            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
11989        let hg_dir = output_dir
11990            .join(&self.config.graph_export.output_subdirectory)
11991            .join(&hg_settings.output_subdirectory);
11992
11993        // Branch on output format
11994        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
11995            "unified" => {
11996                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
11997                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
11998                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
11999                })?;
12000                (
12001                    metadata.num_nodes,
12002                    metadata.num_edges,
12003                    metadata.num_hyperedges,
12004                )
12005            }
12006            _ => {
12007                // "native" or any unrecognized format → use existing exporter
12008                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
12009                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12010                    SynthError::generation(format!("Hypergraph export failed: {e}"))
12011                })?;
12012                (
12013                    metadata.num_nodes,
12014                    metadata.num_edges,
12015                    metadata.num_hyperedges,
12016                )
12017            }
12018        };
12019
12020        // Stream to RustGraph ingest endpoint if configured
12021        #[cfg(feature = "streaming")]
12022        if let Some(ref target_url) = hg_settings.stream_target {
12023            use crate::stream_client::{StreamClient, StreamConfig};
12024            use std::io::Write as _;
12025
12026            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
12027            let stream_config = StreamConfig {
12028                target_url: target_url.clone(),
12029                batch_size: hg_settings.stream_batch_size,
12030                api_key,
12031                ..StreamConfig::default()
12032            };
12033
12034            match StreamClient::new(stream_config) {
12035                Ok(mut client) => {
12036                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12037                    match exporter.export_to_writer(&hypergraph, &mut client) {
12038                        Ok(_) => {
12039                            if let Err(e) = client.flush() {
12040                                warn!("Failed to flush stream client: {}", e);
12041                            } else {
12042                                info!("Streamed {} records to {}", client.total_sent(), target_url);
12043                            }
12044                        }
12045                        Err(e) => {
12046                            warn!("Streaming export failed: {}", e);
12047                        }
12048                    }
12049                }
12050                Err(e) => {
12051                    warn!("Failed to create stream client: {}", e);
12052                }
12053            }
12054        }
12055
12056        // Update stats
12057        stats.graph_node_count += num_nodes;
12058        stats.graph_edge_count += num_edges;
12059        stats.graph_export_count += 1;
12060
12061        Ok(HypergraphExportInfo {
12062            node_count: num_nodes,
12063            edge_count: num_edges,
12064            hyperedge_count: num_hyperedges,
12065            output_path: hg_dir,
12066        })
12067    }
12068
12069    /// Generate banking KYC/AML data.
12070    ///
12071    /// Creates banking customers, accounts, and transactions with AML typology injection.
12072    /// Uses the BankingOrchestrator from synth-banking crate.
12073    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
12074        let pb = self.create_progress_bar(100, "Generating Banking Data");
12075
12076        // Build the banking orchestrator from config
12077        let orchestrator = BankingOrchestratorBuilder::new()
12078            .config(self.config.banking.clone())
12079            .seed(self.seed + 9000)
12080            .country_pack(self.primary_pack().clone())
12081            .build();
12082
12083        if let Some(pb) = &pb {
12084            pb.inc(10);
12085        }
12086
12087        // Generate the banking data
12088        let result = orchestrator.generate();
12089
12090        if let Some(pb) = &pb {
12091            pb.inc(90);
12092            pb.finish_with_message(format!(
12093                "Banking: {} customers, {} transactions",
12094                result.customers.len(),
12095                result.transactions.len()
12096            ));
12097        }
12098
12099        // Cross-reference banking customers with core master data so that
12100        // banking customer names align with the enterprise customer list.
12101        // We rotate through core customers, overlaying their name and country
12102        // onto the generated banking customers where possible.
12103        let mut banking_customers = result.customers;
12104        let core_customers = &self.master_data.customers;
12105        if !core_customers.is_empty() {
12106            for (i, bc) in banking_customers.iter_mut().enumerate() {
12107                let core = &core_customers[i % core_customers.len()];
12108                bc.name = CustomerName::business(&core.name);
12109                bc.residence_country = core.country.clone();
12110                bc.enterprise_customer_id = Some(core.customer_id.clone());
12111            }
12112            debug!(
12113                "Cross-referenced {} banking customers with {} core customers",
12114                banking_customers.len(),
12115                core_customers.len()
12116            );
12117        }
12118
12119        Ok(BankingSnapshot {
12120            customers: banking_customers,
12121            accounts: result.accounts,
12122            transactions: result.transactions,
12123            transaction_labels: result.transaction_labels,
12124            customer_labels: result.customer_labels,
12125            account_labels: result.account_labels,
12126            relationship_labels: result.relationship_labels,
12127            narratives: result.narratives,
12128            suspicious_count: result.stats.suspicious_count,
12129            scenario_count: result.scenarios.len(),
12130        })
12131    }
12132
12133    /// Calculate total transactions to generate.
12134    fn calculate_total_transactions(&self) -> u64 {
12135        let months = self.config.global.period_months as f64;
12136        self.config
12137            .companies
12138            .iter()
12139            .map(|c| {
12140                let annual = c.annual_transaction_volume.count() as f64;
12141                let weighted = annual * c.volume_weight;
12142                (weighted * months / 12.0) as u64
12143            })
12144            .sum()
12145    }
12146
12147    /// Create a progress bar if progress display is enabled.
12148    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
12149        if !self.phase_config.show_progress {
12150            return None;
12151        }
12152
12153        let pb = if let Some(mp) = &self.multi_progress {
12154            mp.add(ProgressBar::new(total))
12155        } else {
12156            ProgressBar::new(total)
12157        };
12158
12159        pb.set_style(
12160            ProgressStyle::default_bar()
12161                .template(&format!(
12162                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
12163                ))
12164                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
12165                .progress_chars("#>-"),
12166        );
12167
12168        Some(pb)
12169    }
12170
12171    /// Get the generated chart of accounts.
12172    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
12173        self.coa.clone()
12174    }
12175
12176    /// Get the generated master data.
12177    pub fn get_master_data(&self) -> &MasterDataSnapshot {
12178        &self.master_data
12179    }
12180
12181    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
12182    fn phase_compliance_regulations(
12183        &mut self,
12184        _stats: &mut EnhancedGenerationStatistics,
12185    ) -> SynthResult<ComplianceRegulationsSnapshot> {
12186        if !self.phase_config.generate_compliance_regulations {
12187            return Ok(ComplianceRegulationsSnapshot::default());
12188        }
12189
12190        info!("Phase: Generating Compliance Regulations Data");
12191
12192        let cr_config = &self.config.compliance_regulations;
12193
12194        // Determine jurisdictions: from config or inferred from companies
12195        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
12196            self.config
12197                .companies
12198                .iter()
12199                .map(|c| c.country.clone())
12200                .collect::<std::collections::HashSet<_>>()
12201                .into_iter()
12202                .collect()
12203        } else {
12204            cr_config.jurisdictions.clone()
12205        };
12206
12207        // Determine reference date
12208        let fallback_date =
12209            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
12210        let reference_date = cr_config
12211            .reference_date
12212            .as_ref()
12213            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
12214            .unwrap_or_else(|| {
12215                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12216                    .unwrap_or(fallback_date)
12217            });
12218
12219        // Generate standards registry data
12220        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
12221        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
12222        let cross_reference_records = reg_gen.generate_cross_reference_records();
12223        let jurisdiction_records =
12224            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
12225
12226        info!(
12227            "  Standards: {} records, {} cross-references, {} jurisdictions",
12228            standard_records.len(),
12229            cross_reference_records.len(),
12230            jurisdiction_records.len()
12231        );
12232
12233        // Generate audit procedures (if enabled)
12234        let audit_procedures = if cr_config.audit_procedures.enabled {
12235            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
12236                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
12237                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
12238                confidence_level: cr_config.audit_procedures.confidence_level,
12239                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
12240            };
12241            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
12242                self.seed + 9000,
12243                proc_config,
12244            );
12245            let registry = reg_gen.registry();
12246            let mut all_procs = Vec::new();
12247            for jurisdiction in &jurisdictions {
12248                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
12249                all_procs.extend(procs);
12250            }
12251            info!("  Audit procedures: {}", all_procs.len());
12252            all_procs
12253        } else {
12254            Vec::new()
12255        };
12256
12257        // Generate compliance findings (if enabled)
12258        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
12259            let finding_config =
12260                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
12261                    finding_rate: cr_config.findings.finding_rate,
12262                    material_weakness_rate: cr_config.findings.material_weakness_rate,
12263                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
12264                    generate_remediation: cr_config.findings.generate_remediation,
12265                };
12266            let mut finding_gen =
12267                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
12268                    self.seed + 9100,
12269                    finding_config,
12270                );
12271            let mut all_findings = Vec::new();
12272            for company in &self.config.companies {
12273                let company_findings =
12274                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
12275                all_findings.extend(company_findings);
12276            }
12277            info!("  Compliance findings: {}", all_findings.len());
12278            all_findings
12279        } else {
12280            Vec::new()
12281        };
12282
12283        // Generate regulatory filings (if enabled)
12284        let filings = if cr_config.filings.enabled {
12285            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
12286                filing_types: cr_config.filings.filing_types.clone(),
12287                generate_status_progression: cr_config.filings.generate_status_progression,
12288            };
12289            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
12290                self.seed + 9200,
12291                filing_config,
12292            );
12293            let company_codes: Vec<String> = self
12294                .config
12295                .companies
12296                .iter()
12297                .map(|c| c.code.clone())
12298                .collect();
12299            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12300                .unwrap_or(fallback_date);
12301            let filings = filing_gen.generate_filings(
12302                &company_codes,
12303                &jurisdictions,
12304                start_date,
12305                self.config.global.period_months,
12306            );
12307            info!("  Regulatory filings: {}", filings.len());
12308            filings
12309        } else {
12310            Vec::new()
12311        };
12312
12313        // Build compliance graph (if enabled)
12314        let compliance_graph = if cr_config.graph.enabled {
12315            let graph_config = datasynth_graph::ComplianceGraphConfig {
12316                include_standard_nodes: cr_config.graph.include_compliance_nodes,
12317                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
12318                include_cross_references: cr_config.graph.include_cross_references,
12319                include_supersession_edges: cr_config.graph.include_supersession_edges,
12320                include_account_links: cr_config.graph.include_account_links,
12321                include_control_links: cr_config.graph.include_control_links,
12322                include_company_links: cr_config.graph.include_company_links,
12323            };
12324            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
12325
12326            // Add standard nodes
12327            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
12328                .iter()
12329                .map(|r| datasynth_graph::StandardNodeInput {
12330                    standard_id: r.standard_id.clone(),
12331                    title: r.title.clone(),
12332                    category: r.category.clone(),
12333                    domain: r.domain.clone(),
12334                    is_active: r.is_active,
12335                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
12336                    applicable_account_types: r.applicable_account_types.clone(),
12337                    applicable_processes: r.applicable_processes.clone(),
12338                })
12339                .collect();
12340            builder.add_standards(&standard_inputs);
12341
12342            // Add jurisdiction nodes
12343            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
12344                jurisdiction_records
12345                    .iter()
12346                    .map(|r| datasynth_graph::JurisdictionNodeInput {
12347                        country_code: r.country_code.clone(),
12348                        country_name: r.country_name.clone(),
12349                        framework: r.accounting_framework.clone(),
12350                        standard_count: r.standard_count,
12351                        tax_rate: r.statutory_tax_rate,
12352                    })
12353                    .collect();
12354            builder.add_jurisdictions(&jurisdiction_inputs);
12355
12356            // Add cross-reference edges
12357            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
12358                cross_reference_records
12359                    .iter()
12360                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
12361                        from_standard: r.from_standard.clone(),
12362                        to_standard: r.to_standard.clone(),
12363                        relationship: r.relationship.clone(),
12364                        convergence_level: r.convergence_level,
12365                    })
12366                    .collect();
12367            builder.add_cross_references(&xref_inputs);
12368
12369            // Add jurisdiction→standard mappings
12370            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
12371                .iter()
12372                .map(|r| datasynth_graph::JurisdictionMappingInput {
12373                    country_code: r.jurisdiction.clone(),
12374                    standard_id: r.standard_id.clone(),
12375                })
12376                .collect();
12377            builder.add_jurisdiction_mappings(&mapping_inputs);
12378
12379            // Add procedure nodes
12380            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
12381                .iter()
12382                .map(|p| datasynth_graph::ProcedureNodeInput {
12383                    procedure_id: p.procedure_id.clone(),
12384                    standard_id: p.standard_id.clone(),
12385                    procedure_type: p.procedure_type.clone(),
12386                    sample_size: p.sample_size,
12387                    confidence_level: p.confidence_level,
12388                })
12389                .collect();
12390            builder.add_procedures(&proc_inputs);
12391
12392            // Add finding nodes
12393            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
12394                .iter()
12395                .map(|f| datasynth_graph::FindingNodeInput {
12396                    finding_id: f.finding_id.to_string(),
12397                    standard_id: f
12398                        .related_standards
12399                        .first()
12400                        .map(|s| s.as_str().to_string())
12401                        .unwrap_or_default(),
12402                    severity: f.severity.to_string(),
12403                    deficiency_level: f.deficiency_level.to_string(),
12404                    severity_score: f.deficiency_level.severity_score(),
12405                    control_id: f.control_id.clone(),
12406                    affected_accounts: f.affected_accounts.clone(),
12407                })
12408                .collect();
12409            builder.add_findings(&finding_inputs);
12410
12411            // Cross-domain: link standards to accounts from chart of accounts
12412            if cr_config.graph.include_account_links {
12413                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12414                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
12415                for std_record in &standard_records {
12416                    if let Some(std_obj) =
12417                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
12418                            &std_record.standard_id,
12419                        ))
12420                    {
12421                        for acct_type in &std_obj.applicable_account_types {
12422                            account_links.push(datasynth_graph::AccountLinkInput {
12423                                standard_id: std_record.standard_id.clone(),
12424                                account_code: acct_type.clone(),
12425                                account_name: acct_type.clone(),
12426                            });
12427                        }
12428                    }
12429                }
12430                builder.add_account_links(&account_links);
12431            }
12432
12433            // Cross-domain: link standards to internal controls
12434            if cr_config.graph.include_control_links {
12435                let mut control_links = Vec::new();
12436                // SOX/PCAOB standards link to all controls
12437                let sox_like_ids: Vec<String> = standard_records
12438                    .iter()
12439                    .filter(|r| {
12440                        r.standard_id.starts_with("SOX")
12441                            || r.standard_id.starts_with("PCAOB-AS-2201")
12442                    })
12443                    .map(|r| r.standard_id.clone())
12444                    .collect();
12445                // Get control IDs from config (C001-C060 standard controls)
12446                let control_ids = [
12447                    ("C001", "Cash Controls"),
12448                    ("C002", "Large Transaction Approval"),
12449                    ("C010", "PO Approval"),
12450                    ("C011", "Three-Way Match"),
12451                    ("C020", "Revenue Recognition"),
12452                    ("C021", "Credit Check"),
12453                    ("C030", "Manual JE Approval"),
12454                    ("C031", "Period Close Review"),
12455                    ("C032", "Account Reconciliation"),
12456                    ("C040", "Payroll Processing"),
12457                    ("C050", "Fixed Asset Capitalization"),
12458                    ("C060", "Intercompany Elimination"),
12459                ];
12460                for sox_id in &sox_like_ids {
12461                    for (ctrl_id, ctrl_name) in &control_ids {
12462                        control_links.push(datasynth_graph::ControlLinkInput {
12463                            standard_id: sox_id.clone(),
12464                            control_id: ctrl_id.to_string(),
12465                            control_name: ctrl_name.to_string(),
12466                        });
12467                    }
12468                }
12469                builder.add_control_links(&control_links);
12470            }
12471
12472            // Cross-domain: filing nodes with company links
12473            if cr_config.graph.include_company_links {
12474                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
12475                    .iter()
12476                    .enumerate()
12477                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
12478                        filing_id: format!("F{:04}", i + 1),
12479                        filing_type: f.filing_type.to_string(),
12480                        company_code: f.company_code.clone(),
12481                        jurisdiction: f.jurisdiction.clone(),
12482                        status: format!("{:?}", f.status),
12483                    })
12484                    .collect();
12485                builder.add_filings(&filing_inputs);
12486            }
12487
12488            let graph = builder.build();
12489            info!(
12490                "  Compliance graph: {} nodes, {} edges",
12491                graph.nodes.len(),
12492                graph.edges.len()
12493            );
12494            Some(graph)
12495        } else {
12496            None
12497        };
12498
12499        self.check_resources_with_log("post-compliance-regulations")?;
12500
12501        Ok(ComplianceRegulationsSnapshot {
12502            standard_records,
12503            cross_reference_records,
12504            jurisdiction_records,
12505            audit_procedures,
12506            findings,
12507            filings,
12508            compliance_graph,
12509        })
12510    }
12511
12512    /// Build a lineage graph describing config → phase → output relationships.
12513    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
12514        use super::lineage::LineageGraphBuilder;
12515
12516        let mut builder = LineageGraphBuilder::new();
12517
12518        // Config sections
12519        builder.add_config_section("config:global", "Global Config");
12520        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
12521        builder.add_config_section("config:transactions", "Transaction Config");
12522
12523        // Generator phases
12524        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
12525        builder.add_generator_phase("phase:je", "Journal Entry Generation");
12526
12527        // Config → phase edges
12528        builder.configured_by("phase:coa", "config:chart_of_accounts");
12529        builder.configured_by("phase:je", "config:transactions");
12530
12531        // Output files
12532        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
12533        builder.produced_by("output:je", "phase:je");
12534
12535        // Optional phases based on config
12536        if self.phase_config.generate_master_data {
12537            builder.add_config_section("config:master_data", "Master Data Config");
12538            builder.add_generator_phase("phase:master_data", "Master Data Generation");
12539            builder.configured_by("phase:master_data", "config:master_data");
12540            builder.input_to("phase:master_data", "phase:je");
12541        }
12542
12543        if self.phase_config.generate_document_flows {
12544            builder.add_config_section("config:document_flows", "Document Flow Config");
12545            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
12546            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
12547            builder.configured_by("phase:p2p", "config:document_flows");
12548            builder.configured_by("phase:o2c", "config:document_flows");
12549
12550            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
12551            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
12552            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
12553            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
12554            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
12555
12556            builder.produced_by("output:po", "phase:p2p");
12557            builder.produced_by("output:gr", "phase:p2p");
12558            builder.produced_by("output:vi", "phase:p2p");
12559            builder.produced_by("output:so", "phase:o2c");
12560            builder.produced_by("output:ci", "phase:o2c");
12561        }
12562
12563        if self.phase_config.inject_anomalies {
12564            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
12565            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
12566            builder.configured_by("phase:anomaly", "config:fraud");
12567            builder.add_output_file(
12568                "output:labels",
12569                "Anomaly Labels",
12570                "labels/anomaly_labels.csv",
12571            );
12572            builder.produced_by("output:labels", "phase:anomaly");
12573        }
12574
12575        if self.phase_config.generate_audit {
12576            builder.add_config_section("config:audit", "Audit Config");
12577            builder.add_generator_phase("phase:audit", "Audit Data Generation");
12578            builder.configured_by("phase:audit", "config:audit");
12579        }
12580
12581        if self.phase_config.generate_banking {
12582            builder.add_config_section("config:banking", "Banking Config");
12583            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
12584            builder.configured_by("phase:banking", "config:banking");
12585        }
12586
12587        if self.config.llm.enabled {
12588            builder.add_config_section("config:llm", "LLM Enrichment Config");
12589            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
12590            builder.configured_by("phase:llm_enrichment", "config:llm");
12591        }
12592
12593        if self.config.diffusion.enabled {
12594            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
12595            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
12596            builder.configured_by("phase:diffusion", "config:diffusion");
12597        }
12598
12599        if self.config.causal.enabled {
12600            builder.add_config_section("config:causal", "Causal Generation Config");
12601            builder.add_generator_phase("phase:causal", "Causal Overlay");
12602            builder.configured_by("phase:causal", "config:causal");
12603        }
12604
12605        builder.build()
12606    }
12607
12608    // -----------------------------------------------------------------------
12609    // Trial-balance helpers used to replace hardcoded proxy values
12610    // -----------------------------------------------------------------------
12611
12612    /// Compute total revenue for a company from its journal entries.
12613    ///
12614    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
12615    /// net credits on all revenue-account lines filtered to `company_code`.
12616    fn compute_company_revenue(
12617        entries: &[JournalEntry],
12618        company_code: &str,
12619    ) -> rust_decimal::Decimal {
12620        use rust_decimal::Decimal;
12621        let mut revenue = Decimal::ZERO;
12622        for je in entries {
12623            if je.header.company_code != company_code {
12624                continue;
12625            }
12626            for line in &je.lines {
12627                if line.gl_account.starts_with('4') {
12628                    // Revenue is credit-normal
12629                    revenue += line.credit_amount - line.debit_amount;
12630                }
12631            }
12632        }
12633        revenue.max(Decimal::ZERO)
12634    }
12635
12636    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
12637    ///
12638    /// Asset accounts start with "1"; liability accounts start with "2".
12639    fn compute_entity_net_assets(
12640        entries: &[JournalEntry],
12641        entity_code: &str,
12642    ) -> rust_decimal::Decimal {
12643        use rust_decimal::Decimal;
12644        let mut asset_net = Decimal::ZERO;
12645        let mut liability_net = Decimal::ZERO;
12646        for je in entries {
12647            if je.header.company_code != entity_code {
12648                continue;
12649            }
12650            for line in &je.lines {
12651                if line.gl_account.starts_with('1') {
12652                    asset_net += line.debit_amount - line.credit_amount;
12653                } else if line.gl_account.starts_with('2') {
12654                    liability_net += line.credit_amount - line.debit_amount;
12655                }
12656            }
12657        }
12658        asset_net - liability_net
12659    }
12660}
12661
12662/// Get the directory name for a graph export format.
12663fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
12664    match format {
12665        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
12666        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
12667        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
12668        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
12669        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
12670    }
12671}
12672
12673#[cfg(test)]
12674#[allow(clippy::unwrap_used)]
12675mod tests {
12676    use super::*;
12677    use datasynth_config::schema::*;
12678
12679    fn create_test_config() -> GeneratorConfig {
12680        GeneratorConfig {
12681            global: GlobalConfig {
12682                industry: IndustrySector::Manufacturing,
12683                start_date: "2024-01-01".to_string(),
12684                period_months: 1,
12685                seed: Some(42),
12686                parallel: false,
12687                group_currency: "USD".to_string(),
12688                presentation_currency: None,
12689                worker_threads: 0,
12690                memory_limit_mb: 0,
12691                fiscal_year_months: None,
12692            },
12693            companies: vec![CompanyConfig {
12694                code: "1000".to_string(),
12695                name: "Test Company".to_string(),
12696                currency: "USD".to_string(),
12697                functional_currency: None,
12698                country: "US".to_string(),
12699                annual_transaction_volume: TransactionVolume::TenK,
12700                volume_weight: 1.0,
12701                fiscal_year_variant: "K4".to_string(),
12702            }],
12703            chart_of_accounts: ChartOfAccountsConfig {
12704                complexity: CoAComplexity::Small,
12705                industry_specific: true,
12706                custom_accounts: None,
12707                min_hierarchy_depth: 2,
12708                max_hierarchy_depth: 4,
12709            },
12710            transactions: TransactionConfig::default(),
12711            output: OutputConfig::default(),
12712            fraud: FraudConfig::default(),
12713            internal_controls: InternalControlsConfig::default(),
12714            business_processes: BusinessProcessConfig::default(),
12715            user_personas: UserPersonaConfig::default(),
12716            templates: TemplateConfig::default(),
12717            approval: ApprovalConfig::default(),
12718            departments: DepartmentConfig::default(),
12719            master_data: MasterDataConfig::default(),
12720            document_flows: DocumentFlowConfig::default(),
12721            intercompany: IntercompanyConfig::default(),
12722            balance: BalanceConfig::default(),
12723            ocpm: OcpmConfig::default(),
12724            audit: AuditGenerationConfig::default(),
12725            banking: datasynth_banking::BankingConfig::default(),
12726            data_quality: DataQualitySchemaConfig::default(),
12727            scenario: ScenarioConfig::default(),
12728            temporal: TemporalDriftConfig::default(),
12729            graph_export: GraphExportConfig::default(),
12730            streaming: StreamingSchemaConfig::default(),
12731            rate_limit: RateLimitSchemaConfig::default(),
12732            temporal_attributes: TemporalAttributeSchemaConfig::default(),
12733            relationships: RelationshipSchemaConfig::default(),
12734            accounting_standards: AccountingStandardsConfig::default(),
12735            audit_standards: AuditStandardsConfig::default(),
12736            distributions: Default::default(),
12737            temporal_patterns: Default::default(),
12738            vendor_network: VendorNetworkSchemaConfig::default(),
12739            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
12740            relationship_strength: RelationshipStrengthSchemaConfig::default(),
12741            cross_process_links: CrossProcessLinksSchemaConfig::default(),
12742            organizational_events: OrganizationalEventsSchemaConfig::default(),
12743            behavioral_drift: BehavioralDriftSchemaConfig::default(),
12744            market_drift: MarketDriftSchemaConfig::default(),
12745            drift_labeling: DriftLabelingSchemaConfig::default(),
12746            anomaly_injection: Default::default(),
12747            industry_specific: Default::default(),
12748            fingerprint_privacy: Default::default(),
12749            quality_gates: Default::default(),
12750            compliance: Default::default(),
12751            webhooks: Default::default(),
12752            llm: Default::default(),
12753            diffusion: Default::default(),
12754            causal: Default::default(),
12755            source_to_pay: Default::default(),
12756            financial_reporting: Default::default(),
12757            hr: Default::default(),
12758            manufacturing: Default::default(),
12759            sales_quotes: Default::default(),
12760            tax: Default::default(),
12761            treasury: Default::default(),
12762            project_accounting: Default::default(),
12763            esg: Default::default(),
12764            country_packs: None,
12765            scenarios: Default::default(),
12766            session: Default::default(),
12767            compliance_regulations: Default::default(),
12768        }
12769    }
12770
#[test]
fn test_enhanced_orchestrator_creation() {
    // Building an orchestrator with the default phase settings must succeed.
    let built = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(built.is_ok());
}
12777
#[test]
fn test_minimal_generation() {
    // Journal entries only: master data, document flows and anomalies are off.
    let phases = PhaseConfig {
        show_progress: false,
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        ..Default::default()
    };
    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let outcome = orchestrator.generate();
    assert!(outcome.is_ok());

    let generated = outcome.unwrap();
    assert!(!generated.journal_entries.is_empty());
}
12797
#[test]
fn test_master_data_generation() {
    // Master data only, with small per-company volumes.
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };
    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orchestrator.generate().unwrap();
    let master = &generated.master_data;

    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
12822
#[test]
fn test_document_flow_generation() {
    // Master data plus document flows; every other listed phase is disabled.
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        generate_ocpm_events: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orchestrator.generate().unwrap();
    let flows = &generated.document_flows;

    // Both chain kinds must have been produced.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened document lists must be filled in as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
12856
#[test]
fn test_anomaly_injection() {
    // Journal entries with anomaly injection switched on.
    let phases = PhaseConfig {
        show_progress: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        generate_master_data: false,
        generate_document_flows: false,
        ..Default::default()
    };
    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orchestrator.generate().unwrap();

    // Entries must exist for anomalies to be injected into.
    assert!(!generated.journal_entries.is_empty());

    // Injection is probabilistic, so only the presence of the summary
    // structure is asserted, not a particular anomaly count.
    assert!(generated.anomaly_labels.summary.is_some());
}
12879
#[test]
fn test_full_generation_pipeline() {
    // Master data, document flows, journal entries and balance validation
    // together, with small volumes.
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        validate_balances: true,
        generate_ocpm_events: false,
        inject_anomalies: false,
        inject_data_quality: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };
    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orchestrator.generate().unwrap();

    // Every enabled phase must have left results behind.
    let master = &generated.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());

    let flows = &generated.document_flows;
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    assert!(!generated.journal_entries.is_empty());
    assert!(generated.statistics.accounts_count > 0);

    // Subledger linking must have populated AP and AR invoices.
    let subledger = &generated.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation must have run over at least one entry.
    let validation = &generated.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
12921
#[test]
fn test_subledger_linking() {
    // Master data plus document flows; journal entries are not generated.
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        generate_ocpm_events: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orchestrator.generate().unwrap();

    let vendor_invoices = &generated.document_flows.vendor_invoices;
    let customer_invoices = &generated.document_flows.customer_invoices;
    let ap_invoices = &generated.subledger.ap_invoices;
    let ar_invoices = &generated.subledger.ar_invoices;
    let stats = &generated.statistics;

    // Document flows must contain invoices on both sides.
    assert!(!vendor_invoices.is_empty());
    assert!(!customer_invoices.is_empty());

    // The subledger must have been populated from those flows.
    assert!(!ap_invoices.is_empty());
    assert!(!ar_invoices.is_empty());

    // One AP invoice per vendor invoice, one AR invoice per customer invoice.
    assert_eq!(ap_invoices.len(), vendor_invoices.len());
    assert_eq!(ar_invoices.len(), customer_invoices.len());

    // The reported statistics must agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, ar_invoices.len());
}
12977
#[test]
fn test_balance_validation() {
    // Journal entries with balance validation enabled.
    let phases = PhaseConfig {
        show_progress: false,
        generate_journal_entries: true,
        validate_balances: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        ..Default::default()
    };
    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orchestrator.generate().unwrap();
    let validation = &generated.balance_validation;

    // Validation must have run over at least one entry.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // No generated entry may be flagged as unbalanced.
    assert!(!validation.has_unbalanced_entries);

    // Debits and credits must net out overall.
    assert_eq!(validation.total_debits, validation.total_credits);
}
13007
#[test]
fn test_statistics_accuracy() {
    // Master data and journal entries, with distinct per-company volumes.
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_journal_entries: true,
        generate_document_flows: false,
        inject_anomalies: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };
    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orchestrator.generate().unwrap();
    let stats = &generated.statistics;
    let master = &generated.master_data;

    // Each reported count must equal the size of the collection it describes.
    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(
        stats.total_entries as usize,
        generated.journal_entries.len()
    );
}
13046
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Core generation phases are enabled by default.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);

    // Anomaly injection is off by default.
    assert!(!defaults.inject_anomalies);

    // Balance validation and progress display are enabled by default.
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Default master-data volumes are non-zero.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
13059
#[test]
fn test_get_coa_before_generation() {
    let fresh = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    // No chart of accounts is available until `generate()` has been called.
    assert!(fresh.get_coa().is_none());
}
13068
#[test]
fn test_get_coa_after_generation() {
    // Journal entries only.
    let phases = PhaseConfig {
        show_progress: false,
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        ..Default::default()
    };
    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    // The generation result itself is not needed here; discard it right away.
    drop(orchestrator.generate().unwrap());

    // Once generation has run, the chart of accounts must be retrievable.
    assert!(orchestrator.get_coa().is_some());
}
13087
13088    #[test]
13089    fn test_get_master_data() {
13090        let config = create_test_config();
13091        let phase_config = PhaseConfig {
13092            generate_master_data: true,
13093            generate_document_flows: false,
13094            generate_journal_entries: false,
13095            inject_anomalies: false,
13096            show_progress: false,
13097            vendors_per_company: 5,
13098            customers_per_company: 5,
13099            materials_per_company: 5,
13100            assets_per_company: 5,
13101            employees_per_company: 5,
13102            ..Default::default()
13103        };
13104
13105        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13106        let result = orchestrator.generate().unwrap();
13107
13108        // After generate(), master_data is moved into the result
13109        assert!(!result.master_data.vendors.is_empty());
13110    }
13111
13112    #[test]
13113    fn test_with_progress_builder() {
13114        let config = create_test_config();
13115        let orchestrator = EnhancedOrchestrator::with_defaults(config)
13116            .unwrap()
13117            .with_progress(false);
13118
13119        // Should still work without progress
13120        assert!(!orchestrator.phase_config.show_progress);
13121    }
13122
13123    #[test]
13124    fn test_multi_company_generation() {
13125        let mut config = create_test_config();
13126        config.companies.push(CompanyConfig {
13127            code: "2000".to_string(),
13128            name: "Subsidiary".to_string(),
13129            currency: "EUR".to_string(),
13130            functional_currency: None,
13131            country: "DE".to_string(),
13132            annual_transaction_volume: TransactionVolume::TenK,
13133            volume_weight: 0.5,
13134            fiscal_year_variant: "K4".to_string(),
13135        });
13136
13137        let phase_config = PhaseConfig {
13138            generate_master_data: true,
13139            generate_document_flows: false,
13140            generate_journal_entries: true,
13141            inject_anomalies: false,
13142            show_progress: false,
13143            vendors_per_company: 5,
13144            customers_per_company: 5,
13145            materials_per_company: 5,
13146            assets_per_company: 5,
13147            employees_per_company: 5,
13148            ..Default::default()
13149        };
13150
13151        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13152        let result = orchestrator.generate().unwrap();
13153
13154        // Should have master data for both companies
13155        assert!(result.statistics.vendor_count >= 10); // 5 per company
13156        assert!(result.statistics.customer_count >= 10);
13157        assert!(result.statistics.companies_count == 2);
13158    }
13159
13160    #[test]
13161    fn test_empty_master_data_skips_document_flows() {
13162        let config = create_test_config();
13163        let phase_config = PhaseConfig {
13164            generate_master_data: false,   // Skip master data
13165            generate_document_flows: true, // Try to generate flows
13166            generate_journal_entries: false,
13167            inject_anomalies: false,
13168            show_progress: false,
13169            ..Default::default()
13170        };
13171
13172        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13173        let result = orchestrator.generate().unwrap();
13174
13175        // Without master data, document flows should be empty
13176        assert!(result.document_flows.p2p_chains.is_empty());
13177        assert!(result.document_flows.o2c_chains.is_empty());
13178    }
13179
13180    #[test]
13181    fn test_journal_entry_line_item_count() {
13182        let config = create_test_config();
13183        let phase_config = PhaseConfig {
13184            generate_master_data: false,
13185            generate_document_flows: false,
13186            generate_journal_entries: true,
13187            inject_anomalies: false,
13188            show_progress: false,
13189            ..Default::default()
13190        };
13191
13192        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13193        let result = orchestrator.generate().unwrap();
13194
13195        // Total line items should match sum of all entry line counts
13196        let calculated_line_items: u64 = result
13197            .journal_entries
13198            .iter()
13199            .map(|e| e.line_count() as u64)
13200            .sum();
13201        assert_eq!(result.statistics.total_line_items, calculated_line_items);
13202    }
13203
13204    #[test]
13205    fn test_audit_generation() {
13206        let config = create_test_config();
13207        let phase_config = PhaseConfig {
13208            generate_master_data: false,
13209            generate_document_flows: false,
13210            generate_journal_entries: true,
13211            inject_anomalies: false,
13212            show_progress: false,
13213            generate_audit: true,
13214            audit_engagements: 2,
13215            workpapers_per_engagement: 5,
13216            evidence_per_workpaper: 2,
13217            risks_per_engagement: 3,
13218            findings_per_engagement: 2,
13219            judgments_per_engagement: 2,
13220            ..Default::default()
13221        };
13222
13223        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13224        let result = orchestrator.generate().unwrap();
13225
13226        // Should have generated audit data
13227        assert_eq!(result.audit.engagements.len(), 2);
13228        assert!(!result.audit.workpapers.is_empty());
13229        assert!(!result.audit.evidence.is_empty());
13230        assert!(!result.audit.risk_assessments.is_empty());
13231        assert!(!result.audit.findings.is_empty());
13232        assert!(!result.audit.judgments.is_empty());
13233
13234        // New ISA entity collections should also be populated
13235        assert!(
13236            !result.audit.confirmations.is_empty(),
13237            "ISA 505 confirmations should be generated"
13238        );
13239        assert!(
13240            !result.audit.confirmation_responses.is_empty(),
13241            "ISA 505 confirmation responses should be generated"
13242        );
13243        assert!(
13244            !result.audit.procedure_steps.is_empty(),
13245            "ISA 330 procedure steps should be generated"
13246        );
13247        // Samples may or may not be generated depending on workpaper sampling methods
13248        assert!(
13249            !result.audit.analytical_results.is_empty(),
13250            "ISA 520 analytical procedures should be generated"
13251        );
13252        assert!(
13253            !result.audit.ia_functions.is_empty(),
13254            "ISA 610 IA functions should be generated (one per engagement)"
13255        );
13256        assert!(
13257            !result.audit.related_parties.is_empty(),
13258            "ISA 550 related parties should be generated"
13259        );
13260
13261        // Statistics should match
13262        assert_eq!(
13263            result.statistics.audit_engagement_count,
13264            result.audit.engagements.len()
13265        );
13266        assert_eq!(
13267            result.statistics.audit_workpaper_count,
13268            result.audit.workpapers.len()
13269        );
13270        assert_eq!(
13271            result.statistics.audit_evidence_count,
13272            result.audit.evidence.len()
13273        );
13274        assert_eq!(
13275            result.statistics.audit_risk_count,
13276            result.audit.risk_assessments.len()
13277        );
13278        assert_eq!(
13279            result.statistics.audit_finding_count,
13280            result.audit.findings.len()
13281        );
13282        assert_eq!(
13283            result.statistics.audit_judgment_count,
13284            result.audit.judgments.len()
13285        );
13286        assert_eq!(
13287            result.statistics.audit_confirmation_count,
13288            result.audit.confirmations.len()
13289        );
13290        assert_eq!(
13291            result.statistics.audit_confirmation_response_count,
13292            result.audit.confirmation_responses.len()
13293        );
13294        assert_eq!(
13295            result.statistics.audit_procedure_step_count,
13296            result.audit.procedure_steps.len()
13297        );
13298        assert_eq!(
13299            result.statistics.audit_sample_count,
13300            result.audit.samples.len()
13301        );
13302        assert_eq!(
13303            result.statistics.audit_analytical_result_count,
13304            result.audit.analytical_results.len()
13305        );
13306        assert_eq!(
13307            result.statistics.audit_ia_function_count,
13308            result.audit.ia_functions.len()
13309        );
13310        assert_eq!(
13311            result.statistics.audit_ia_report_count,
13312            result.audit.ia_reports.len()
13313        );
13314        assert_eq!(
13315            result.statistics.audit_related_party_count,
13316            result.audit.related_parties.len()
13317        );
13318        assert_eq!(
13319            result.statistics.audit_related_party_transaction_count,
13320            result.audit.related_party_transactions.len()
13321        );
13322    }
13323
13324    #[test]
13325    fn test_new_phases_disabled_by_default() {
13326        let config = create_test_config();
13327        // Verify new config fields default to disabled
13328        assert!(!config.llm.enabled);
13329        assert!(!config.diffusion.enabled);
13330        assert!(!config.causal.enabled);
13331
13332        let phase_config = PhaseConfig {
13333            generate_master_data: false,
13334            generate_document_flows: false,
13335            generate_journal_entries: true,
13336            inject_anomalies: false,
13337            show_progress: false,
13338            ..Default::default()
13339        };
13340
13341        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13342        let result = orchestrator.generate().unwrap();
13343
13344        // All new phase statistics should be zero when disabled
13345        assert_eq!(result.statistics.llm_enrichment_ms, 0);
13346        assert_eq!(result.statistics.llm_vendors_enriched, 0);
13347        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
13348        assert_eq!(result.statistics.diffusion_samples_generated, 0);
13349        assert_eq!(result.statistics.causal_generation_ms, 0);
13350        assert_eq!(result.statistics.causal_samples_generated, 0);
13351        assert!(result.statistics.causal_validation_passed.is_none());
13352        assert_eq!(result.statistics.counterfactual_pair_count, 0);
13353        assert!(result.counterfactual_pairs.is_empty());
13354    }
13355
13356    #[test]
13357    fn test_counterfactual_generation_enabled() {
13358        let config = create_test_config();
13359        let phase_config = PhaseConfig {
13360            generate_master_data: false,
13361            generate_document_flows: false,
13362            generate_journal_entries: true,
13363            inject_anomalies: false,
13364            show_progress: false,
13365            generate_counterfactuals: true,
13366            generate_period_close: false, // Disable so entry count matches counterfactual pairs
13367            ..Default::default()
13368        };
13369
13370        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13371        let result = orchestrator.generate().unwrap();
13372
13373        // With JE generation enabled, counterfactual pairs should be generated
13374        if !result.journal_entries.is_empty() {
13375            assert_eq!(
13376                result.counterfactual_pairs.len(),
13377                result.journal_entries.len()
13378            );
13379            assert_eq!(
13380                result.statistics.counterfactual_pair_count,
13381                result.journal_entries.len()
13382            );
13383            // Each pair should have a distinct pair_id
13384            let ids: std::collections::HashSet<_> = result
13385                .counterfactual_pairs
13386                .iter()
13387                .map(|p| p.pair_id.clone())
13388                .collect();
13389            assert_eq!(ids.len(), result.counterfactual_pairs.len());
13390        }
13391    }
13392
13393    #[test]
13394    fn test_llm_enrichment_enabled() {
13395        let mut config = create_test_config();
13396        config.llm.enabled = true;
13397        config.llm.max_vendor_enrichments = 3;
13398
13399        let phase_config = PhaseConfig {
13400            generate_master_data: true,
13401            generate_document_flows: false,
13402            generate_journal_entries: false,
13403            inject_anomalies: false,
13404            show_progress: false,
13405            vendors_per_company: 5,
13406            customers_per_company: 3,
13407            materials_per_company: 3,
13408            assets_per_company: 3,
13409            employees_per_company: 3,
13410            ..Default::default()
13411        };
13412
13413        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13414        let result = orchestrator.generate().unwrap();
13415
13416        // LLM enrichment should have run
13417        assert!(result.statistics.llm_vendors_enriched > 0);
13418        assert!(result.statistics.llm_vendors_enriched <= 3);
13419    }
13420
13421    #[test]
13422    fn test_diffusion_enhancement_enabled() {
13423        let mut config = create_test_config();
13424        config.diffusion.enabled = true;
13425        config.diffusion.n_steps = 50;
13426        config.diffusion.sample_size = 20;
13427
13428        let phase_config = PhaseConfig {
13429            generate_master_data: false,
13430            generate_document_flows: false,
13431            generate_journal_entries: true,
13432            inject_anomalies: false,
13433            show_progress: false,
13434            ..Default::default()
13435        };
13436
13437        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13438        let result = orchestrator.generate().unwrap();
13439
13440        // Diffusion phase should have generated samples
13441        assert_eq!(result.statistics.diffusion_samples_generated, 20);
13442    }
13443
13444    #[test]
13445    fn test_causal_overlay_enabled() {
13446        let mut config = create_test_config();
13447        config.causal.enabled = true;
13448        config.causal.template = "fraud_detection".to_string();
13449        config.causal.sample_size = 100;
13450        config.causal.validate = true;
13451
13452        let phase_config = PhaseConfig {
13453            generate_master_data: false,
13454            generate_document_flows: false,
13455            generate_journal_entries: true,
13456            inject_anomalies: false,
13457            show_progress: false,
13458            ..Default::default()
13459        };
13460
13461        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13462        let result = orchestrator.generate().unwrap();
13463
13464        // Causal phase should have generated samples
13465        assert_eq!(result.statistics.causal_samples_generated, 100);
13466        // Validation should have run
13467        assert!(result.statistics.causal_validation_passed.is_some());
13468    }
13469
13470    #[test]
13471    fn test_causal_overlay_revenue_cycle_template() {
13472        let mut config = create_test_config();
13473        config.causal.enabled = true;
13474        config.causal.template = "revenue_cycle".to_string();
13475        config.causal.sample_size = 50;
13476        config.causal.validate = false;
13477
13478        let phase_config = PhaseConfig {
13479            generate_master_data: false,
13480            generate_document_flows: false,
13481            generate_journal_entries: true,
13482            inject_anomalies: false,
13483            show_progress: false,
13484            ..Default::default()
13485        };
13486
13487        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13488        let result = orchestrator.generate().unwrap();
13489
13490        // Causal phase should have generated samples
13491        assert_eq!(result.statistics.causal_samples_generated, 50);
13492        // Validation was disabled
13493        assert!(result.statistics.causal_validation_passed.is_none());
13494    }
13495
13496    #[test]
13497    fn test_all_new_phases_enabled_together() {
13498        let mut config = create_test_config();
13499        config.llm.enabled = true;
13500        config.llm.max_vendor_enrichments = 2;
13501        config.diffusion.enabled = true;
13502        config.diffusion.n_steps = 20;
13503        config.diffusion.sample_size = 10;
13504        config.causal.enabled = true;
13505        config.causal.sample_size = 50;
13506        config.causal.validate = true;
13507
13508        let phase_config = PhaseConfig {
13509            generate_master_data: true,
13510            generate_document_flows: false,
13511            generate_journal_entries: true,
13512            inject_anomalies: false,
13513            show_progress: false,
13514            vendors_per_company: 5,
13515            customers_per_company: 3,
13516            materials_per_company: 3,
13517            assets_per_company: 3,
13518            employees_per_company: 3,
13519            ..Default::default()
13520        };
13521
13522        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13523        let result = orchestrator.generate().unwrap();
13524
13525        // All three phases should have run
13526        assert!(result.statistics.llm_vendors_enriched > 0);
13527        assert_eq!(result.statistics.diffusion_samples_generated, 10);
13528        assert_eq!(result.statistics.causal_samples_generated, 50);
13529        assert!(result.statistics.causal_validation_passed.is_some());
13530    }
13531
13532    #[test]
13533    fn test_statistics_serialization_with_new_fields() {
13534        let stats = EnhancedGenerationStatistics {
13535            total_entries: 100,
13536            total_line_items: 500,
13537            llm_enrichment_ms: 42,
13538            llm_vendors_enriched: 10,
13539            diffusion_enhancement_ms: 100,
13540            diffusion_samples_generated: 50,
13541            causal_generation_ms: 200,
13542            causal_samples_generated: 100,
13543            causal_validation_passed: Some(true),
13544            ..Default::default()
13545        };
13546
13547        let json = serde_json::to_string(&stats).unwrap();
13548        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
13549
13550        assert_eq!(deserialized.llm_enrichment_ms, 42);
13551        assert_eq!(deserialized.llm_vendors_enriched, 10);
13552        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
13553        assert_eq!(deserialized.diffusion_samples_generated, 50);
13554        assert_eq!(deserialized.causal_generation_ms, 200);
13555        assert_eq!(deserialized.causal_samples_generated, 100);
13556        assert_eq!(deserialized.causal_validation_passed, Some(true));
13557    }
13558
13559    #[test]
13560    fn test_statistics_backward_compat_deserialization() {
13561        // Old JSON without the new fields should still deserialize
13562        let old_json = r#"{
13563            "total_entries": 100,
13564            "total_line_items": 500,
13565            "accounts_count": 50,
13566            "companies_count": 1,
13567            "period_months": 12,
13568            "vendor_count": 10,
13569            "customer_count": 20,
13570            "material_count": 15,
13571            "asset_count": 5,
13572            "employee_count": 8,
13573            "p2p_chain_count": 5,
13574            "o2c_chain_count": 5,
13575            "ap_invoice_count": 5,
13576            "ar_invoice_count": 5,
13577            "ocpm_event_count": 0,
13578            "ocpm_object_count": 0,
13579            "ocpm_case_count": 0,
13580            "audit_engagement_count": 0,
13581            "audit_workpaper_count": 0,
13582            "audit_evidence_count": 0,
13583            "audit_risk_count": 0,
13584            "audit_finding_count": 0,
13585            "audit_judgment_count": 0,
13586            "anomalies_injected": 0,
13587            "data_quality_issues": 0,
13588            "banking_customer_count": 0,
13589            "banking_account_count": 0,
13590            "banking_transaction_count": 0,
13591            "banking_suspicious_count": 0,
13592            "graph_export_count": 0,
13593            "graph_node_count": 0,
13594            "graph_edge_count": 0
13595        }"#;
13596
13597        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
13598
13599        // New fields should default to 0 / None
13600        assert_eq!(stats.llm_enrichment_ms, 0);
13601        assert_eq!(stats.llm_vendors_enriched, 0);
13602        assert_eq!(stats.diffusion_enhancement_ms, 0);
13603        assert_eq!(stats.diffusion_samples_generated, 0);
13604        assert_eq!(stats.causal_generation_ms, 0);
13605        assert_eq!(stats.causal_samples_generated, 0);
13606        assert!(stats.causal_validation_passed.is_none());
13607    }
13608}