Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    MaterialGenerator,
117    O2CDocumentChain,
118    O2CGenerator,
119    O2CGeneratorConfig,
120    O2CPaymentBehavior,
121    P2PDocumentChain,
122    // Document flow generators
123    P2PGenerator,
124    P2PGeneratorConfig,
125    P2PPaymentBehavior,
126    PaymentReference,
127    // Provisions and contingencies generator (IAS 37 / ASC 450)
128    ProvisionGenerator,
129    QualificationGenerator,
130    RfxGenerator,
131    RiskAssessmentGenerator,
132    // Balance validation
133    RunningBalanceTracker,
134    ScorecardGenerator,
135    // Segment reporting generator (IFRS 8 / ASC 280)
136    SegmentGenerator,
137    SegmentSeed,
138    SourcingProjectGenerator,
139    SpendAnalysisGenerator,
140    ValidationError,
141    // Master data generators
142    VendorGenerator,
143    WorkpaperGenerator,
144};
145use datasynth_graph::{
146    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
147    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
148    TransactionGraphConfig,
149};
150use datasynth_ocpm::{
151    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
152    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
153    OcpmUuidFactory, P2pDocuments, S2cDocuments,
154};
155
156use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
157use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
158use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
159use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
160use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
161use datasynth_core::models::documents::PaymentMethod;
162use datasynth_core::models::IndustrySector;
163use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
164use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
165use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
166use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
167use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
168use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
169use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
170use datasynth_generators::audit::sample_generator::SampleGenerator;
171use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
172use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
173use datasynth_generators::coa_generator::CoAFramework;
174use datasynth_generators::llm_enrichment::VendorLlmEnricher;
175use rayon::prelude::*;
176
177// ============================================================================
178// Configuration Conversion Functions
179// ============================================================================
180
181/// Convert P2P flow config from schema to generator config.
182fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
183    let payment_behavior = &schema_config.payment_behavior;
184    let late_dist = &payment_behavior.late_payment_days_distribution;
185
186    P2PGeneratorConfig {
187        three_way_match_rate: schema_config.three_way_match_rate,
188        partial_delivery_rate: schema_config.partial_delivery_rate,
189        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
190        price_variance_rate: schema_config.price_variance_rate,
191        max_price_variance_percent: schema_config.max_price_variance_percent,
192        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
193        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
194        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
195        payment_method_distribution: vec![
196            (PaymentMethod::BankTransfer, 0.60),
197            (PaymentMethod::Check, 0.25),
198            (PaymentMethod::Wire, 0.10),
199            (PaymentMethod::CreditCard, 0.05),
200        ],
201        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
202        payment_behavior: P2PPaymentBehavior {
203            late_payment_rate: payment_behavior.late_payment_rate,
204            late_payment_distribution: LatePaymentDistribution {
205                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
206                late_8_to_14: late_dist.late_8_to_14,
207                very_late_15_to_30: late_dist.very_late_15_to_30,
208                severely_late_31_to_60: late_dist.severely_late_31_to_60,
209                extremely_late_over_60: late_dist.extremely_late_over_60,
210            },
211            partial_payment_rate: payment_behavior.partial_payment_rate,
212            payment_correction_rate: payment_behavior.payment_correction_rate,
213            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
214        },
215    }
216}
217
218/// Convert O2C flow config from schema to generator config.
219fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
220    let payment_behavior = &schema_config.payment_behavior;
221
222    O2CGeneratorConfig {
223        credit_check_failure_rate: schema_config.credit_check_failure_rate,
224        partial_shipment_rate: schema_config.partial_shipment_rate,
225        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
226        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
227        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
228        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
229        bad_debt_rate: schema_config.bad_debt_rate,
230        returns_rate: schema_config.return_rate,
231        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
232        payment_method_distribution: vec![
233            (PaymentMethod::BankTransfer, 0.50),
234            (PaymentMethod::Check, 0.30),
235            (PaymentMethod::Wire, 0.15),
236            (PaymentMethod::CreditCard, 0.05),
237        ],
238        payment_behavior: O2CPaymentBehavior {
239            partial_payment_rate: payment_behavior.partial_payments.rate,
240            short_payment_rate: payment_behavior.short_payments.rate,
241            max_short_percent: payment_behavior.short_payments.max_short_percent,
242            on_account_rate: payment_behavior.on_account_payments.rate,
243            payment_correction_rate: payment_behavior.payment_corrections.rate,
244            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
245        },
246    }
247}
248
/// Switches and volume counts that decide which generation phases run.
///
/// Boolean fields turn a phase on or off; `usize` fields set how many records a
/// phase produces.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Run master data generation (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Run document flow generation (P2P, O2C).
    pub generate_document_flows: bool,
    /// Derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Run journal entry generation.
    pub generate_journal_entries: bool,
    /// Run anomaly injection.
    pub inject_anomalies: bool,
    /// Run data quality injection (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Check the balance sheet equation once generation has finished.
    pub validate_balances: bool,
    /// Display progress bars.
    pub show_progress: bool,
    /// Vendor count per company.
    pub vendors_per_company: usize,
    /// Customer count per company.
    pub customers_per_company: usize,
    /// Material count per company.
    pub materials_per_company: usize,
    /// Asset count per company.
    pub assets_per_company: usize,
    /// Employee count per company.
    pub employees_per_company: usize,
    /// How many P2P chains to produce.
    pub p2p_chains: usize,
    /// How many O2C chains to produce.
    pub o2c_chains: usize,
    /// Run audit data generation (engagements, workpapers, evidence, risks, findings,
    /// judgments).
    pub generate_audit: bool,
    /// How many audit engagements to produce.
    pub audit_engagements: usize,
    /// Workpaper count per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence item count per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessment count per engagement.
    pub risks_per_engagement: usize,
    /// Finding count per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgment count per engagement.
    pub judgments_per_engagement: usize,
    /// Run banking KYC/AML generation (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Run S2C sourcing generation (spend analysis, RFx, bids, contracts, catalogs,
    /// scorecards).
    pub generate_sourcing: bool,
    /// Build bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Build financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Run accounting standards generation (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Run manufacturing generation (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Produce sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Run ESG generation (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Produce process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Run compliance regulations generation (standards registry, procedures, findings,
    /// filings).
    pub generate_compliance_regulations: bool,
    /// Produce period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Run HR generation (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Run treasury generation (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Run project accounting generation (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
}
333
334impl Default for PhaseConfig {
335    fn default() -> Self {
336        Self {
337            generate_master_data: true,
338            generate_document_flows: true,
339            generate_ocpm_events: false, // Off by default
340            generate_journal_entries: true,
341            inject_anomalies: false,
342            inject_data_quality: false, // Off by default (to preserve clean test data)
343            validate_balances: true,
344            show_progress: true,
345            vendors_per_company: 50,
346            customers_per_company: 100,
347            materials_per_company: 200,
348            assets_per_company: 50,
349            employees_per_company: 100,
350            p2p_chains: 100,
351            o2c_chains: 100,
352            generate_audit: false, // Off by default
353            audit_engagements: 5,
354            workpapers_per_engagement: 20,
355            evidence_per_workpaper: 5,
356            risks_per_engagement: 15,
357            findings_per_engagement: 8,
358            judgments_per_engagement: 10,
359            generate_banking: false,                // Off by default
360            generate_graph_export: false,           // Off by default
361            generate_sourcing: false,               // Off by default
362            generate_bank_reconciliation: false,    // Off by default
363            generate_financial_statements: false,   // Off by default
364            generate_accounting_standards: false,   // Off by default
365            generate_manufacturing: false,          // Off by default
366            generate_sales_kpi_budgets: false,      // Off by default
367            generate_tax: false,                    // Off by default
368            generate_esg: false,                    // Off by default
369            generate_intercompany: false,           // Off by default
370            generate_evolution_events: true,        // On by default
371            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
372            generate_compliance_regulations: false, // Off by default
373            generate_period_close: true,            // On by default
374            generate_hr: false,                     // Off by default
375            generate_treasury: false,               // Off by default
376            generate_project_accounting: false,     // Off by default
377        }
378    }
379}
380
381impl PhaseConfig {
382    /// Derive phase flags from [`GeneratorConfig`].
383    ///
384    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
385    /// CLI flags can override individual fields after calling this method.
386    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
387        Self {
388            // Always-on phases
389            generate_master_data: true,
390            generate_document_flows: true,
391            generate_journal_entries: true,
392            validate_balances: true,
393            generate_period_close: true,
394            generate_evolution_events: true,
395            show_progress: true,
396
397            // Feature-gated phases — derived from config sections
398            generate_audit: cfg.audit.enabled,
399            generate_banking: cfg.banking.enabled,
400            generate_graph_export: cfg.graph_export.enabled,
401            generate_sourcing: cfg.source_to_pay.enabled,
402            generate_intercompany: cfg.intercompany.enabled,
403            generate_financial_statements: cfg.financial_reporting.enabled,
404            generate_bank_reconciliation: cfg.financial_reporting.enabled,
405            generate_accounting_standards: cfg.accounting_standards.enabled,
406            generate_manufacturing: cfg.manufacturing.enabled,
407            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
408            generate_tax: cfg.tax.enabled,
409            generate_esg: cfg.esg.enabled,
410            generate_ocpm_events: cfg.ocpm.enabled,
411            generate_compliance_regulations: cfg.compliance_regulations.enabled,
412            generate_hr: cfg.hr.enabled,
413            generate_treasury: cfg.treasury.enabled,
414            generate_project_accounting: cfg.project_accounting.enabled,
415
416            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
417            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
418
419            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
420            inject_data_quality: cfg.data_quality.enabled,
421
422            // Count defaults (CLI can override after calling this method)
423            vendors_per_company: 50,
424            customers_per_company: 100,
425            materials_per_company: 200,
426            assets_per_company: 50,
427            employees_per_company: 100,
428            p2p_chains: 100,
429            o2c_chains: 100,
430            audit_engagements: 5,
431            workpapers_per_engagement: 20,
432            evidence_per_workpaper: 5,
433            risks_per_engagement: 15,
434            findings_per_engagement: 8,
435            judgments_per_engagement: 10,
436        }
437    }
438}
439
440/// Master data snapshot containing all generated entities.
441#[derive(Debug, Clone, Default)]
442pub struct MasterDataSnapshot {
443    /// Generated vendors.
444    pub vendors: Vec<Vendor>,
445    /// Generated customers.
446    pub customers: Vec<Customer>,
447    /// Generated materials.
448    pub materials: Vec<Material>,
449    /// Generated fixed assets.
450    pub assets: Vec<FixedAsset>,
451    /// Generated employees.
452    pub employees: Vec<Employee>,
453    /// Generated cost center hierarchy (two-level: departments + sub-departments).
454    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
455    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
456    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
457}
458
/// Summary of a finished hypergraph export: element counts and where it was written.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Path of the output directory.
    pub output_path: PathBuf,
}
471
472/// Document flow snapshot containing all generated document chains.
473#[derive(Debug, Clone, Default)]
474pub struct DocumentFlowSnapshot {
475    /// P2P document chains.
476    pub p2p_chains: Vec<P2PDocumentChain>,
477    /// O2C document chains.
478    pub o2c_chains: Vec<O2CDocumentChain>,
479    /// All purchase orders (flattened).
480    pub purchase_orders: Vec<documents::PurchaseOrder>,
481    /// All goods receipts (flattened).
482    pub goods_receipts: Vec<documents::GoodsReceipt>,
483    /// All vendor invoices (flattened).
484    pub vendor_invoices: Vec<documents::VendorInvoice>,
485    /// All sales orders (flattened).
486    pub sales_orders: Vec<documents::SalesOrder>,
487    /// All deliveries (flattened).
488    pub deliveries: Vec<documents::Delivery>,
489    /// All customer invoices (flattened).
490    pub customer_invoices: Vec<documents::CustomerInvoice>,
491    /// All payments (flattened).
492    pub payments: Vec<documents::Payment>,
493    /// Cross-document references collected from all document headers
494    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
495    pub document_references: Vec<documents::DocumentReference>,
496}
497
498/// Subledger snapshot containing generated subledger records.
499#[derive(Debug, Clone, Default)]
500pub struct SubledgerSnapshot {
501    /// AP invoices linked from document flow vendor invoices.
502    pub ap_invoices: Vec<APInvoice>,
503    /// AR invoices linked from document flow customer invoices.
504    pub ar_invoices: Vec<ARInvoice>,
505    /// FA subledger records (asset acquisitions from FA generator).
506    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
507    /// Inventory positions from inventory generator.
508    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
509    /// Inventory movements from inventory generator.
510    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
511    /// AR aging reports, one per company, computed after payment settlement.
512    pub ar_aging_reports: Vec<ARAgingReport>,
513    /// AP aging reports, one per company, computed after payment settlement.
514    pub ap_aging_reports: Vec<APAgingReport>,
515    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
516    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
517    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
518    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
519    /// Dunning runs executed after AR aging (one per company per dunning cycle).
520    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
521    /// Dunning letters generated across all dunning runs.
522    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
523}
524
525/// OCPM snapshot containing generated OCPM event log data.
526#[derive(Debug, Clone, Default)]
527pub struct OcpmSnapshot {
528    /// OCPM event log (if generated)
529    pub event_log: Option<OcpmEventLog>,
530    /// Number of events generated
531    pub event_count: usize,
532    /// Number of objects generated
533    pub object_count: usize,
534    /// Number of cases generated
535    pub case_count: usize,
536}
537
538/// Audit data snapshot containing all generated audit-related entities.
539#[derive(Debug, Clone, Default)]
540pub struct AuditSnapshot {
541    /// Audit engagements per ISA 210/220.
542    pub engagements: Vec<AuditEngagement>,
543    /// Workpapers per ISA 230.
544    pub workpapers: Vec<Workpaper>,
545    /// Audit evidence per ISA 500.
546    pub evidence: Vec<AuditEvidence>,
547    /// Risk assessments per ISA 315/330.
548    pub risk_assessments: Vec<RiskAssessment>,
549    /// Audit findings per ISA 265.
550    pub findings: Vec<AuditFinding>,
551    /// Professional judgments per ISA 200.
552    pub judgments: Vec<ProfessionalJudgment>,
553    /// External confirmations per ISA 505.
554    pub confirmations: Vec<ExternalConfirmation>,
555    /// Confirmation responses per ISA 505.
556    pub confirmation_responses: Vec<ConfirmationResponse>,
557    /// Audit procedure steps per ISA 330/530.
558    pub procedure_steps: Vec<AuditProcedureStep>,
559    /// Audit samples per ISA 530.
560    pub samples: Vec<AuditSample>,
561    /// Analytical procedure results per ISA 520.
562    pub analytical_results: Vec<AnalyticalProcedureResult>,
563    /// Internal audit functions per ISA 610.
564    pub ia_functions: Vec<InternalAuditFunction>,
565    /// Internal audit reports per ISA 610.
566    pub ia_reports: Vec<InternalAuditReport>,
567    /// Related parties per ISA 550.
568    pub related_parties: Vec<RelatedParty>,
569    /// Related party transactions per ISA 550.
570    pub related_party_transactions: Vec<RelatedPartyTransaction>,
571    // ---- ISA 600: Group Audits ----
572    /// Component auditors assigned by jurisdiction (ISA 600).
573    pub component_auditors: Vec<ComponentAuditor>,
574    /// Group audit plan with materiality allocations (ISA 600).
575    pub group_audit_plan: Option<GroupAuditPlan>,
576    /// Component instructions issued to component auditors (ISA 600).
577    pub component_instructions: Vec<ComponentInstruction>,
578    /// Reports received from component auditors (ISA 600).
579    pub component_reports: Vec<ComponentAuditorReport>,
580    // ---- ISA 210: Engagement Letters ----
581    /// Engagement letters per ISA 210.
582    pub engagement_letters: Vec<EngagementLetter>,
583    // ---- ISA 560 / IAS 10: Subsequent Events ----
584    /// Subsequent events per ISA 560 / IAS 10.
585    pub subsequent_events: Vec<SubsequentEvent>,
586    // ---- ISA 402: Service Organization Controls ----
587    /// Service organizations identified per ISA 402.
588    pub service_organizations: Vec<ServiceOrganization>,
589    /// SOC reports obtained per ISA 402.
590    pub soc_reports: Vec<SocReport>,
591    /// User entity controls documented per ISA 402.
592    pub user_entity_controls: Vec<UserEntityControl>,
593    // ---- ISA 570: Going Concern ----
594    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
595    pub going_concern_assessments:
596        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
597    // ---- ISA 540: Accounting Estimates ----
598    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
599    pub accounting_estimates:
600        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
601    // ---- ISA 700/701/705/706: Audit Opinions ----
602    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
603    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
604    /// Key Audit Matters per ISA 701 (flattened across all opinions).
605    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
606    // ---- SOX 302 / 404 ----
607    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
608    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
609    /// SOX Section 404 ICFR assessments (one per entity per year).
610    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
611    // ---- ISA 320: Materiality ----
612    /// Materiality calculations per entity per period (ISA 320).
613    pub materiality_calculations:
614        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
615    // ---- ISA 315: Combined Risk Assessments ----
616    /// Combined Risk Assessments per account area / assertion (ISA 315).
617    pub combined_risk_assessments:
618        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
619    // ---- ISA 530: Sampling Plans ----
620    /// Sampling plans per CRA at Moderate or higher (ISA 530).
621    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
622    /// Individual sampled items (key items + representative items) per ISA 530.
623    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
624    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
625    /// Significant classes of transactions per ISA 315 (one set per entity).
626    pub significant_transaction_classes:
627        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
628    // ---- ISA 520: Unusual Item Markers ----
629    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
630    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
631    // ---- ISA 520: Analytical Relationships ----
632    /// Analytical relationships (ratios, trends, correlations) per entity.
633    pub analytical_relationships:
634        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
635    // ---- PCAOB-ISA Cross-Reference ----
636    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
637    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
638    // ---- ISA Standard Reference ----
639    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
640    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
641    // ---- ISA 220 / ISA 300: Audit Scopes ----
642    /// Audit scope records (one per engagement) describing the audit boundary.
643    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
644}
645
646/// Banking KYC/AML data snapshot containing all generated banking entities.
///
/// Bundles the generated customers, accounts and transactions with the AML label
/// sets (transaction, customer, account, relationship) and case narratives.
/// The derived `Default` yields empty collections and zero counts.
647#[derive(Debug, Clone, Default)]
648pub struct BankingSnapshot {
649    /// Banking customers (retail, business, trust).
650    pub customers: Vec<BankingCustomer>,
651    /// Bank accounts.
652    pub accounts: Vec<BankAccount>,
653    /// Bank transactions with AML labels.
654    pub transactions: Vec<BankTransaction>,
655    /// Transaction-level AML labels with features.
656    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
657    /// Customer-level AML labels.
658    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
659    /// Account-level AML labels.
660    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
661    /// Relationship-level AML labels.
662    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
663    /// Case narratives for AML scenarios.
664    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
665    /// Number of suspicious transactions.
    /// NOTE(review): stored separately from `transactions`; presumably set by the
    /// banking generation phase — confirm it stays in sync.
666    pub suspicious_count: usize,
667    /// Number of AML scenarios generated.
668    pub scenario_count: usize,
669}
670
671/// Graph export snapshot containing exported graph metadata.
///
/// Carries only summary metadata (see [`GraphExportInfo`]), not the graphs
/// themselves. The derived `Default` is `exported == false`, `graph_count == 0`
/// and an empty `exports` map. Derives `Serialize` but not `Deserialize`.
672#[derive(Debug, Clone, Default, Serialize)]
673pub struct GraphExportSnapshot {
674    /// Whether graph export was performed.
675    pub exported: bool,
676    /// Number of graphs exported.
677    pub graph_count: usize,
678    /// Exported graph metadata (by format name).
679    pub exports: HashMap<String, GraphExportInfo>,
680}
681
682/// Information about an exported graph.
///
/// Used as the value type of `GraphExportSnapshot::exports`. Unlike the
/// snapshot structs in this file it does not derive `Default`, so every field
/// must be supplied on construction.
683#[derive(Debug, Clone, Serialize)]
684pub struct GraphExportInfo {
685    /// Graph name.
686    pub name: String,
687    /// Export format (pytorch_geometric, neo4j, dgl).
    /// Held as a free-form `String`; the listed names are not enforced by the type.
688    pub format: String,
689    /// Output directory path.
690    pub output_path: PathBuf,
691    /// Number of nodes.
692    pub node_count: usize,
693    /// Number of edges.
694    pub edge_count: usize,
695}
696
697/// Source-to-Contract (S2C) sourcing data snapshot.
///
/// One vector per sourcing record type: spend analyses, sourcing projects,
/// supplier qualifications, RFx events, bids, bid evaluations, contracts,
/// catalog items and scorecards. The derived `Default` leaves every vector empty.
698#[derive(Debug, Clone, Default)]
699pub struct SourcingSnapshot {
700    /// Spend analyses.
701    pub spend_analyses: Vec<SpendAnalysis>,
702    /// Sourcing projects.
703    pub sourcing_projects: Vec<SourcingProject>,
704    /// Supplier qualifications.
705    pub qualifications: Vec<SupplierQualification>,
706    /// RFx events (RFI, RFP, RFQ).
707    pub rfx_events: Vec<RfxEvent>,
708    /// Supplier bids.
709    pub bids: Vec<SupplierBid>,
710    /// Bid evaluations.
711    pub bid_evaluations: Vec<BidEvaluation>,
712    /// Procurement contracts.
713    pub contracts: Vec<ProcurementContract>,
714    /// Catalog items.
715    pub catalog_items: Vec<CatalogItem>,
716    /// Supplier scorecards.
717    pub scorecards: Vec<SupplierScorecard>,
718}
719
720/// A single period's trial balance with metadata.
///
/// Element type of `FinancialReportingSnapshot::trial_balances`. Derives
/// `Serialize`/`Deserialize` but not `Default`, so all fields must be supplied
/// on construction.
721#[derive(Debug, Clone, Serialize, Deserialize)]
722pub struct PeriodTrialBalance {
723    /// Fiscal year.
724    pub fiscal_year: u16,
725    /// Fiscal period (1-12).
    /// The range is a documented convention; the `u8` type does not enforce it.
726    pub fiscal_period: u8,
727    /// Period start date.
728    pub period_start: NaiveDate,
729    /// Period end date.
730    pub period_end: NaiveDate,
731    /// Trial balance entries for this period.
732    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
733}
734
735/// Financial reporting snapshot (financial statements, consolidation, bank reconciliations,
/// trial balances, segment reporting, and notes).
///
/// The derived `Default` leaves every collection empty.
736#[derive(Debug, Clone, Default)]
737pub struct FinancialReportingSnapshot {
738    /// Financial statements (balance sheet, income statement, cash flow).
739    /// For multi-entity configs this includes all standalone statements.
740    pub financial_statements: Vec<FinancialStatement>,
741    /// Standalone financial statements keyed by entity code.
742    /// Each entity code maps to its own `Vec` of statements.
743    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
744    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
745    pub consolidated_statements: Vec<FinancialStatement>,
746    /// Consolidation schedules (one per period) showing pre/post elimination detail.
747    pub consolidation_schedules: Vec<ConsolidationSchedule>,
748    /// Bank reconciliations.
749    pub bank_reconciliations: Vec<BankReconciliation>,
750    /// Period-close trial balances (one [`PeriodTrialBalance`] per period).
751    pub trial_balances: Vec<PeriodTrialBalance>,
752    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
753    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
754    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
755    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
756    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
757    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
758}
759
760/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions,
/// stock-based compensation).
///
/// Holds the generated records plus a set of `*_count` fields. The derived
/// `Default` yields empty vectors and zero counts.
/// NOTE(review): the counts are stored independently of the vectors; presumably
/// they mirror the vector lengths — confirm where the snapshot is populated.
761#[derive(Debug, Clone, Default)]
762pub struct HrSnapshot {
763    /// Payroll runs (actual data).
764    pub payroll_runs: Vec<PayrollRun>,
765    /// Payroll line items (actual data).
766    pub payroll_line_items: Vec<PayrollLineItem>,
767    /// Time entries (actual data).
768    pub time_entries: Vec<TimeEntry>,
769    /// Expense reports (actual data).
770    pub expense_reports: Vec<ExpenseReport>,
771    /// Benefit enrollments (actual data).
772    pub benefit_enrollments: Vec<BenefitEnrollment>,
773    /// Defined benefit pension plans (IAS 19 / ASC 715).
774    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
775    /// Pension obligation (DBO) roll-forwards.
776    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
777    /// Plan asset roll-forwards.
778    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
779    /// Pension disclosures.
780    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
781    /// Journal entries generated from pension expense and OCI remeasurements.
782    pub pension_journal_entries: Vec<JournalEntry>,
783    /// Stock grants (ASC 718 / IFRS 2).
784    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
785    /// Stock-based compensation period expense records.
786    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
787    /// Journal entries generated from stock-based compensation expense.
788    pub stock_comp_journal_entries: Vec<JournalEntry>,
789    /// Payroll run count.
790    pub payroll_run_count: usize,
791    /// Payroll line item count.
792    pub payroll_line_item_count: usize,
793    /// Time entry count.
794    pub time_entry_count: usize,
795    /// Expense report count.
796    pub expense_report_count: usize,
797    /// Benefit enrollment count.
798    pub benefit_enrollment_count: usize,
799    /// Pension plan count.
800    pub pension_plan_count: usize,
801    /// Stock grant count.
802    pub stock_grant_count: usize,
803}
804
805/// Accounting standards data snapshot (revenue recognition, impairment, business combinations,
/// expected credit losses, provisions, and currency translation).
///
/// Holds the generated records, the journal entries derived from them, and a
/// set of `*_count` fields. The derived `Default` yields empty vectors and zero
/// counts.
/// NOTE(review): the counts are stored independently of the vectors; presumably
/// they mirror the vector lengths — confirm where the snapshot is populated.
806#[derive(Debug, Clone, Default)]
807pub struct AccountingStandardsSnapshot {
808    /// Revenue recognition contracts (actual data).
809    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
810    /// Impairment tests (actual data).
811    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
812    /// Business combinations (IFRS 3 / ASC 805).
813    pub business_combinations:
814        Vec<datasynth_core::models::business_combination::BusinessCombination>,
815    /// Journal entries generated from business combinations (Day 1 + amortization).
816    pub business_combination_journal_entries: Vec<JournalEntry>,
817    /// ECL models (IFRS 9 / ASC 326).
818    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
819    /// ECL provision movements.
820    pub ecl_provision_movements:
821        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
822    /// Journal entries from ECL provision.
823    pub ecl_journal_entries: Vec<JournalEntry>,
824    /// Provisions (IAS 37 / ASC 450).
825    pub provisions: Vec<datasynth_core::models::provision::Provision>,
826    /// Provision movement roll-forwards (IAS 37 / ASC 450).
827    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
828    /// Contingent liabilities (IAS 37 / ASC 450).
829    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
830    /// Journal entries from provisions.
831    pub provision_journal_entries: Vec<JournalEntry>,
832    /// IAS 21 functional currency translation results (one per entity per period).
833    pub currency_translation_results:
834        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
835    /// Revenue recognition contract count.
836    pub revenue_contract_count: usize,
837    /// Impairment test count.
838    pub impairment_test_count: usize,
839    /// Business combination count.
840    pub business_combination_count: usize,
841    /// ECL model count.
842    pub ecl_model_count: usize,
843    /// Provision count.
844    pub provision_count: usize,
845    /// Currency translation result count (IAS 21).
846    pub currency_translation_count: usize,
847}
848
849/// Compliance regulations framework snapshot (standards, cross-references, jurisdictions,
/// procedures, findings, filings, graph).
///
/// The derived `Default` yields empty vectors and `compliance_graph == None`.
850#[derive(Debug, Clone, Default)]
851pub struct ComplianceRegulationsSnapshot {
852    /// Flattened standard records for output.
853    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
854    /// Cross-reference records.
855    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
856    /// Jurisdiction profile records.
857    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
858    /// Generated audit procedures.
859    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
860    /// Generated compliance findings.
861    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
862    /// Generated regulatory filings.
863    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
864    /// Compliance graph (if graph integration enabled); `None` otherwise.
865    pub compliance_graph: Option<datasynth_graph::Graph>,
866}
867
868/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
///
/// Each record vector is paired with a `*_count` field. The derived `Default`
/// yields empty vectors and zero counts.
/// NOTE(review): the counts are stored independently of the vectors; presumably
/// they mirror the vector lengths — confirm where the snapshot is populated.
869#[derive(Debug, Clone, Default)]
870pub struct ManufacturingSnapshot {
871    /// Production orders (actual data).
872    pub production_orders: Vec<ProductionOrder>,
873    /// Quality inspections (actual data).
874    pub quality_inspections: Vec<QualityInspection>,
875    /// Cycle counts (actual data).
876    pub cycle_counts: Vec<CycleCount>,
877    /// BOM components (actual data).
878    pub bom_components: Vec<BomComponent>,
879    /// Inventory movements (actual data).
880    pub inventory_movements: Vec<InventoryMovement>,
881    /// Production order count.
882    pub production_order_count: usize,
883    /// Quality inspection count.
884    pub quality_inspection_count: usize,
885    /// Number of cycle count records.
886    pub cycle_count_count: usize,
887    /// BOM component count.
888    pub bom_component_count: usize,
889    /// Inventory movement count.
890    pub inventory_movement_count: usize,
891}
892
893/// Sales, KPI, and budget data snapshot.
///
/// Holds the generated sales quotes, management KPIs and budgets, plus three
/// count fields. The derived `Default` yields empty vectors and zero counts.
894#[derive(Debug, Clone, Default)]
895pub struct SalesKpiBudgetsSnapshot {
896    /// Sales quotes (actual data).
897    pub sales_quotes: Vec<SalesQuote>,
898    /// Management KPIs (actual data).
899    pub kpis: Vec<ManagementKpi>,
900    /// Budgets (actual data).
901    pub budgets: Vec<Budget>,
902    /// Sales quote count.
903    pub sales_quote_count: usize,
904    /// Management KPI count.
905    pub kpi_count: usize,
906    /// Budget line count.
    /// NOTE(review): named for budget *lines*, so it is presumably not
    /// `budgets.len()` — confirm how it is computed.
907    pub budget_line_count: usize,
908}
909
910/// Anomaly labels generated during injection.
///
/// The derived `Default` yields no labels, `summary == None` and an empty
/// `by_type` map.
911#[derive(Debug, Clone, Default)]
912pub struct AnomalyLabels {
913    /// All anomaly labels.
914    pub labels: Vec<LabeledAnomaly>,
915    /// Summary statistics, if any were produced.
916    pub summary: Option<AnomalySummary>,
917    /// Count by anomaly type, keyed by a `String` type name.
918    pub by_type: HashMap<String, usize>,
919}
920
921/// Balance validation results from running balance tracker.
///
/// The derived `Default` has every flag `false`, every count and total zero,
/// and no validation errors, so `validated == false` identifies a result for
/// which validation was not performed.
922#[derive(Debug, Clone, Default)]
923pub struct BalanceValidationResult {
924    /// Whether validation was performed.
925    pub validated: bool,
926    /// Whether balance sheet equation is satisfied.
    /// Defaults to `false`, so read it together with `validated`.
927    pub is_balanced: bool,
928    /// Number of entries processed.
929    pub entries_processed: u64,
930    /// Total debits across all entries.
931    pub total_debits: rust_decimal::Decimal,
932    /// Total credits across all entries.
933    pub total_credits: rust_decimal::Decimal,
934    /// Number of accounts tracked.
935    pub accounts_tracked: usize,
936    /// Number of companies tracked.
937    pub companies_tracked: usize,
938    /// Validation errors encountered.
939    pub validation_errors: Vec<ValidationError>,
940    /// Whether any unbalanced entries were found.
941    pub has_unbalanced_entries: bool,
942}
943
944/// Tax data snapshot (jurisdictions, codes, tax lines, returns, provisions, withholding,
/// anomaly labels, deferred tax).
///
/// The derived `Default` yields empty vectors, zero counts and a default
/// `deferred_tax` value.
945#[derive(Debug, Clone, Default)]
946pub struct TaxSnapshot {
947    /// Tax jurisdictions.
948    pub jurisdictions: Vec<TaxJurisdiction>,
949    /// Tax codes.
950    pub codes: Vec<TaxCode>,
951    /// Tax lines computed on documents.
952    pub tax_lines: Vec<TaxLine>,
953    /// Tax returns filed per period.
954    pub tax_returns: Vec<TaxReturn>,
955    /// Tax provisions.
956    pub tax_provisions: Vec<TaxProvision>,
957    /// Withholding tax records.
958    pub withholding_records: Vec<WithholdingTaxRecord>,
959    /// Tax anomaly labels.
960    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
961    /// Jurisdiction count.
    /// NOTE(review): stored separately from `jurisdictions`; presumably its length — confirm.
962    pub jurisdiction_count: usize,
963    /// Code count.
    /// NOTE(review): stored separately from `codes`; presumably its length — confirm.
964    pub code_count: usize,
965    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
966    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
967}
968
969/// Intercompany data snapshot (group structure, matched pairs, IC journal entries,
/// eliminations, NCI measurements).
///
/// Derives `Serialize` and `Deserialize` as well as `Default`. The default value
/// has no group structure, empty vectors, zero counts and `match_rate == 0.0`.
970#[derive(Debug, Clone, Default, Serialize, Deserialize)]
971pub struct IntercompanySnapshot {
972    /// Group ownership structure (parent/subsidiary/associate relationships).
973    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
974    /// IC matched pairs (transaction pairs between related entities).
975    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
976    /// IC journal entries generated from matched pairs (seller side).
977    pub seller_journal_entries: Vec<JournalEntry>,
978    /// IC journal entries generated from matched pairs (buyer side).
979    pub buyer_journal_entries: Vec<JournalEntry>,
980    /// Elimination entries for consolidation.
981    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
982    /// NCI measurements derived from group structure ownership percentages.
983    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
984    /// IC matched pair count.
985    pub matched_pair_count: usize,
986    /// IC elimination entry count.
987    pub elimination_entry_count: usize,
988    /// IC matching rate (0.0 to 1.0).
    /// The range is a documented convention; the `f64` type does not enforce it.
989    pub match_rate: f64,
990}
991
992/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain,
/// materiality, disclosures, climate scenarios, anomaly labels).
///
/// The derived `Default` yields empty vectors and zero counts.
993#[derive(Debug, Clone, Default)]
994pub struct EsgSnapshot {
995    /// Emission records (scope 1, 2, 3).
996    pub emissions: Vec<EmissionRecord>,
997    /// Energy consumption records.
998    pub energy: Vec<EnergyConsumption>,
999    /// Water usage records.
1000    pub water: Vec<WaterUsage>,
1001    /// Waste records.
1002    pub waste: Vec<WasteRecord>,
1003    /// Workforce diversity metrics.
1004    pub diversity: Vec<WorkforceDiversityMetric>,
1005    /// Pay equity metrics.
1006    pub pay_equity: Vec<PayEquityMetric>,
1007    /// Safety incidents.
1008    pub safety_incidents: Vec<SafetyIncident>,
1009    /// Safety metrics.
1010    pub safety_metrics: Vec<SafetyMetric>,
1011    /// Governance metrics.
1012    pub governance: Vec<GovernanceMetric>,
1013    /// Supplier ESG assessments.
1014    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1015    /// Materiality assessments.
1016    pub materiality: Vec<MaterialityAssessment>,
1017    /// ESG disclosures.
1018    pub disclosures: Vec<EsgDisclosure>,
1019    /// Climate scenarios.
1020    pub climate_scenarios: Vec<ClimateScenario>,
1021    /// ESG anomaly labels.
1022    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1023    /// Total emission record count.
    /// NOTE(review): stored separately from `emissions`; presumably its length — confirm.
1024    pub emission_count: usize,
1025    /// Total disclosure count.
    /// NOTE(review): stored separately from `disclosures`; presumably its length — confirm.
1026    pub disclosure_count: usize,
1027}
1028
1029/// Treasury data snapshot (cash management, pooling, hedging, debt, guarantees, netting,
/// anomaly labels).
///
/// Contains record vectors only (no separate count fields). The derived
/// `Default` leaves every vector empty.
1030#[derive(Debug, Clone, Default)]
1031pub struct TreasurySnapshot {
1032    /// Cash positions (daily balances per account).
1033    pub cash_positions: Vec<CashPosition>,
1034    /// Cash forecasts.
1035    pub cash_forecasts: Vec<CashForecast>,
1036    /// Cash pools.
1037    pub cash_pools: Vec<CashPool>,
1038    /// Cash pool sweep transactions.
1039    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1040    /// Hedging instruments.
1041    pub hedging_instruments: Vec<HedgingInstrument>,
1042    /// Hedge relationships (ASC 815/IFRS 9 designations).
1043    pub hedge_relationships: Vec<HedgeRelationship>,
1044    /// Debt instruments.
1045    pub debt_instruments: Vec<DebtInstrument>,
1046    /// Bank guarantees and letters of credit.
1047    pub bank_guarantees: Vec<BankGuarantee>,
1048    /// Intercompany netting runs.
1049    pub netting_runs: Vec<NettingRun>,
1050    /// Treasury anomaly labels.
1051    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1052}
1053
1054/// Project accounting data snapshot (projects, costs, revenue, EVM, change orders, milestones).
///
/// Contains record vectors only (no separate count fields). The derived
/// `Default` leaves every vector empty.
1055#[derive(Debug, Clone, Default)]
1056pub struct ProjectAccountingSnapshot {
1057    /// Projects with WBS hierarchies.
1058    pub projects: Vec<Project>,
1059    /// Project cost lines (linked from source documents).
1060    pub cost_lines: Vec<ProjectCostLine>,
1061    /// Revenue recognition records.
1062    pub revenue_records: Vec<ProjectRevenue>,
1063    /// Earned value metrics.
1064    pub earned_value_metrics: Vec<EarnedValueMetric>,
1065    /// Change orders.
1066    pub change_orders: Vec<ChangeOrder>,
1067    /// Project milestones.
1068    pub milestones: Vec<ProjectMilestone>,
1069}
1070
1071/// Complete result of enhanced generation run.
///
/// Aggregates one snapshot or collection per generation area. Derives only
/// `Debug` and `Default` (no `Clone`). Fields documented as "if ... enabled"
/// are always present in the struct; the derived `Default` gives them an empty
/// or `None` value.
1072#[derive(Debug, Default)]
1073pub struct EnhancedGenerationResult {
1074    /// Generated chart of accounts.
1075    pub chart_of_accounts: ChartOfAccounts,
1076    /// Master data snapshot.
1077    pub master_data: MasterDataSnapshot,
1078    /// Document flow snapshot.
1079    pub document_flows: DocumentFlowSnapshot,
1080    /// Subledger snapshot (linked from document flows).
1081    pub subledger: SubledgerSnapshot,
1082    /// OCPM event log snapshot (if OCPM generation enabled).
1083    pub ocpm: OcpmSnapshot,
1084    /// Audit data snapshot (if audit generation enabled).
1085    pub audit: AuditSnapshot,
1086    /// Banking KYC/AML data snapshot (if banking generation enabled).
1087    pub banking: BankingSnapshot,
1088    /// Graph export snapshot (if graph export enabled).
1089    pub graph_export: GraphExportSnapshot,
1090    /// S2C sourcing data snapshot (if sourcing generation enabled).
1091    pub sourcing: SourcingSnapshot,
1092    /// Financial reporting snapshot (financial statements, bank reconciliations, trial balances, segments, notes).
1093    pub financial_reporting: FinancialReportingSnapshot,
1094    /// HR data snapshot (payroll, time entries, expenses, benefits, pensions, stock compensation).
1095    pub hr: HrSnapshot,
1096    /// Accounting standards snapshot (revenue recognition, impairment, business combinations, ECL, provisions, currency translation).
1097    pub accounting_standards: AccountingStandardsSnapshot,
1098    /// Manufacturing snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
1099    pub manufacturing: ManufacturingSnapshot,
1100    /// Sales, KPI, and budget snapshot.
1101    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1102    /// Tax data snapshot (jurisdictions, codes, tax lines, provisions, returns, withholding, deferred tax).
1103    pub tax: TaxSnapshot,
1104    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1105    pub esg: EsgSnapshot,
1106    /// Treasury data snapshot (cash management, hedging, debt).
1107    pub treasury: TreasurySnapshot,
1108    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1109    pub project_accounting: ProjectAccountingSnapshot,
1110    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1111    pub process_evolution: Vec<ProcessEvolutionEvent>,
1112    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1113    pub organizational_events: Vec<OrganizationalEvent>,
1114    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1115    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1116    /// Intercompany data snapshot (group structure, matched pairs, IC journal entries, eliminations).
1117    pub intercompany: IntercompanySnapshot,
1118    /// Generated journal entries.
1119    pub journal_entries: Vec<JournalEntry>,
1120    /// Anomaly labels (if injection enabled).
1121    pub anomaly_labels: AnomalyLabels,
1122    /// Balance validation results (if validation enabled).
1123    pub balance_validation: BalanceValidationResult,
1124    /// Data quality statistics (if injection enabled).
1125    pub data_quality_stats: DataQualityStats,
1126    /// Data quality issue records (if injection enabled).
1127    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1128    /// Generation statistics.
1129    pub statistics: EnhancedGenerationStatistics,
1130    /// Data lineage graph (if tracking enabled); `None` otherwise.
1131    pub lineage: Option<super::lineage::LineageGraph>,
1132    /// Quality gate evaluation result, if one was produced.
1133    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1134    /// Internal controls (if controls generation enabled).
1135    pub internal_controls: Vec<InternalControl>,
1136    /// SoD (Segregation of Duties) violations identified during control application.
1137    ///
1138    /// Each record corresponds to a journal entry where `sod_violation == true`.
1139    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1140    /// Opening balances (if opening balance generation enabled).
1141    pub opening_balances: Vec<GeneratedOpeningBalance>,
1142    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1143    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1144    /// Counterfactual (original, mutated) JE pairs for ML training.
1145    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1146    /// Fraud red-flag indicators on P2P/O2C documents.
1147    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1148    /// Collusion rings (coordinated fraud networks).
1149    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1150    /// Bi-temporal version chains for vendor entities.
1151    pub temporal_vendor_chains:
1152        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1153    /// Entity relationship graph (nodes + edges with strength scores), if one was built.
1154    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1155    /// Cross-process links (P2P ↔ O2C via inventory movements).
1156    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1157    /// Industry-specific GL accounts and metadata, if produced.
1158    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1159    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1160    pub compliance_regulations: ComplianceRegulationsSnapshot,
1161}
1162
1163/// Enhanced statistics about a generation run.
1164#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1165pub struct EnhancedGenerationStatistics {
1166    /// Total journal entries generated.
1167    pub total_entries: u64,
1168    /// Total line items generated.
1169    pub total_line_items: u64,
1170    /// Number of accounts in CoA.
1171    pub accounts_count: usize,
1172    /// Number of companies.
1173    pub companies_count: usize,
1174    /// Period in months.
1175    pub period_months: u32,
1176    /// Master data counts.
1177    pub vendor_count: usize,
1178    pub customer_count: usize,
1179    pub material_count: usize,
1180    pub asset_count: usize,
1181    pub employee_count: usize,
1182    /// Document flow counts.
1183    pub p2p_chain_count: usize,
1184    pub o2c_chain_count: usize,
1185    /// Subledger counts.
1186    pub ap_invoice_count: usize,
1187    pub ar_invoice_count: usize,
1188    /// OCPM counts.
1189    pub ocpm_event_count: usize,
1190    pub ocpm_object_count: usize,
1191    pub ocpm_case_count: usize,
1192    /// Audit counts.
1193    pub audit_engagement_count: usize,
1194    pub audit_workpaper_count: usize,
1195    pub audit_evidence_count: usize,
1196    pub audit_risk_count: usize,
1197    pub audit_finding_count: usize,
1198    pub audit_judgment_count: usize,
1199    /// ISA 505 confirmation counts.
1200    #[serde(default)]
1201    pub audit_confirmation_count: usize,
1202    #[serde(default)]
1203    pub audit_confirmation_response_count: usize,
1204    /// ISA 330/530 procedure step and sample counts.
1205    #[serde(default)]
1206    pub audit_procedure_step_count: usize,
1207    #[serde(default)]
1208    pub audit_sample_count: usize,
1209    /// ISA 520 analytical procedure counts.
1210    #[serde(default)]
1211    pub audit_analytical_result_count: usize,
1212    /// ISA 610 internal audit counts.
1213    #[serde(default)]
1214    pub audit_ia_function_count: usize,
1215    #[serde(default)]
1216    pub audit_ia_report_count: usize,
1217    /// ISA 550 related party counts.
1218    #[serde(default)]
1219    pub audit_related_party_count: usize,
1220    #[serde(default)]
1221    pub audit_related_party_transaction_count: usize,
1222    /// Anomaly counts.
1223    pub anomalies_injected: usize,
1224    /// Data quality issue counts.
1225    pub data_quality_issues: usize,
1226    /// Banking counts.
1227    pub banking_customer_count: usize,
1228    pub banking_account_count: usize,
1229    pub banking_transaction_count: usize,
1230    pub banking_suspicious_count: usize,
1231    /// Graph export counts.
1232    pub graph_export_count: usize,
1233    pub graph_node_count: usize,
1234    pub graph_edge_count: usize,
1235    /// LLM enrichment timing (milliseconds).
1236    #[serde(default)]
1237    pub llm_enrichment_ms: u64,
1238    /// Number of vendor names enriched by LLM.
1239    #[serde(default)]
1240    pub llm_vendors_enriched: usize,
1241    /// Diffusion enhancement timing (milliseconds).
1242    #[serde(default)]
1243    pub diffusion_enhancement_ms: u64,
1244    /// Number of diffusion samples generated.
1245    #[serde(default)]
1246    pub diffusion_samples_generated: usize,
1247    /// Causal generation timing (milliseconds).
1248    #[serde(default)]
1249    pub causal_generation_ms: u64,
1250    /// Number of causal samples generated.
1251    #[serde(default)]
1252    pub causal_samples_generated: usize,
1253    /// Whether causal validation passed.
1254    #[serde(default)]
1255    pub causal_validation_passed: Option<bool>,
1256    /// S2C sourcing counts.
1257    #[serde(default)]
1258    pub sourcing_project_count: usize,
1259    #[serde(default)]
1260    pub rfx_event_count: usize,
1261    #[serde(default)]
1262    pub bid_count: usize,
1263    #[serde(default)]
1264    pub contract_count: usize,
1265    #[serde(default)]
1266    pub catalog_item_count: usize,
1267    #[serde(default)]
1268    pub scorecard_count: usize,
1269    /// Financial reporting counts.
1270    #[serde(default)]
1271    pub financial_statement_count: usize,
1272    #[serde(default)]
1273    pub bank_reconciliation_count: usize,
1274    /// HR counts.
1275    #[serde(default)]
1276    pub payroll_run_count: usize,
1277    #[serde(default)]
1278    pub time_entry_count: usize,
1279    #[serde(default)]
1280    pub expense_report_count: usize,
1281    #[serde(default)]
1282    pub benefit_enrollment_count: usize,
1283    #[serde(default)]
1284    pub pension_plan_count: usize,
1285    #[serde(default)]
1286    pub stock_grant_count: usize,
1287    /// Accounting standards counts.
1288    #[serde(default)]
1289    pub revenue_contract_count: usize,
1290    #[serde(default)]
1291    pub impairment_test_count: usize,
1292    #[serde(default)]
1293    pub business_combination_count: usize,
1294    #[serde(default)]
1295    pub ecl_model_count: usize,
1296    #[serde(default)]
1297    pub provision_count: usize,
1298    /// Manufacturing counts.
1299    #[serde(default)]
1300    pub production_order_count: usize,
1301    #[serde(default)]
1302    pub quality_inspection_count: usize,
1303    #[serde(default)]
1304    pub cycle_count_count: usize,
1305    #[serde(default)]
1306    pub bom_component_count: usize,
1307    #[serde(default)]
1308    pub inventory_movement_count: usize,
1309    /// Sales & reporting counts.
1310    #[serde(default)]
1311    pub sales_quote_count: usize,
1312    #[serde(default)]
1313    pub kpi_count: usize,
1314    #[serde(default)]
1315    pub budget_line_count: usize,
1316    /// Tax counts.
1317    #[serde(default)]
1318    pub tax_jurisdiction_count: usize,
1319    #[serde(default)]
1320    pub tax_code_count: usize,
1321    /// ESG counts.
1322    #[serde(default)]
1323    pub esg_emission_count: usize,
1324    #[serde(default)]
1325    pub esg_disclosure_count: usize,
1326    /// Intercompany counts.
1327    #[serde(default)]
1328    pub ic_matched_pair_count: usize,
1329    #[serde(default)]
1330    pub ic_elimination_count: usize,
1331    /// Number of intercompany journal entries (seller + buyer side).
1332    #[serde(default)]
1333    pub ic_transaction_count: usize,
1334    /// Number of fixed asset subledger records.
1335    #[serde(default)]
1336    pub fa_subledger_count: usize,
1337    /// Number of inventory subledger records.
1338    #[serde(default)]
1339    pub inventory_subledger_count: usize,
1340    /// Treasury debt instrument count.
1341    #[serde(default)]
1342    pub treasury_debt_instrument_count: usize,
1343    /// Treasury hedging instrument count.
1344    #[serde(default)]
1345    pub treasury_hedging_instrument_count: usize,
1346    /// Project accounting project count.
1347    #[serde(default)]
1348    pub project_count: usize,
1349    /// Project accounting change order count.
1350    #[serde(default)]
1351    pub project_change_order_count: usize,
1352    /// Tax provision count.
1353    #[serde(default)]
1354    pub tax_provision_count: usize,
1355    /// Opening balance count.
1356    #[serde(default)]
1357    pub opening_balance_count: usize,
1358    /// Subledger reconciliation count.
1359    #[serde(default)]
1360    pub subledger_reconciliation_count: usize,
1361    /// Tax line count.
1362    #[serde(default)]
1363    pub tax_line_count: usize,
1364    /// Project cost line count.
1365    #[serde(default)]
1366    pub project_cost_line_count: usize,
1367    /// Cash position count.
1368    #[serde(default)]
1369    pub cash_position_count: usize,
1370    /// Cash forecast count.
1371    #[serde(default)]
1372    pub cash_forecast_count: usize,
1373    /// Cash pool count.
1374    #[serde(default)]
1375    pub cash_pool_count: usize,
1376    /// Process evolution event count.
1377    #[serde(default)]
1378    pub process_evolution_event_count: usize,
1379    /// Organizational event count.
1380    #[serde(default)]
1381    pub organizational_event_count: usize,
1382    /// Counterfactual pair count.
1383    #[serde(default)]
1384    pub counterfactual_pair_count: usize,
1385    /// Number of fraud red-flag indicators generated.
1386    #[serde(default)]
1387    pub red_flag_count: usize,
1388    /// Number of collusion rings generated.
1389    #[serde(default)]
1390    pub collusion_ring_count: usize,
1391    /// Number of bi-temporal vendor version chains generated.
1392    #[serde(default)]
1393    pub temporal_version_chain_count: usize,
1394    /// Number of nodes in the entity relationship graph.
1395    #[serde(default)]
1396    pub entity_relationship_node_count: usize,
1397    /// Number of edges in the entity relationship graph.
1398    #[serde(default)]
1399    pub entity_relationship_edge_count: usize,
1400    /// Number of cross-process links generated.
1401    #[serde(default)]
1402    pub cross_process_link_count: usize,
1403    /// Number of disruption events generated.
1404    #[serde(default)]
1405    pub disruption_event_count: usize,
1406    /// Number of industry-specific GL accounts generated.
1407    #[serde(default)]
1408    pub industry_gl_account_count: usize,
1409    /// Number of period-close journal entries generated (tax provision + closing entries).
1410    #[serde(default)]
1411    pub period_close_je_count: usize,
1412}
1413
1414/// Enhanced orchestrator with full feature integration.
1415pub struct EnhancedOrchestrator {
1416    config: GeneratorConfig,
1417    phase_config: PhaseConfig,
1418    coa: Option<Arc<ChartOfAccounts>>,
1419    master_data: MasterDataSnapshot,
1420    seed: u64,
1421    multi_progress: Option<MultiProgress>,
1422    /// Resource guard for memory, disk, and CPU monitoring
1423    resource_guard: ResourceGuard,
1424    /// Output path for disk space monitoring
1425    output_path: Option<PathBuf>,
1426    /// Copula generators for preserving correlations (from fingerprint)
1427    copula_generators: Vec<CopulaGeneratorSpec>,
1428    /// Country pack registry for localized data generation
1429    country_pack_registry: datasynth_core::CountryPackRegistry,
1430    /// Optional streaming sink for phase-by-phase output
1431    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1432}
1433
1434impl EnhancedOrchestrator {
1435    /// Create a new enhanced orchestrator.
1436    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1437        datasynth_config::validate_config(&config)?;
1438
1439        let seed = config.global.seed.unwrap_or_else(rand::random);
1440
1441        // Build resource guard from config
1442        let resource_guard = Self::build_resource_guard(&config, None);
1443
1444        // Build country pack registry from config
1445        let country_pack_registry = match &config.country_packs {
1446            Some(cp) => {
1447                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1448                    .map_err(|e| SynthError::config(e.to_string()))?
1449            }
1450            None => datasynth_core::CountryPackRegistry::builtin_only()
1451                .map_err(|e| SynthError::config(e.to_string()))?,
1452        };
1453
1454        Ok(Self {
1455            config,
1456            phase_config,
1457            coa: None,
1458            master_data: MasterDataSnapshot::default(),
1459            seed,
1460            multi_progress: None,
1461            resource_guard,
1462            output_path: None,
1463            copula_generators: Vec::new(),
1464            country_pack_registry,
1465            phase_sink: None,
1466        })
1467    }
1468
1469    /// Create with default phase config.
1470    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1471        Self::new(config, PhaseConfig::default())
1472    }
1473
1474    /// Set a streaming phase sink for real-time output.
1475    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1476        self.phase_sink = Some(sink);
1477        self
1478    }
1479
1480    /// Emit a batch of items to the phase sink (if configured).
1481    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1482        if let Some(ref sink) = self.phase_sink {
1483            for item in items {
1484                if let Ok(value) = serde_json::to_value(item) {
1485                    if let Err(e) = sink.emit(phase, type_name, &value) {
1486                        warn!(
1487                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1488                        );
1489                    }
1490                }
1491            }
1492            if let Err(e) = sink.phase_complete(phase) {
1493                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1494            }
1495        }
1496    }
1497
1498    /// Enable/disable progress bars.
1499    pub fn with_progress(mut self, show: bool) -> Self {
1500        self.phase_config.show_progress = show;
1501        if show {
1502            self.multi_progress = Some(MultiProgress::new());
1503        }
1504        self
1505    }
1506
1507    /// Set the output path for disk space monitoring.
1508    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1509        let path = path.into();
1510        self.output_path = Some(path.clone());
1511        // Rebuild resource guard with the output path
1512        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1513        self
1514    }
1515
1516    /// Access the country pack registry.
1517    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1518        &self.country_pack_registry
1519    }
1520
1521    /// Look up a country pack by country code string.
1522    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1523        self.country_pack_registry.get_by_str(country)
1524    }
1525
1526    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1527    /// company, defaulting to `"US"` if no companies are configured.
1528    fn primary_country_code(&self) -> &str {
1529        self.config
1530            .companies
1531            .first()
1532            .map(|c| c.country.as_str())
1533            .unwrap_or("US")
1534    }
1535
1536    /// Resolve the country pack for the primary (first) company.
1537    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1538        self.country_pack_for(self.primary_country_code())
1539    }
1540
1541    /// Resolve the CoA framework from config/country-pack.
1542    fn resolve_coa_framework(&self) -> CoAFramework {
1543        if self.config.accounting_standards.enabled {
1544            match self.config.accounting_standards.framework {
1545                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1546                    return CoAFramework::FrenchPcg;
1547                }
1548                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1549                    return CoAFramework::GermanSkr04;
1550                }
1551                _ => {}
1552            }
1553        }
1554        // Fallback: derive from country pack
1555        let pack = self.primary_pack();
1556        match pack.accounting.framework.as_str() {
1557            "french_gaap" => CoAFramework::FrenchPcg,
1558            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1559            _ => CoAFramework::UsGaap,
1560        }
1561    }
1562
1563    /// Check if copula generators are available.
1564    ///
1565    /// Returns true if the orchestrator has copula generators for preserving
1566    /// correlations (typically from fingerprint-based generation).
1567    pub fn has_copulas(&self) -> bool {
1568        !self.copula_generators.is_empty()
1569    }
1570
1571    /// Get the copula generators.
1572    ///
1573    /// Returns a reference to the copula generators for use during generation.
1574    /// These can be used to generate correlated samples that preserve the
1575    /// statistical relationships from the source data.
1576    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1577        &self.copula_generators
1578    }
1579
1580    /// Get a mutable reference to the copula generators.
1581    ///
1582    /// Allows generators to sample from copulas during data generation.
1583    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1584        &mut self.copula_generators
1585    }
1586
1587    /// Sample correlated values from a named copula.
1588    ///
1589    /// Returns None if the copula doesn't exist.
1590    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1591        self.copula_generators
1592            .iter_mut()
1593            .find(|c| c.name == copula_name)
1594            .map(|c| c.generator.sample())
1595    }
1596
1597    /// Create an orchestrator from a fingerprint file.
1598    ///
1599    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1600    /// and creates an orchestrator configured to generate data matching
1601    /// the statistical properties of the original data.
1602    ///
1603    /// # Arguments
1604    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1605    /// * `phase_config` - Phase configuration for generation
1606    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1607    ///
1608    /// # Example
1609    /// ```no_run
1610    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1611    /// use std::path::Path;
1612    ///
1613    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1614    ///     Path::new("fingerprint.dsf"),
1615    ///     PhaseConfig::default(),
1616    ///     1.0,
1617    /// ).unwrap();
1618    /// ```
1619    pub fn from_fingerprint(
1620        fingerprint_path: &std::path::Path,
1621        phase_config: PhaseConfig,
1622        scale: f64,
1623    ) -> SynthResult<Self> {
1624        info!("Loading fingerprint from: {}", fingerprint_path.display());
1625
1626        // Read the fingerprint
1627        let reader = FingerprintReader::new();
1628        let fingerprint = reader
1629            .read_from_file(fingerprint_path)
1630            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1631
1632        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1633    }
1634
1635    /// Create an orchestrator from a loaded fingerprint.
1636    ///
1637    /// # Arguments
1638    /// * `fingerprint` - The loaded fingerprint
1639    /// * `phase_config` - Phase configuration for generation
1640    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1641    pub fn from_fingerprint_data(
1642        fingerprint: Fingerprint,
1643        phase_config: PhaseConfig,
1644        scale: f64,
1645    ) -> SynthResult<Self> {
1646        info!(
1647            "Synthesizing config from fingerprint (version: {}, tables: {})",
1648            fingerprint.manifest.version,
1649            fingerprint.schema.tables.len()
1650        );
1651
1652        // Generate a seed for the synthesis
1653        let seed: u64 = rand::random();
1654        info!("Fingerprint synthesis seed: {}", seed);
1655
1656        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1657        let options = SynthesisOptions {
1658            scale,
1659            seed: Some(seed),
1660            preserve_correlations: true,
1661            inject_anomalies: true,
1662        };
1663        let synthesizer = ConfigSynthesizer::with_options(options);
1664
1665        // Synthesize full result including copula generators
1666        let synthesis_result = synthesizer
1667            .synthesize_full(&fingerprint, seed)
1668            .map_err(|e| {
1669                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1670            })?;
1671
1672        // Start with a base config from the fingerprint's industry if available
1673        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1674            Self::base_config_for_industry(industry)
1675        } else {
1676            Self::base_config_for_industry("manufacturing")
1677        };
1678
1679        // Apply the synthesized patches
1680        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1681
1682        // Log synthesis results
1683        info!(
1684            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1685            fingerprint.schema.tables.len(),
1686            scale,
1687            synthesis_result.copula_generators.len()
1688        );
1689
1690        if !synthesis_result.copula_generators.is_empty() {
1691            for spec in &synthesis_result.copula_generators {
1692                info!(
1693                    "  Copula '{}' for table '{}': {} columns",
1694                    spec.name,
1695                    spec.table,
1696                    spec.columns.len()
1697                );
1698            }
1699        }
1700
1701        // Create the orchestrator with the synthesized config
1702        let mut orchestrator = Self::new(config, phase_config)?;
1703
1704        // Store copula generators for use during generation
1705        orchestrator.copula_generators = synthesis_result.copula_generators;
1706
1707        Ok(orchestrator)
1708    }
1709
1710    /// Create a base config for a given industry.
1711    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1712        use datasynth_config::presets::create_preset;
1713        use datasynth_config::TransactionVolume;
1714        use datasynth_core::models::{CoAComplexity, IndustrySector};
1715
1716        let sector = match industry.to_lowercase().as_str() {
1717            "manufacturing" => IndustrySector::Manufacturing,
1718            "retail" => IndustrySector::Retail,
1719            "financial" | "financial_services" => IndustrySector::FinancialServices,
1720            "healthcare" => IndustrySector::Healthcare,
1721            "technology" | "tech" => IndustrySector::Technology,
1722            _ => IndustrySector::Manufacturing,
1723        };
1724
1725        // Create a preset with reasonable defaults
1726        create_preset(
1727            sector,
1728            1,  // company count
1729            12, // period months
1730            CoAComplexity::Medium,
1731            TransactionVolume::TenK,
1732        )
1733    }
1734
1735    /// Apply a config patch to a GeneratorConfig.
1736    fn apply_config_patch(
1737        mut config: GeneratorConfig,
1738        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1739    ) -> GeneratorConfig {
1740        use datasynth_fingerprint::synthesis::ConfigValue;
1741
1742        for (key, value) in patch.values() {
1743            match (key.as_str(), value) {
1744                // Transaction count is handled via TransactionVolume enum on companies
1745                // Log it but cannot directly set it (would need to modify company volumes)
1746                ("transactions.count", ConfigValue::Integer(n)) => {
1747                    info!(
1748                        "Fingerprint suggests {} transactions (apply via company volumes)",
1749                        n
1750                    );
1751                }
1752                ("global.period_months", ConfigValue::Integer(n)) => {
1753                    config.global.period_months = (*n).clamp(1, 120) as u32;
1754                }
1755                ("global.start_date", ConfigValue::String(s)) => {
1756                    config.global.start_date = s.clone();
1757                }
1758                ("global.seed", ConfigValue::Integer(n)) => {
1759                    config.global.seed = Some(*n as u64);
1760                }
1761                ("fraud.enabled", ConfigValue::Bool(b)) => {
1762                    config.fraud.enabled = *b;
1763                }
1764                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1765                    config.fraud.fraud_rate = *f;
1766                }
1767                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1768                    config.data_quality.enabled = *b;
1769                }
1770                // Handle anomaly injection paths (mapped to fraud config)
1771                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1772                    config.fraud.enabled = *b;
1773                }
1774                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1775                    config.fraud.fraud_rate = *f;
1776                }
1777                _ => {
1778                    debug!("Ignoring unknown config patch key: {}", key);
1779                }
1780            }
1781        }
1782
1783        config
1784    }
1785
1786    /// Build a resource guard from the configuration.
1787    fn build_resource_guard(
1788        config: &GeneratorConfig,
1789        output_path: Option<PathBuf>,
1790    ) -> ResourceGuard {
1791        let mut builder = ResourceGuardBuilder::new();
1792
1793        // Configure memory limit if set
1794        if config.global.memory_limit_mb > 0 {
1795            builder = builder.memory_limit(config.global.memory_limit_mb);
1796        }
1797
1798        // Configure disk monitoring for output path
1799        if let Some(path) = output_path {
1800            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1801        }
1802
1803        // Use conservative degradation settings for production safety
1804        builder = builder.conservative();
1805
1806        builder.build()
1807    }
1808
1809    /// Check resources (memory, disk, CPU) and return degradation level.
1810    ///
1811    /// Returns an error if hard limits are exceeded.
1812    /// Returns Ok(DegradationLevel) indicating current resource state.
1813    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1814        self.resource_guard.check()
1815    }
1816
1817    /// Check resources with logging.
1818    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1819        let level = self.resource_guard.check()?;
1820
1821        if level != DegradationLevel::Normal {
1822            warn!(
1823                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1824                phase,
1825                level,
1826                self.resource_guard.current_memory_mb(),
1827                self.resource_guard.available_disk_mb()
1828            );
1829        }
1830
1831        Ok(level)
1832    }
1833
1834    /// Get current degradation actions based on resource state.
1835    fn get_degradation_actions(&self) -> DegradationActions {
1836        self.resource_guard.get_actions()
1837    }
1838
1839    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1840    fn check_memory_limit(&self) -> SynthResult<()> {
1841        self.check_resources()?;
1842        Ok(())
1843    }
1844
1845    /// Run the complete generation workflow.
1846    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1847        info!("Starting enhanced generation workflow");
1848        info!(
1849            "Config: industry={:?}, period_months={}, companies={}",
1850            self.config.global.industry,
1851            self.config.global.period_months,
1852            self.config.companies.len()
1853        );
1854
1855        // Initial resource check before starting
1856        let initial_level = self.check_resources_with_log("initial")?;
1857        if initial_level == DegradationLevel::Emergency {
1858            return Err(SynthError::resource(
1859                "Insufficient resources to start generation",
1860            ));
1861        }
1862
1863        let mut stats = EnhancedGenerationStatistics {
1864            companies_count: self.config.companies.len(),
1865            period_months: self.config.global.period_months,
1866            ..Default::default()
1867        };
1868
1869        // Phase 1: Chart of Accounts
1870        let coa = self.phase_chart_of_accounts(&mut stats)?;
1871
1872        // Phase 2: Master Data
1873        self.phase_master_data(&mut stats)?;
1874
1875        // Emit master data to stream sink
1876        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1877        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1878        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1879
1880        // Phase 3: Document Flows + Subledger Linking
1881        let (mut document_flows, mut subledger, fa_journal_entries) =
1882            self.phase_document_flows(&mut stats)?;
1883
1884        // Emit document flows to stream sink
1885        self.emit_phase_items(
1886            "document_flows",
1887            "PurchaseOrder",
1888            &document_flows.purchase_orders,
1889        );
1890        self.emit_phase_items(
1891            "document_flows",
1892            "GoodsReceipt",
1893            &document_flows.goods_receipts,
1894        );
1895        self.emit_phase_items(
1896            "document_flows",
1897            "VendorInvoice",
1898            &document_flows.vendor_invoices,
1899        );
1900        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1901        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1902
1903        // Phase 3b: Opening Balances (before JE generation)
1904        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1905
1906        // Phase 3c: Convert opening balances to journal entries and prepend them.
1907        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
1908        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
1909        // balance map type.
1910        let opening_balance_jes: Vec<JournalEntry> = opening_balances
1911            .iter()
1912            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1913            .collect();
1914        if !opening_balance_jes.is_empty() {
1915            debug!(
1916                "Prepending {} opening balance JEs to entries",
1917                opening_balance_jes.len()
1918            );
1919        }
1920
1921        // Phase 4: Journal Entries
1922        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1923
1924        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
1925        // starts from the correct initial state.
1926        if !opening_balance_jes.is_empty() {
1927            let mut combined = opening_balance_jes;
1928            combined.extend(entries);
1929            entries = combined;
1930        }
1931
1932        // Phase 4c: Append FA acquisition journal entries to main entries
1933        if !fa_journal_entries.is_empty() {
1934            debug!(
1935                "Appending {} FA acquisition JEs to main entries",
1936                fa_journal_entries.len()
1937            );
1938            entries.extend(fa_journal_entries);
1939        }
1940
1941        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
1942        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1943
1944        // Get current degradation actions for optional phases
1945        let actions = self.get_degradation_actions();
1946
1947        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1948        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1949
1950        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
1951        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
1952        if !sourcing.contracts.is_empty() {
1953            let mut linked_count = 0usize;
1954            // Collect (vendor_id, po_id) pairs from P2P chains
1955            let po_vendor_pairs: Vec<(String, String)> = document_flows
1956                .p2p_chains
1957                .iter()
1958                .map(|chain| {
1959                    (
1960                        chain.purchase_order.vendor_id.clone(),
1961                        chain.purchase_order.header.document_id.clone(),
1962                    )
1963                })
1964                .collect();
1965
1966            for chain in &mut document_flows.p2p_chains {
1967                if chain.purchase_order.contract_id.is_none() {
1968                    if let Some(contract) = sourcing
1969                        .contracts
1970                        .iter()
1971                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1972                    {
1973                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1974                        linked_count += 1;
1975                    }
1976                }
1977            }
1978
1979            // Populate reverse FK: purchase_order_ids on each contract
1980            for contract in &mut sourcing.contracts {
1981                let po_ids: Vec<String> = po_vendor_pairs
1982                    .iter()
1983                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
1984                    .map(|(_, po_id)| po_id.clone())
1985                    .collect();
1986                if !po_ids.is_empty() {
1987                    contract.purchase_order_ids = po_ids;
1988                }
1989            }
1990
1991            if linked_count > 0 {
1992                debug!(
1993                    "Linked {} purchase orders to S2C contracts by vendor match",
1994                    linked_count
1995                );
1996            }
1997        }
1998
1999        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2000        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2001
2002        // Phase 5c: Append IC journal entries to main entries
2003        if !intercompany.seller_journal_entries.is_empty()
2004            || !intercompany.buyer_journal_entries.is_empty()
2005        {
2006            let ic_je_count = intercompany.seller_journal_entries.len()
2007                + intercompany.buyer_journal_entries.len();
2008            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2009            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2010            debug!(
2011                "Appended {} IC journal entries to main entries",
2012                ic_je_count
2013            );
2014        }
2015
2016        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2017        if !intercompany.elimination_entries.is_empty() {
2018            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2019                &intercompany.elimination_entries,
2020            );
2021            if !elim_jes.is_empty() {
2022                debug!(
2023                    "Appended {} elimination journal entries to main entries",
2024                    elim_jes.len()
2025                );
2026                // IC elimination net-zero validation
2027                let elim_debit: rust_decimal::Decimal =
2028                    elim_jes.iter().map(|je| je.total_debit()).sum();
2029                let elim_credit: rust_decimal::Decimal =
2030                    elim_jes.iter().map(|je| je.total_credit()).sum();
2031                if elim_debit != elim_credit {
2032                    warn!(
2033                        "IC elimination entries not balanced: debits={}, credits={}, diff={}",
2034                        elim_debit,
2035                        elim_credit,
2036                        elim_debit - elim_credit
2037                    );
2038                }
2039                entries.extend(elim_jes);
2040            }
2041        }
2042
2043        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2044        let hr = self.phase_hr_data(&mut stats)?;
2045
2046        // Phase 6b: Generate JEs from payroll runs
2047        if !hr.payroll_runs.is_empty() {
2048            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2049            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2050            entries.extend(payroll_jes);
2051        }
2052
2053        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2054        if !hr.pension_journal_entries.is_empty() {
2055            debug!(
2056                "Generated {} JEs from pension plans",
2057                hr.pension_journal_entries.len()
2058            );
2059            entries.extend(hr.pension_journal_entries.iter().cloned());
2060        }
2061
2062        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2063        if !hr.stock_comp_journal_entries.is_empty() {
2064            debug!(
2065                "Generated {} JEs from stock-based compensation",
2066                hr.stock_comp_journal_entries.len()
2067            );
2068            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2069        }
2070
2071        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2072        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2073
2074        // Phase 7a: Generate JEs from production orders
2075        if !manufacturing_snap.production_orders.is_empty() {
2076            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
2077            debug!("Generated {} JEs from production orders", mfg_jes.len());
2078            entries.extend(mfg_jes);
2079        }
2080
2081        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2082        //
2083        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2084        // subledger inventory positions.  Here we reconcile them so that position balances
2085        // reflect the actual stock movements within the generation period.
2086        if !manufacturing_snap.inventory_movements.is_empty()
2087            && !subledger.inventory_positions.is_empty()
2088        {
2089            use datasynth_core::models::MovementType as MfgMovementType;
2090            let mut receipt_count = 0usize;
2091            let mut issue_count = 0usize;
2092            for movement in &manufacturing_snap.inventory_movements {
2093                // Find a matching position by material code and company
2094                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2095                    p.material_id == movement.material_code
2096                        && p.company_code == movement.entity_code
2097                }) {
2098                    match movement.movement_type {
2099                        MfgMovementType::GoodsReceipt => {
2100                            // Increase stock and update weighted-average cost
2101                            pos.add_quantity(
2102                                movement.quantity,
2103                                movement.value,
2104                                movement.movement_date,
2105                            );
2106                            receipt_count += 1;
2107                        }
2108                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2109                            // Decrease stock (best-effort; silently skip if insufficient)
2110                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2111                            issue_count += 1;
2112                        }
2113                        _ => {}
2114                    }
2115                }
2116            }
2117            debug!(
2118                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2119                manufacturing_snap.inventory_movements.len(),
2120                receipt_count,
2121                issue_count,
2122            );
2123        }
2124
2125        // Update final entry/line-item stats after all JE-generating phases
2126        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2127        if !entries.is_empty() {
2128            stats.total_entries = entries.len() as u64;
2129            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2130            debug!(
2131                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2132                stats.total_entries, stats.total_line_items
2133            );
2134        }
2135
2136        // Phase 7b: Apply internal controls to journal entries
2137        if self.config.internal_controls.enabled && !entries.is_empty() {
2138            info!("Phase 7b: Applying internal controls to journal entries");
2139            let control_config = ControlGeneratorConfig {
2140                exception_rate: self.config.internal_controls.exception_rate,
2141                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2142                enable_sox_marking: true,
2143                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2144                    self.config.internal_controls.sox_materiality_threshold,
2145                )
2146                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2147                ..Default::default()
2148            };
2149            let mut control_gen = ControlGenerator::with_config(self.seed + 99, control_config);
2150            for entry in &mut entries {
2151                control_gen.apply_controls(entry, &coa);
2152            }
2153            let with_controls = entries
2154                .iter()
2155                .filter(|e| !e.header.control_ids.is_empty())
2156                .count();
2157            info!(
2158                "Applied controls to {} entries ({} with control IDs assigned)",
2159                entries.len(),
2160                with_controls
2161            );
2162        }
2163
2164        // Phase 7c: Extract SoD violations from annotated journal entries.
2165        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2166        // Here we materialise those flags into standalone SodViolation records.
2167        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2168            .iter()
2169            .filter(|e| e.header.sod_violation)
2170            .filter_map(|e| {
2171                e.header.sod_conflict_type.map(|ct| {
2172                    use datasynth_core::models::{RiskLevel, SodViolation};
2173                    let severity = match ct {
2174                        datasynth_core::models::SodConflictType::PaymentReleaser
2175                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2176                            RiskLevel::Critical
2177                        }
2178                        datasynth_core::models::SodConflictType::PreparerApprover
2179                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2180                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2181                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2182                            RiskLevel::High
2183                        }
2184                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2185                            RiskLevel::Medium
2186                        }
2187                    };
2188                    let action = format!(
2189                        "SoD conflict {:?} on entry {} ({})",
2190                        ct, e.header.document_id, e.header.company_code
2191                    );
2192                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2193                })
2194            })
2195            .collect();
2196        if !sod_violations.is_empty() {
2197            info!(
2198                "Phase 7c: Extracted {} SoD violations from {} entries",
2199                sod_violations.len(),
2200                entries.len()
2201            );
2202        }
2203
2204        // Emit journal entries to stream sink (after all JE-generating phases)
2205        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2206
2207        // Phase 8: Anomaly Injection (after all JE-generating phases)
2208        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2209
2210        // Emit anomaly labels to stream sink
2211        self.emit_phase_items(
2212            "anomaly_injection",
2213            "LabeledAnomaly",
2214            &anomaly_labels.labels,
2215        );
2216
2217        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2218        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2219
2220        // Emit red flags to stream sink
2221        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2222
2223        // Phase 26b: Collusion Ring Generation (after red flags)
2224        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2225
2226        // Emit collusion rings to stream sink
2227        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2228
2229        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2230        let balance_validation = self.phase_balance_validation(&entries)?;
2231
2232        // Phase 9b: GL-to-Subledger Reconciliation
2233        let subledger_reconciliation =
2234            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2235
2236        // Phase 10: Data Quality Injection
2237        let (data_quality_stats, quality_issues) =
2238            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2239
2240        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2241        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2242
2243        // Phase 11: Audit Data
2244        let audit = self.phase_audit_data(&entries, &mut stats)?;
2245
2246        // Phase 12: Banking KYC/AML Data
2247        let banking = self.phase_banking_data(&mut stats)?;
2248
2249        // Phase 13: Graph Export
2250        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2251
2252        // Phase 14: LLM Enrichment
2253        self.phase_llm_enrichment(&mut stats);
2254
2255        // Phase 15: Diffusion Enhancement
2256        self.phase_diffusion_enhancement(&mut stats);
2257
2258        // Phase 16: Causal Overlay
2259        self.phase_causal_overlay(&mut stats);
2260
2261        // Phase 17: Bank Reconciliation + Financial Statements
2262        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
2263        // provision data (from accounting_standards / tax snapshots) can be wired in.
2264        let mut financial_reporting = self.phase_financial_reporting(
2265            &document_flows,
2266            &entries,
2267            &coa,
2268            &hr,
2269            &audit,
2270            &mut stats,
2271        )?;
2272
2273        // BS coherence check: assets = liabilities + equity
2274        {
2275            use datasynth_core::models::StatementType;
2276            for stmt in &financial_reporting.consolidated_statements {
2277                if stmt.statement_type == StatementType::BalanceSheet {
2278                    let total_assets: rust_decimal::Decimal = stmt
2279                        .line_items
2280                        .iter()
2281                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
2282                        .map(|li| li.amount)
2283                        .sum();
2284                    let total_le: rust_decimal::Decimal = stmt
2285                        .line_items
2286                        .iter()
2287                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2288                        .map(|li| li.amount)
2289                        .sum();
2290                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2291                        warn!(
2292                            "BS equation imbalance: assets={}, L+E={}",
2293                            total_assets, total_le
2294                        );
2295                    }
2296                }
2297            }
2298        }
2299
2300        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
2301        let accounting_standards =
2302            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2303
2304        // Phase 18a: Merge ECL journal entries into main GL
2305        if !accounting_standards.ecl_journal_entries.is_empty() {
2306            debug!(
2307                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2308                accounting_standards.ecl_journal_entries.len()
2309            );
2310            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2311        }
2312
2313        // Phase 18a: Merge provision journal entries into main GL
2314        if !accounting_standards.provision_journal_entries.is_empty() {
2315            debug!(
2316                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2317                accounting_standards.provision_journal_entries.len()
2318            );
2319            entries.extend(
2320                accounting_standards
2321                    .provision_journal_entries
2322                    .iter()
2323                    .cloned(),
2324            );
2325        }
2326
2327        // Phase 18b: OCPM Events (after all process data is available)
2328        let ocpm = self.phase_ocpm_events(
2329            &document_flows,
2330            &sourcing,
2331            &hr,
2332            &manufacturing_snap,
2333            &banking,
2334            &audit,
2335            &financial_reporting,
2336            &mut stats,
2337        )?;
2338
2339        // Emit OCPM events to stream sink
2340        if let Some(ref event_log) = ocpm.event_log {
2341            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2342        }
2343
2344        // Phase 19: Sales Quotes, Management KPIs, Budgets
2345        let sales_kpi_budgets =
2346            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2347
2348        // Phase 20: Tax Generation
2349        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2350
2351        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
2352        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
2353        self.generate_notes_to_financial_statements(
2354            &mut financial_reporting,
2355            &accounting_standards,
2356            &tax,
2357            &hr,
2358            &audit,
2359        );
2360
2361        // Phase 21: ESG Data Generation
2362        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
2363
2364        // Phase 22: Treasury Data Generation
2365        let treasury =
2366            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2367
2368        // Phase 23: Project Accounting Data Generation
2369        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2370
2371        // Phase 24: Process Evolution + Organizational Events
2372        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2373
2374        // Phase 24b: Disruption Events
2375        let disruption_events = self.phase_disruption_events(&mut stats)?;
2376
2377        // Phase 27: Bi-Temporal Vendor Version Chains
2378        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2379
2380        // Phase 28: Entity Relationship Graph + Cross-Process Links
2381        let (entity_relationship_graph, cross_process_links) =
2382            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2383
2384        // Phase 29: Industry-specific GL accounts
2385        let industry_output = self.phase_industry_data(&mut stats);
2386
2387        // Phase: Compliance regulations (must run before hypergraph so it can be included)
2388        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2389
2390        // Phase 19b: Hypergraph Export (after all data is available)
2391        self.phase_hypergraph_export(
2392            &coa,
2393            &entries,
2394            &document_flows,
2395            &sourcing,
2396            &hr,
2397            &manufacturing_snap,
2398            &banking,
2399            &audit,
2400            &financial_reporting,
2401            &ocpm,
2402            &compliance_regulations,
2403            &mut stats,
2404        )?;
2405
2406        // Phase 10c: Additional graph builders (approval, entity, banking)
2407        // These run after all data is available since they need banking/IC data.
2408        if self.phase_config.generate_graph_export {
2409            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2410        }
2411
2412        // Log informational messages for config sections not yet fully wired
2413        if self.config.streaming.enabled {
2414            info!("Note: streaming config is enabled but batch mode does not use it");
2415        }
2416        if self.config.vendor_network.enabled {
2417            debug!("Vendor network config available; relationship graph generation is partial");
2418        }
2419        if self.config.customer_segmentation.enabled {
2420            debug!("Customer segmentation config available; segment-aware generation is partial");
2421        }
2422
2423        // Log final resource statistics
2424        let resource_stats = self.resource_guard.stats();
2425        info!(
2426            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2427            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2428            resource_stats.disk.estimated_bytes_written,
2429            resource_stats.degradation_level
2430        );
2431
2432        // Flush any remaining stream sink data
2433        if let Some(ref sink) = self.phase_sink {
2434            if let Err(e) = sink.flush() {
2435                warn!("Stream sink flush failed: {e}");
2436            }
2437        }
2438
2439        // Build data lineage graph
2440        let lineage = self.build_lineage_graph();
2441
2442        // Evaluate quality gates if enabled in config
2443        let gate_result = if self.config.quality_gates.enabled {
2444            let profile_name = &self.config.quality_gates.profile;
2445            match datasynth_eval::gates::get_profile(profile_name) {
2446                Some(profile) => {
2447                    // Build an evaluation populated with actual generation metrics.
2448                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
2449
2450                    // Populate balance sheet evaluation from balance validation results
2451                    if balance_validation.validated {
2452                        eval.coherence.balance =
2453                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
2454                                equation_balanced: balance_validation.is_balanced,
2455                                max_imbalance: (balance_validation.total_debits
2456                                    - balance_validation.total_credits)
2457                                    .abs(),
2458                                periods_evaluated: 1,
2459                                periods_imbalanced: if balance_validation.is_balanced {
2460                                    0
2461                                } else {
2462                                    1
2463                                },
2464                                period_results: Vec::new(),
2465                                companies_evaluated: self.config.companies.len(),
2466                            });
2467                    }
2468
2469                    // Set coherence passes based on balance validation
2470                    eval.coherence.passes = balance_validation.is_balanced;
2471                    if !balance_validation.is_balanced {
2472                        eval.coherence
2473                            .failures
2474                            .push("Balance sheet equation not satisfied".to_string());
2475                    }
2476
2477                    // Set statistical score based on entry count (basic sanity)
2478                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
2479                    eval.statistical.passes = !entries.is_empty();
2480
2481                    // Set quality score from data quality stats
2482                    eval.quality.overall_score = 0.9; // Default high for generated data
2483                    eval.quality.passes = true;
2484
2485                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
2486                    info!(
2487                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
2488                        profile_name, result.gates_passed, result.gates_total, result.summary
2489                    );
2490                    Some(result)
2491                }
2492                None => {
2493                    warn!(
2494                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
2495                        profile_name
2496                    );
2497                    None
2498                }
2499            }
2500        } else {
2501            None
2502        };
2503
2504        // Generate internal controls if enabled
2505        let internal_controls = if self.config.internal_controls.enabled {
2506            InternalControl::standard_controls()
2507        } else {
2508            Vec::new()
2509        };
2510
2511        Ok(EnhancedGenerationResult {
2512            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
2513            master_data: std::mem::take(&mut self.master_data),
2514            document_flows,
2515            subledger,
2516            ocpm,
2517            audit,
2518            banking,
2519            graph_export,
2520            sourcing,
2521            financial_reporting,
2522            hr,
2523            accounting_standards,
2524            manufacturing: manufacturing_snap,
2525            sales_kpi_budgets,
2526            tax,
2527            esg: esg_snap,
2528            treasury,
2529            project_accounting,
2530            process_evolution,
2531            organizational_events,
2532            disruption_events,
2533            intercompany,
2534            journal_entries: entries,
2535            anomaly_labels,
2536            balance_validation,
2537            data_quality_stats,
2538            quality_issues,
2539            statistics: stats,
2540            lineage: Some(lineage),
2541            gate_result,
2542            internal_controls,
2543            sod_violations,
2544            opening_balances,
2545            subledger_reconciliation,
2546            counterfactual_pairs,
2547            red_flags,
2548            collusion_rings,
2549            temporal_vendor_chains,
2550            entity_relationship_graph,
2551            cross_process_links,
2552            industry_output,
2553            compliance_regulations,
2554        })
2555    }
2556
2557    // ========================================================================
2558    // Generation Phase Methods
2559    // ========================================================================
2560
2561    /// Phase 1: Generate Chart of Accounts and update statistics.
2562    fn phase_chart_of_accounts(
2563        &mut self,
2564        stats: &mut EnhancedGenerationStatistics,
2565    ) -> SynthResult<Arc<ChartOfAccounts>> {
2566        info!("Phase 1: Generating Chart of Accounts");
2567        let coa = self.generate_coa()?;
2568        stats.accounts_count = coa.account_count();
2569        info!(
2570            "Chart of Accounts generated: {} accounts",
2571            stats.accounts_count
2572        );
2573        self.check_resources_with_log("post-coa")?;
2574        Ok(coa)
2575    }
2576
2577    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
2578    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2579        if self.phase_config.generate_master_data {
2580            info!("Phase 2: Generating Master Data");
2581            self.generate_master_data()?;
2582            stats.vendor_count = self.master_data.vendors.len();
2583            stats.customer_count = self.master_data.customers.len();
2584            stats.material_count = self.master_data.materials.len();
2585            stats.asset_count = self.master_data.assets.len();
2586            stats.employee_count = self.master_data.employees.len();
2587            info!(
2588                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
2589                stats.vendor_count, stats.customer_count, stats.material_count,
2590                stats.asset_count, stats.employee_count
2591            );
2592            self.check_resources_with_log("post-master-data")?;
2593        } else {
2594            debug!("Phase 2: Skipped (master data generation disabled)");
2595        }
2596        Ok(())
2597    }
2598
2599    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
2600    fn phase_document_flows(
2601        &mut self,
2602        stats: &mut EnhancedGenerationStatistics,
2603    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
2604        let mut document_flows = DocumentFlowSnapshot::default();
2605        let mut subledger = SubledgerSnapshot::default();
2606        // Dunning JEs (interest + charges) accumulated here and merged into the
2607        // main FA-JE list below so they appear in the GL.
2608        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
2609
2610        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
2611            info!("Phase 3: Generating Document Flows");
2612            self.generate_document_flows(&mut document_flows)?;
2613            stats.p2p_chain_count = document_flows.p2p_chains.len();
2614            stats.o2c_chain_count = document_flows.o2c_chains.len();
2615            info!(
2616                "Document flows generated: {} P2P chains, {} O2C chains",
2617                stats.p2p_chain_count, stats.o2c_chain_count
2618            );
2619
2620            // Phase 3b: Link document flows to subledgers (for data coherence)
2621            debug!("Phase 3b: Linking document flows to subledgers");
2622            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
2623            stats.ap_invoice_count = subledger.ap_invoices.len();
2624            stats.ar_invoice_count = subledger.ar_invoices.len();
2625            debug!(
2626                "Subledgers linked: {} AP invoices, {} AR invoices",
2627                stats.ap_invoice_count, stats.ar_invoice_count
2628            );
2629
2630            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
2631            // Without this step the subledger is systematically overstated because
2632            // amount_remaining is set at invoice creation and never reduced by
2633            // the payments that were generated in the document-flow phase.
2634            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
2635            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
2636            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
2637            debug!("Payment settlements applied to AP and AR subledgers");
2638
2639            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
2640            // The as-of date is the last day of the configured period.
2641            if let Ok(start_date) =
2642                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2643            {
2644                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
2645                    - chrono::Days::new(1);
2646                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
2647                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
2648                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
2649                // derived from JE-level aggregation and will typically differ. This is a known
2650                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
2651                // generated independently. A future reconciliation phase should align them by
2652                // using subledger totals as the authoritative source for BS Receivables.
2653                for company in &self.config.companies {
2654                    let ar_report = ARAgingReport::from_invoices(
2655                        company.code.clone(),
2656                        &subledger.ar_invoices,
2657                        as_of_date,
2658                    );
2659                    subledger.ar_aging_reports.push(ar_report);
2660
2661                    let ap_report = APAgingReport::from_invoices(
2662                        company.code.clone(),
2663                        &subledger.ap_invoices,
2664                        as_of_date,
2665                    );
2666                    subledger.ap_aging_reports.push(ap_report);
2667                }
2668                debug!(
2669                    "AR/AP aging reports built: {} AR, {} AP",
2670                    subledger.ar_aging_reports.len(),
2671                    subledger.ap_aging_reports.len()
2672                );
2673
2674                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
2675                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
2676                {
2677                    use datasynth_generators::DunningGenerator;
2678                    let mut dunning_gen = DunningGenerator::new(self.seed + 2000);
2679                    for company in &self.config.companies {
2680                        let currency = company.currency.as_str();
2681                        // Collect mutable references to AR invoices for this company
2682                        // (dunning generator updates dunning_info on invoices in-place).
2683                        let mut company_invoices: Vec<
2684                            datasynth_core::models::subledger::ar::ARInvoice,
2685                        > = subledger
2686                            .ar_invoices
2687                            .iter()
2688                            .filter(|inv| inv.company_code == company.code)
2689                            .cloned()
2690                            .collect();
2691
2692                        if company_invoices.is_empty() {
2693                            continue;
2694                        }
2695
2696                        let result = dunning_gen.execute_dunning_run(
2697                            &company.code,
2698                            as_of_date,
2699                            &mut company_invoices,
2700                            currency,
2701                        );
2702
2703                        // Write back updated dunning info to the main AR invoice list
2704                        for updated in &company_invoices {
2705                            if let Some(orig) = subledger
2706                                .ar_invoices
2707                                .iter_mut()
2708                                .find(|i| i.invoice_number == updated.invoice_number)
2709                            {
2710                                orig.dunning_info = updated.dunning_info.clone();
2711                            }
2712                        }
2713
2714                        subledger.dunning_runs.push(result.dunning_run);
2715                        subledger.dunning_letters.extend(result.letters);
2716                        // Dunning JEs (interest + charges) collected into local buffer.
2717                        dunning_journal_entries.extend(result.journal_entries);
2718                    }
2719                    debug!(
2720                        "Dunning runs complete: {} runs, {} letters",
2721                        subledger.dunning_runs.len(),
2722                        subledger.dunning_letters.len()
2723                    );
2724                }
2725            }
2726
2727            self.check_resources_with_log("post-document-flows")?;
2728        } else {
2729            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
2730        }
2731
2732        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
2733        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
2734        if !self.master_data.assets.is_empty() {
2735            debug!("Generating FA subledger records");
2736            let company_code = self
2737                .config
2738                .companies
2739                .first()
2740                .map(|c| c.code.as_str())
2741                .unwrap_or("1000");
2742            let currency = self
2743                .config
2744                .companies
2745                .first()
2746                .map(|c| c.currency.as_str())
2747                .unwrap_or("USD");
2748
2749            let mut fa_gen = datasynth_generators::FAGenerator::new(
2750                datasynth_generators::FAGeneratorConfig::default(),
2751                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
2752            );
2753
2754            for asset in &self.master_data.assets {
2755                let (record, je) = fa_gen.generate_asset_acquisition(
2756                    company_code,
2757                    &format!("{:?}", asset.asset_class),
2758                    &asset.description,
2759                    asset.acquisition_date,
2760                    currency,
2761                    asset.cost_center.as_deref(),
2762                );
2763                subledger.fa_records.push(record);
2764                fa_journal_entries.push(je);
2765            }
2766
2767            stats.fa_subledger_count = subledger.fa_records.len();
2768            debug!(
2769                "FA subledger records generated: {} (with {} acquisition JEs)",
2770                stats.fa_subledger_count,
2771                fa_journal_entries.len()
2772            );
2773        }
2774
2775        // Generate Inventory subledger records from master data materials
2776        if !self.master_data.materials.is_empty() {
2777            debug!("Generating Inventory subledger records");
2778            let first_company = self.config.companies.first();
2779            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
2780            let inv_currency = first_company
2781                .map(|c| c.currency.clone())
2782                .unwrap_or_else(|| "USD".to_string());
2783
2784            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
2785                datasynth_generators::InventoryGeneratorConfig::default(),
2786                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
2787                inv_currency.clone(),
2788            );
2789
2790            for (i, material) in self.master_data.materials.iter().enumerate() {
2791                let plant = format!("PLANT{:02}", (i % 3) + 1);
2792                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
2793                let initial_qty = rust_decimal::Decimal::from(
2794                    material
2795                        .safety_stock
2796                        .to_string()
2797                        .parse::<i64>()
2798                        .unwrap_or(100),
2799                );
2800
2801                let position = inv_gen.generate_position(
2802                    company_code,
2803                    &plant,
2804                    &storage_loc,
2805                    &material.material_id,
2806                    &material.description,
2807                    initial_qty,
2808                    Some(material.standard_cost),
2809                    &inv_currency,
2810                );
2811                subledger.inventory_positions.push(position);
2812            }
2813
2814            stats.inventory_subledger_count = subledger.inventory_positions.len();
2815            debug!(
2816                "Inventory subledger records generated: {}",
2817                stats.inventory_subledger_count
2818            );
2819        }
2820
2821        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
2822        if !subledger.fa_records.is_empty() {
2823            if let Ok(start_date) =
2824                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2825            {
2826                let company_code = self
2827                    .config
2828                    .companies
2829                    .first()
2830                    .map(|c| c.code.as_str())
2831                    .unwrap_or("1000");
2832                let fiscal_year = start_date.year();
2833                let start_period = start_date.month();
2834                let end_period =
2835                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
2836
2837                let depr_cfg = FaDepreciationScheduleConfig {
2838                    fiscal_year,
2839                    start_period,
2840                    end_period,
2841                    seed_offset: 800,
2842                };
2843                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
2844                let runs = depr_gen.generate(company_code, &subledger.fa_records);
2845                let run_count = runs.len();
2846                subledger.depreciation_runs = runs;
2847                debug!(
2848                    "Depreciation runs generated: {} runs for {} periods",
2849                    run_count, self.config.global.period_months
2850                );
2851            }
2852        }
2853
2854        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
2855        if !subledger.inventory_positions.is_empty() {
2856            if let Ok(start_date) =
2857                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2858            {
2859                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
2860                    - chrono::Days::new(1);
2861
2862                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
2863                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
2864
2865                for company in &self.config.companies {
2866                    let result = inv_val_gen.generate(
2867                        &company.code,
2868                        &subledger.inventory_positions,
2869                        as_of_date,
2870                    );
2871                    subledger.inventory_valuations.push(result);
2872                }
2873                debug!(
2874                    "Inventory valuations generated: {} company reports",
2875                    subledger.inventory_valuations.len()
2876                );
2877            }
2878        }
2879
2880        Ok((document_flows, subledger, fa_journal_entries))
2881    }
2882
2883    /// Phase 3c: Generate OCPM events from document flows.
2884    #[allow(clippy::too_many_arguments)]
2885    fn phase_ocpm_events(
2886        &mut self,
2887        document_flows: &DocumentFlowSnapshot,
2888        sourcing: &SourcingSnapshot,
2889        hr: &HrSnapshot,
2890        manufacturing: &ManufacturingSnapshot,
2891        banking: &BankingSnapshot,
2892        audit: &AuditSnapshot,
2893        financial_reporting: &FinancialReportingSnapshot,
2894        stats: &mut EnhancedGenerationStatistics,
2895    ) -> SynthResult<OcpmSnapshot> {
2896        let degradation = self.check_resources()?;
2897        if degradation >= DegradationLevel::Reduced {
2898            debug!(
2899                "Phase skipped due to resource pressure (degradation: {:?})",
2900                degradation
2901            );
2902            return Ok(OcpmSnapshot::default());
2903        }
2904        if self.phase_config.generate_ocpm_events {
2905            info!("Phase 3c: Generating OCPM Events");
2906            let ocpm_snapshot = self.generate_ocpm_events(
2907                document_flows,
2908                sourcing,
2909                hr,
2910                manufacturing,
2911                banking,
2912                audit,
2913                financial_reporting,
2914            )?;
2915            stats.ocpm_event_count = ocpm_snapshot.event_count;
2916            stats.ocpm_object_count = ocpm_snapshot.object_count;
2917            stats.ocpm_case_count = ocpm_snapshot.case_count;
2918            info!(
2919                "OCPM events generated: {} events, {} objects, {} cases",
2920                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
2921            );
2922            self.check_resources_with_log("post-ocpm")?;
2923            Ok(ocpm_snapshot)
2924        } else {
2925            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
2926            Ok(OcpmSnapshot::default())
2927        }
2928    }
2929
2930    /// Phase 4: Generate journal entries from document flows and standalone generation.
2931    fn phase_journal_entries(
2932        &mut self,
2933        coa: &Arc<ChartOfAccounts>,
2934        document_flows: &DocumentFlowSnapshot,
2935        _stats: &mut EnhancedGenerationStatistics,
2936    ) -> SynthResult<Vec<JournalEntry>> {
2937        let mut entries = Vec::new();
2938
2939        // Phase 4a: Generate JEs from document flows (for data coherence)
2940        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
2941            debug!("Phase 4a: Generating JEs from document flows");
2942            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
2943            debug!("Generated {} JEs from document flows", flow_entries.len());
2944            entries.extend(flow_entries);
2945        }
2946
2947        // Phase 4b: Generate standalone journal entries
2948        if self.phase_config.generate_journal_entries {
2949            info!("Phase 4: Generating Journal Entries");
2950            let je_entries = self.generate_journal_entries(coa)?;
2951            info!("Generated {} standalone journal entries", je_entries.len());
2952            entries.extend(je_entries);
2953        } else {
2954            debug!("Phase 4: Skipped (journal entry generation disabled)");
2955        }
2956
2957        if !entries.is_empty() {
2958            // Note: stats.total_entries/total_line_items are set in generate()
2959            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
2960            self.check_resources_with_log("post-journal-entries")?;
2961        }
2962
2963        Ok(entries)
2964    }
2965
2966    /// Phase 5: Inject anomalies into journal entries.
2967    fn phase_anomaly_injection(
2968        &mut self,
2969        entries: &mut [JournalEntry],
2970        actions: &DegradationActions,
2971        stats: &mut EnhancedGenerationStatistics,
2972    ) -> SynthResult<AnomalyLabels> {
2973        if self.phase_config.inject_anomalies
2974            && !entries.is_empty()
2975            && !actions.skip_anomaly_injection
2976        {
2977            info!("Phase 5: Injecting Anomalies");
2978            let result = self.inject_anomalies(entries)?;
2979            stats.anomalies_injected = result.labels.len();
2980            info!("Injected {} anomalies", stats.anomalies_injected);
2981            self.check_resources_with_log("post-anomaly-injection")?;
2982            Ok(result)
2983        } else if actions.skip_anomaly_injection {
2984            warn!("Phase 5: Skipped due to resource degradation");
2985            Ok(AnomalyLabels::default())
2986        } else {
2987            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
2988            Ok(AnomalyLabels::default())
2989        }
2990    }
2991
2992    /// Phase 6: Validate balance sheet equation on journal entries.
2993    fn phase_balance_validation(
2994        &mut self,
2995        entries: &[JournalEntry],
2996    ) -> SynthResult<BalanceValidationResult> {
2997        if self.phase_config.validate_balances && !entries.is_empty() {
2998            debug!("Phase 6: Validating Balances");
2999            let balance_validation = self.validate_journal_entries(entries)?;
3000            if balance_validation.is_balanced {
3001                debug!("Balance validation passed");
3002            } else {
3003                warn!(
3004                    "Balance validation found {} errors",
3005                    balance_validation.validation_errors.len()
3006                );
3007            }
3008            Ok(balance_validation)
3009        } else {
3010            Ok(BalanceValidationResult::default())
3011        }
3012    }
3013
3014    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
3015    fn phase_data_quality_injection(
3016        &mut self,
3017        entries: &mut [JournalEntry],
3018        actions: &DegradationActions,
3019        stats: &mut EnhancedGenerationStatistics,
3020    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3021        if self.phase_config.inject_data_quality
3022            && !entries.is_empty()
3023            && !actions.skip_data_quality
3024        {
3025            info!("Phase 7: Injecting Data Quality Variations");
3026            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3027            stats.data_quality_issues = dq_stats.records_with_issues;
3028            info!("Injected {} data quality issues", stats.data_quality_issues);
3029            self.check_resources_with_log("post-data-quality")?;
3030            Ok((dq_stats, quality_issues))
3031        } else if actions.skip_data_quality {
3032            warn!("Phase 7: Skipped due to resource degradation");
3033            Ok((DataQualityStats::default(), Vec::new()))
3034        } else {
3035            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3036            Ok((DataQualityStats::default(), Vec::new()))
3037        }
3038    }
3039
    /// Phase 10b: Generate period-close journal entries.
    ///
    /// Does nothing (returns `Ok(())`) when period close is disabled in the phase
    /// configuration or when `entries` is empty. Otherwise it builds, in this order:
    ///
    /// 1. Depreciation JEs per asset: DR `expense_accounts::DEPRECIATION` / CR
    ///    `control_accounts::ACCUMULATED_DEPRECIATION`, using straight-line amortisation
    ///    of the FA subledger records over the configured period.
    /// 2. Accrued-expense JEs per company (utilities, rent, interest), each sized at
    ///    0.5% of the company's revenue found in `entries`, plus the reversal JE when
    ///    the accrual generator returns one.
    /// 3. Tax JE per company: for positive pre-tax income DR `tax_accounts::TAX_EXPENSE`
    ///    / CR `tax_accounts::INCOME_TAX_PAYABLE`; for a pre-tax loss
    ///    DR `tax_accounts::DEFERRED_TAX_ASSET` / CR `tax_accounts::TAX_EXPENSE`.
    /// 4. Income statement closing JE per company: posts net income between
    ///    `equity_accounts::INCOME_SUMMARY` and `equity_accounts::RETAINED_EARNINGS`.
    ///
    /// All generated JEs are posted on the last day of the period, emitted through
    /// `emit_phase_items`, appended to `entries`, and reflected in
    /// `stats.period_close_je_count`, `stats.total_entries` and `stats.total_line_items`.
    ///
    /// # Errors
    /// Returns `SynthError::config` when `config.global.start_date` cannot be parsed
    /// as `%Y-%m-%d`.
    fn phase_period_close(
        &mut self,
        entries: &mut Vec<JournalEntry>,
        subledger: &SubledgerSnapshot,
        stats: &mut EnhancedGenerationStatistics,
    ) -> SynthResult<()> {
        if !self.phase_config.generate_period_close || entries.is_empty() {
            debug!("Phase 10b: Skipped (period close disabled or no entries)");
            return Ok(());
        }

        info!("Phase 10b: Generating period-close journal entries");

        use datasynth_core::accounts::{
            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
        };
        use rust_decimal::Decimal;

        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
        // `end_date` is the first day after the period (start + period_months).
        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
        // Posting date for close entries is the last day of the period
        let close_date = end_date - chrono::Days::new(1);

        // Statutory tax rate (21% — configurable rates come in later tiers)
        let tax_rate = Decimal::new(21, 2); // 0.21

        // Collect company codes from config
        let company_codes: Vec<String> = self
            .config
            .companies
            .iter()
            .map(|c| c.code.clone())
            .collect();

        // Estimate capacity: one JE per FA record + 2 JEs per company (tax + close).
        // This is only an allocation hint; accrual JEs are not counted in it.
        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);

        // --- Depreciation JEs (per asset) ---
        // Compute period depreciation for each active fixed asset using straight-line method.
        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
        let period_months = self.config.global.period_months;
        for asset in &subledger.fa_records {
            // Skip assets that are inactive / fully depreciated / non-depreciable
            use datasynth_core::models::subledger::fa::AssetStatus;
            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
                continue;
            }
            let useful_life_months = asset.useful_life_months();
            if useful_life_months == 0 {
                // Land or CIP — not depreciated
                continue;
            }
            let salvage_value = asset.salvage_value();
            // Clamp at zero so a salvage value above cost never yields negative depreciation.
            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
            if depreciable_base == Decimal::ZERO {
                continue;
            }
            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
                * Decimal::from(period_months))
            .round_dp(2);
            // Rounding to 2 dp can reduce a tiny amount to zero; skip those as well.
            if period_depr <= Decimal::ZERO {
                continue;
            }

            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
            depr_header.document_type = "CL".to_string();
            depr_header.header_text = Some(format!(
                "Depreciation - {} {}",
                asset.asset_number, asset.description
            ));
            depr_header.created_by = "CLOSE_ENGINE".to_string();
            depr_header.source = TransactionSource::Automated;
            depr_header.business_process = Some(BusinessProcess::R2R);

            // Capture the document id before the header is moved into the JE; the
            // lines below are tagged with it.
            let doc_id = depr_header.document_id;
            let mut depr_je = JournalEntry::new(depr_header);

            // DR Depreciation Expense (6000)
            depr_je.add_line(JournalEntryLine::debit(
                doc_id,
                1,
                expense_accounts::DEPRECIATION.to_string(),
                period_depr,
            ));
            // CR Accumulated Depreciation (1510)
            depr_je.add_line(JournalEntryLine::credit(
                doc_id,
                2,
                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
                period_depr,
            ));

            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
            close_jes.push(depr_je);
        }

        if !subledger.fa_records.is_empty() {
            // At this point `close_jes` holds depreciation JEs only, so its length is
            // the depreciation JE count.
            debug!(
                "Generated {} depreciation JEs from {} FA records",
                close_jes.len(),
                subledger.fa_records.len()
            );
        }

        // --- Accrual entries (standard period-end accruals per company) ---
        // Generate standard accrued expense entries (utilities, rent, interest) using
        // a revenue-based estimate. The expense / liability accounts for each item are
        // listed in `accrual_items` below.
        {
            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());

            // Standard accrual items: (description, expense_acct, liability_acct)
            let accrual_items: &[(&str, &str, &str)] = &[
                ("Accrued Utilities", "6200", "2100"),
                ("Accrued Rent", "6300", "2100"),
                ("Accrued Interest", "6100", "2150"),
            ];

            for company_code in &company_codes {
                // Estimate company revenue from existing JEs: net credits on accounts
                // whose number starts with '4'.
                let company_revenue: Decimal = entries
                    .iter()
                    .filter(|e| e.header.company_code == *company_code)
                    .flat_map(|e| e.lines.iter())
                    .filter(|l| l.gl_account.starts_with('4'))
                    .map(|l| l.credit_amount - l.debit_amount)
                    .fold(Decimal::ZERO, |acc, v| acc + v);

                if company_revenue <= Decimal::ZERO {
                    continue;
                }

                // Use 0.5% of period revenue per accrual item as a proxy
                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
                if accrual_base <= Decimal::ZERO {
                    continue;
                }

                for (description, expense_acct, liability_acct) in accrual_items {
                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
                        company_code,
                        description,
                        accrual_base,
                        expense_acct,
                        liability_acct,
                        close_date,
                        None,
                    );
                    close_jes.push(accrual_je);
                    if let Some(rev_je) = reversal_je {
                        close_jes.push(rev_je);
                    }
                }
            }

            // The figure logged here is the number of configured companies, including
            // any that were skipped above for having no positive revenue.
            debug!(
                "Generated accrual entries for {} companies",
                company_codes.len()
            );
        }

        for company_code in &company_codes {
            // Calculate net income for this company from existing JEs:
            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
            // Revenue (4xxx): credit-normal, so net = credits - debits
            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
            //
            // NOTE(review): this scans `entries` only. The depreciation and accrual JEs
            // built above still live in `close_jes` (they are appended to `entries` at the
            // end of this function), so they are not part of `pre_tax_income` — confirm
            // this is intended.
            let mut total_revenue = Decimal::ZERO;
            let mut total_expenses = Decimal::ZERO;

            for entry in entries.iter() {
                if entry.header.company_code != *company_code {
                    continue;
                }
                for line in &entry.lines {
                    let category = AccountCategory::from_account(&line.gl_account);
                    match category {
                        AccountCategory::Revenue => {
                            // Revenue is credit-normal: net revenue = credits - debits
                            total_revenue += line.credit_amount - line.debit_amount;
                        }
                        AccountCategory::Cogs
                        | AccountCategory::OperatingExpense
                        | AccountCategory::OtherIncomeExpense
                        | AccountCategory::Tax => {
                            // Expenses are debit-normal: net expense = debits - credits
                            total_expenses += line.debit_amount - line.credit_amount;
                        }
                        // Every other category is ignored for the income calculation.
                        _ => {}
                    }
                }
            }

            let pre_tax_income = total_revenue - total_expenses;

            // Skip if no income statement activity
            if pre_tax_income == Decimal::ZERO {
                debug!(
                    "Company {}: no pre-tax income, skipping period close",
                    company_code
                );
                continue;
            }

            // --- Tax provision / DTA JE ---
            if pre_tax_income > Decimal::ZERO {
                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);

                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
                tax_header.document_type = "CL".to_string();
                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
                tax_header.created_by = "CLOSE_ENGINE".to_string();
                tax_header.source = TransactionSource::Automated;
                tax_header.business_process = Some(BusinessProcess::R2R);

                let doc_id = tax_header.document_id;
                let mut tax_je = JournalEntry::new(tax_header);

                // DR Tax Expense (8000)
                tax_je.add_line(JournalEntryLine::debit(
                    doc_id,
                    1,
                    tax_accounts::TAX_EXPENSE.to_string(),
                    tax_amount,
                ));
                // CR Income Tax Payable (2130)
                tax_je.add_line(JournalEntryLine::credit(
                    doc_id,
                    2,
                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
                    tax_amount,
                ));

                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
                close_jes.push(tax_je);
            } else {
                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
                if dta_amount > Decimal::ZERO {
                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
                    dta_header.document_type = "CL".to_string();
                    dta_header.header_text =
                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
                    dta_header.created_by = "CLOSE_ENGINE".to_string();
                    dta_header.source = TransactionSource::Automated;
                    dta_header.business_process = Some(BusinessProcess::R2R);

                    let doc_id = dta_header.document_id;
                    let mut dta_je = JournalEntry::new(dta_header);

                    // DR Deferred Tax Asset (1600)
                    dta_je.add_line(JournalEntryLine::debit(
                        doc_id,
                        1,
                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
                        dta_amount,
                    ));
                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
                    // reflecting the benefit of the future deductible temporary difference.
                    dta_je.add_line(JournalEntryLine::credit(
                        doc_id,
                        2,
                        tax_accounts::TAX_EXPENSE.to_string(),
                        dta_amount,
                    ));

                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
                    close_jes.push(dta_je);
                    debug!(
                        "Company {}: loss year — recognised DTA of {}",
                        company_code, dta_amount
                    );
                }
            }

            // --- Income statement closing JE ---
            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
            // For a loss year the DTA JE above already recognises the deferred benefit; here we
            // close the pre-tax loss into Retained Earnings as-is.
            // The provision is recomputed with the same formula as `tax_amount` above, so
            // both figures agree to the cent.
            let tax_provision = if pre_tax_income > Decimal::ZERO {
                (pre_tax_income * tax_rate).round_dp(2)
            } else {
                Decimal::ZERO
            };
            let net_income = pre_tax_income - tax_provision;

            if net_income != Decimal::ZERO {
                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
                close_header.document_type = "CL".to_string();
                close_header.header_text =
                    Some(format!("Income statement close - {}", company_code));
                close_header.created_by = "CLOSE_ENGINE".to_string();
                close_header.source = TransactionSource::Automated;
                close_header.business_process = Some(BusinessProcess::R2R);

                let doc_id = close_header.document_id;
                let mut close_je = JournalEntry::new(close_header);

                // Line amounts are always positive; the sign of `net_income` only picks
                // which side each account is posted on.
                let abs_net_income = net_income.abs();

                if net_income > Decimal::ZERO {
                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
                    close_je.add_line(JournalEntryLine::debit(
                        doc_id,
                        1,
                        equity_accounts::INCOME_SUMMARY.to_string(),
                        abs_net_income,
                    ));
                    close_je.add_line(JournalEntryLine::credit(
                        doc_id,
                        2,
                        equity_accounts::RETAINED_EARNINGS.to_string(),
                        abs_net_income,
                    ));
                } else {
                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
                    close_je.add_line(JournalEntryLine::debit(
                        doc_id,
                        1,
                        equity_accounts::RETAINED_EARNINGS.to_string(),
                        abs_net_income,
                    ));
                    close_je.add_line(JournalEntryLine::credit(
                        doc_id,
                        2,
                        equity_accounts::INCOME_SUMMARY.to_string(),
                        abs_net_income,
                    ));
                }

                debug_assert!(
                    close_je.is_balanced(),
                    "Income statement closing JE must be balanced"
                );
                close_jes.push(close_je);
            }
        }

        // Capture the count before `close_jes` is moved into `entries`.
        let close_count = close_jes.len();
        if close_count > 0 {
            info!("Generated {} period-close journal entries", close_count);
            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
            entries.extend(close_jes);
            stats.period_close_je_count = close_count;

            // Update total entry/line-item stats
            stats.total_entries = entries.len() as u64;
            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
        } else {
            debug!("No period-close entries generated (no income statement activity)");
        }

        Ok(())
    }
3406
3407    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
3408    fn phase_audit_data(
3409        &mut self,
3410        entries: &[JournalEntry],
3411        stats: &mut EnhancedGenerationStatistics,
3412    ) -> SynthResult<AuditSnapshot> {
3413        if self.phase_config.generate_audit {
3414            info!("Phase 8: Generating Audit Data");
3415            let audit_snapshot = self.generate_audit_data(entries)?;
3416            stats.audit_engagement_count = audit_snapshot.engagements.len();
3417            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
3418            stats.audit_evidence_count = audit_snapshot.evidence.len();
3419            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
3420            stats.audit_finding_count = audit_snapshot.findings.len();
3421            stats.audit_judgment_count = audit_snapshot.judgments.len();
3422            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
3423            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
3424            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
3425            stats.audit_sample_count = audit_snapshot.samples.len();
3426            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
3427            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
3428            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
3429            stats.audit_related_party_count = audit_snapshot.related_parties.len();
3430            stats.audit_related_party_transaction_count =
3431                audit_snapshot.related_party_transactions.len();
3432            info!(
3433                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
3434                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
3435                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
3436                 {} RP transactions",
3437                stats.audit_engagement_count,
3438                stats.audit_workpaper_count,
3439                stats.audit_evidence_count,
3440                stats.audit_risk_count,
3441                stats.audit_finding_count,
3442                stats.audit_judgment_count,
3443                stats.audit_confirmation_count,
3444                stats.audit_procedure_step_count,
3445                stats.audit_sample_count,
3446                stats.audit_analytical_result_count,
3447                stats.audit_ia_function_count,
3448                stats.audit_ia_report_count,
3449                stats.audit_related_party_count,
3450                stats.audit_related_party_transaction_count,
3451            );
3452            self.check_resources_with_log("post-audit")?;
3453            Ok(audit_snapshot)
3454        } else {
3455            debug!("Phase 8: Skipped (audit generation disabled)");
3456            Ok(AuditSnapshot::default())
3457        }
3458    }
3459
3460    /// Phase 9: Generate banking KYC/AML data.
3461    fn phase_banking_data(
3462        &mut self,
3463        stats: &mut EnhancedGenerationStatistics,
3464    ) -> SynthResult<BankingSnapshot> {
3465        if self.phase_config.generate_banking {
3466            info!("Phase 9: Generating Banking KYC/AML Data");
3467            let banking_snapshot = self.generate_banking_data()?;
3468            stats.banking_customer_count = banking_snapshot.customers.len();
3469            stats.banking_account_count = banking_snapshot.accounts.len();
3470            stats.banking_transaction_count = banking_snapshot.transactions.len();
3471            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
3472            info!(
3473                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
3474                stats.banking_customer_count, stats.banking_account_count,
3475                stats.banking_transaction_count, stats.banking_suspicious_count
3476            );
3477            self.check_resources_with_log("post-banking")?;
3478            Ok(banking_snapshot)
3479        } else {
3480            debug!("Phase 9: Skipped (banking generation disabled)");
3481            Ok(BankingSnapshot::default())
3482        }
3483    }
3484
3485    /// Phase 10: Export accounting network graphs for ML training.
3486    fn phase_graph_export(
3487        &mut self,
3488        entries: &[JournalEntry],
3489        coa: &Arc<ChartOfAccounts>,
3490        stats: &mut EnhancedGenerationStatistics,
3491    ) -> SynthResult<GraphExportSnapshot> {
3492        if self.phase_config.generate_graph_export && !entries.is_empty() {
3493            info!("Phase 10: Exporting Accounting Network Graphs");
3494            match self.export_graphs(entries, coa, stats) {
3495                Ok(snapshot) => {
3496                    info!(
3497                        "Graph export complete: {} graphs ({} nodes, {} edges)",
3498                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
3499                    );
3500                    Ok(snapshot)
3501                }
3502                Err(e) => {
3503                    warn!("Phase 10: Graph export failed: {}", e);
3504                    Ok(GraphExportSnapshot::default())
3505                }
3506            }
3507        } else {
3508            debug!("Phase 10: Skipped (graph export disabled or no entries)");
3509            Ok(GraphExportSnapshot::default())
3510        }
3511    }
3512
3513    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
3514    #[allow(clippy::too_many_arguments)]
3515    fn phase_hypergraph_export(
3516        &self,
3517        coa: &Arc<ChartOfAccounts>,
3518        entries: &[JournalEntry],
3519        document_flows: &DocumentFlowSnapshot,
3520        sourcing: &SourcingSnapshot,
3521        hr: &HrSnapshot,
3522        manufacturing: &ManufacturingSnapshot,
3523        banking: &BankingSnapshot,
3524        audit: &AuditSnapshot,
3525        financial_reporting: &FinancialReportingSnapshot,
3526        ocpm: &OcpmSnapshot,
3527        compliance: &ComplianceRegulationsSnapshot,
3528        stats: &mut EnhancedGenerationStatistics,
3529    ) -> SynthResult<()> {
3530        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
3531            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
3532            match self.export_hypergraph(
3533                coa,
3534                entries,
3535                document_flows,
3536                sourcing,
3537                hr,
3538                manufacturing,
3539                banking,
3540                audit,
3541                financial_reporting,
3542                ocpm,
3543                compliance,
3544                stats,
3545            ) {
3546                Ok(info) => {
3547                    info!(
3548                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
3549                        info.node_count, info.edge_count, info.hyperedge_count
3550                    );
3551                }
3552                Err(e) => {
3553                    warn!("Phase 10b: Hypergraph export failed: {}", e);
3554                }
3555            }
3556        } else {
3557            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
3558        }
3559        Ok(())
3560    }
3561
3562    /// Phase 11: LLM Enrichment.
3563    ///
3564    /// Uses an LLM provider (mock by default) to enrich vendor names with
3565    /// realistic, context-aware names. This phase is non-blocking: failures
3566    /// log a warning but do not stop the generation pipeline.
3567    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
3568        if !self.config.llm.enabled {
3569            debug!("Phase 11: Skipped (LLM enrichment disabled)");
3570            return;
3571        }
3572
3573        info!("Phase 11: Starting LLM Enrichment");
3574        let start = std::time::Instant::now();
3575
3576        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3577            // Select provider: use HttpLlmProvider when a non-mock provider is configured
3578            // and the corresponding API key environment variable is present.
3579            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
3580                let schema_provider = &self.config.llm.provider;
3581                let api_key_env = match schema_provider.as_str() {
3582                    "openai" => Some("OPENAI_API_KEY"),
3583                    "anthropic" => Some("ANTHROPIC_API_KEY"),
3584                    "custom" => Some("LLM_API_KEY"),
3585                    _ => None,
3586                };
3587                if let Some(key_env) = api_key_env {
3588                    if std::env::var(key_env).is_ok() {
3589                        let llm_config = datasynth_core::llm::LlmConfig {
3590                            model: self.config.llm.model.clone(),
3591                            api_key_env: key_env.to_string(),
3592                            ..datasynth_core::llm::LlmConfig::default()
3593                        };
3594                        match HttpLlmProvider::new(llm_config) {
3595                            Ok(p) => Arc::new(p),
3596                            Err(e) => {
3597                                warn!(
3598                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
3599                                    e
3600                                );
3601                                Arc::new(MockLlmProvider::new(self.seed))
3602                            }
3603                        }
3604                    } else {
3605                        Arc::new(MockLlmProvider::new(self.seed))
3606                    }
3607                } else {
3608                    Arc::new(MockLlmProvider::new(self.seed))
3609                }
3610            };
3611            let enricher = VendorLlmEnricher::new(provider);
3612
3613            let industry = format!("{:?}", self.config.global.industry);
3614            let max_enrichments = self
3615                .config
3616                .llm
3617                .max_vendor_enrichments
3618                .min(self.master_data.vendors.len());
3619
3620            let mut enriched_count = 0usize;
3621            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
3622                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
3623                    Ok(name) => {
3624                        vendor.name = name;
3625                        enriched_count += 1;
3626                    }
3627                    Err(e) => {
3628                        warn!(
3629                            "LLM vendor enrichment failed for {}: {}",
3630                            vendor.vendor_id, e
3631                        );
3632                    }
3633                }
3634            }
3635
3636            enriched_count
3637        }));
3638
3639        match result {
3640            Ok(enriched_count) => {
3641                stats.llm_vendors_enriched = enriched_count;
3642                let elapsed = start.elapsed();
3643                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3644                info!(
3645                    "Phase 11 complete: {} vendors enriched in {}ms",
3646                    enriched_count, stats.llm_enrichment_ms
3647                );
3648            }
3649            Err(_) => {
3650                let elapsed = start.elapsed();
3651                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3652                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
3653            }
3654        }
3655    }
3656
3657    /// Phase 12: Diffusion Enhancement.
3658    ///
3659    /// Generates a sample set using the statistical diffusion backend to
3660    /// demonstrate distribution-matching data generation. This phase is
3661    /// non-blocking: failures log a warning but do not stop the pipeline.
3662    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
3663        if !self.config.diffusion.enabled {
3664            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
3665            return;
3666        }
3667
3668        info!("Phase 12: Starting Diffusion Enhancement");
3669        let start = std::time::Instant::now();
3670
3671        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3672            // Target distribution: transaction amounts (log-normal-like)
3673            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
3674            let stds = vec![2000.0, 1.5, 1.0];
3675
3676            let diffusion_config = DiffusionConfig {
3677                n_steps: self.config.diffusion.n_steps,
3678                seed: self.seed,
3679                ..Default::default()
3680            };
3681
3682            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
3683
3684            let n_samples = self.config.diffusion.sample_size;
3685            let n_features = 3; // amount, line_items, approval_level
3686            let samples = backend.generate(n_samples, n_features, self.seed);
3687
3688            samples.len()
3689        }));
3690
3691        match result {
3692            Ok(sample_count) => {
3693                stats.diffusion_samples_generated = sample_count;
3694                let elapsed = start.elapsed();
3695                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
3696                info!(
3697                    "Phase 12 complete: {} diffusion samples generated in {}ms",
3698                    sample_count, stats.diffusion_enhancement_ms
3699                );
3700            }
3701            Err(_) => {
3702                let elapsed = start.elapsed();
3703                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
3704                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
3705            }
3706        }
3707    }
3708
3709    /// Phase 13: Causal Overlay.
3710    ///
3711    /// Builds a structural causal model from a built-in template (e.g.,
3712    /// fraud_detection) and generates causal samples. Optionally validates
3713    /// that the output respects the causal structure. This phase is
3714    /// non-blocking: failures log a warning but do not stop the pipeline.
3715    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
3716        if !self.config.causal.enabled {
3717            debug!("Phase 13: Skipped (causal generation disabled)");
3718            return;
3719        }
3720
3721        info!("Phase 13: Starting Causal Overlay");
3722        let start = std::time::Instant::now();
3723
3724        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3725            // Select template based on config
3726            let graph = match self.config.causal.template.as_str() {
3727                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
3728                _ => CausalGraph::fraud_detection_template(),
3729            };
3730
3731            let scm = StructuralCausalModel::new(graph.clone())
3732                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
3733
3734            let n_samples = self.config.causal.sample_size;
3735            let samples = scm
3736                .generate(n_samples, self.seed)
3737                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
3738
3739            // Optionally validate causal structure
3740            let validation_passed = if self.config.causal.validate {
3741                let report = CausalValidator::validate_causal_structure(&samples, &graph);
3742                if report.valid {
3743                    info!(
3744                        "Causal validation passed: all {} checks OK",
3745                        report.checks.len()
3746                    );
3747                } else {
3748                    warn!(
3749                        "Causal validation: {} violations detected: {:?}",
3750                        report.violations.len(),
3751                        report.violations
3752                    );
3753                }
3754                Some(report.valid)
3755            } else {
3756                None
3757            };
3758
3759            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
3760        }));
3761
3762        match result {
3763            Ok(Ok((sample_count, validation_passed))) => {
3764                stats.causal_samples_generated = sample_count;
3765                stats.causal_validation_passed = validation_passed;
3766                let elapsed = start.elapsed();
3767                stats.causal_generation_ms = elapsed.as_millis() as u64;
3768                info!(
3769                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
3770                    sample_count, stats.causal_generation_ms, validation_passed,
3771                );
3772            }
3773            Ok(Err(e)) => {
3774                let elapsed = start.elapsed();
3775                stats.causal_generation_ms = elapsed.as_millis() as u64;
3776                warn!("Phase 13: Causal generation failed: {}", e);
3777            }
3778            Err(_) => {
3779                let elapsed = start.elapsed();
3780                stats.causal_generation_ms = elapsed.as_millis() as u64;
3781                warn!("Phase 13: Causal generation failed (panic caught), continuing");
3782            }
3783        }
3784    }
3785
3786    /// Phase 14: Generate S2C sourcing data.
3787    fn phase_sourcing_data(
3788        &mut self,
3789        stats: &mut EnhancedGenerationStatistics,
3790    ) -> SynthResult<SourcingSnapshot> {
3791        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
3792            debug!("Phase 14: Skipped (sourcing generation disabled)");
3793            return Ok(SourcingSnapshot::default());
3794        }
3795        let degradation = self.check_resources()?;
3796        if degradation >= DegradationLevel::Reduced {
3797            debug!(
3798                "Phase skipped due to resource pressure (degradation: {:?})",
3799                degradation
3800            );
3801            return Ok(SourcingSnapshot::default());
3802        }
3803
3804        info!("Phase 14: Generating S2C Sourcing Data");
3805        let seed = self.seed;
3806
3807        // Gather vendor data from master data
3808        let vendor_ids: Vec<String> = self
3809            .master_data
3810            .vendors
3811            .iter()
3812            .map(|v| v.vendor_id.clone())
3813            .collect();
3814        if vendor_ids.is_empty() {
3815            debug!("Phase 14: Skipped (no vendors available)");
3816            return Ok(SourcingSnapshot::default());
3817        }
3818
3819        let categories: Vec<(String, String)> = vec![
3820            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
3821            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
3822            ("CAT-IT".to_string(), "IT Equipment".to_string()),
3823            ("CAT-SVC".to_string(), "Professional Services".to_string()),
3824            ("CAT-LOG".to_string(), "Logistics".to_string()),
3825        ];
3826        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
3827            .iter()
3828            .map(|(id, name)| {
3829                (
3830                    id.clone(),
3831                    name.clone(),
3832                    rust_decimal::Decimal::from(100_000),
3833                )
3834            })
3835            .collect();
3836
3837        let company_code = self
3838            .config
3839            .companies
3840            .first()
3841            .map(|c| c.code.as_str())
3842            .unwrap_or("1000");
3843        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3844            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3845        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3846        let fiscal_year = start_date.year() as u16;
3847        let owner_ids: Vec<String> = self
3848            .master_data
3849            .employees
3850            .iter()
3851            .take(5)
3852            .map(|e| e.employee_id.clone())
3853            .collect();
3854        let owner_id = owner_ids
3855            .first()
3856            .map(std::string::String::as_str)
3857            .unwrap_or("BUYER-001");
3858
3859        // Step 1: Spend Analysis
3860        let mut spend_gen = SpendAnalysisGenerator::new(seed);
3861        let spend_analyses =
3862            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
3863
3864        // Step 2: Sourcing Projects
3865        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
3866        let sourcing_projects = if owner_ids.is_empty() {
3867            Vec::new()
3868        } else {
3869            project_gen.generate(
3870                company_code,
3871                &categories_with_spend,
3872                &owner_ids,
3873                start_date,
3874                self.config.global.period_months,
3875            )
3876        };
3877        stats.sourcing_project_count = sourcing_projects.len();
3878
3879        // Step 3: Qualifications
3880        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
3881        let mut qual_gen = QualificationGenerator::new(seed + 2);
3882        let qualifications = qual_gen.generate(
3883            company_code,
3884            &qual_vendor_ids,
3885            sourcing_projects.first().map(|p| p.project_id.as_str()),
3886            owner_id,
3887            start_date,
3888        );
3889
3890        // Step 4: RFx Events
3891        let mut rfx_gen = RfxGenerator::new(seed + 3);
3892        let rfx_events: Vec<RfxEvent> = sourcing_projects
3893            .iter()
3894            .map(|proj| {
3895                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
3896                rfx_gen.generate(
3897                    company_code,
3898                    &proj.project_id,
3899                    &proj.category_id,
3900                    &qualified_vids,
3901                    owner_id,
3902                    start_date,
3903                    50000.0,
3904                )
3905            })
3906            .collect();
3907        stats.rfx_event_count = rfx_events.len();
3908
3909        // Step 5: Bids
3910        let mut bid_gen = BidGenerator::new(seed + 4);
3911        let mut all_bids = Vec::new();
3912        for rfx in &rfx_events {
3913            let bidder_count = vendor_ids.len().clamp(2, 5);
3914            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
3915            let bids = bid_gen.generate(rfx, &responding, start_date);
3916            all_bids.extend(bids);
3917        }
3918        stats.bid_count = all_bids.len();
3919
3920        // Step 6: Bid Evaluations
3921        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
3922        let bid_evaluations: Vec<BidEvaluation> = rfx_events
3923            .iter()
3924            .map(|rfx| {
3925                let rfx_bids: Vec<SupplierBid> = all_bids
3926                    .iter()
3927                    .filter(|b| b.rfx_id == rfx.rfx_id)
3928                    .cloned()
3929                    .collect();
3930                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
3931            })
3932            .collect();
3933
3934        // Step 7: Contracts from winning bids
3935        let mut contract_gen = ContractGenerator::new(seed + 6);
3936        let contracts: Vec<ProcurementContract> = bid_evaluations
3937            .iter()
3938            .zip(rfx_events.iter())
3939            .filter_map(|(eval, rfx)| {
3940                eval.ranked_bids.first().and_then(|winner| {
3941                    all_bids
3942                        .iter()
3943                        .find(|b| b.bid_id == winner.bid_id)
3944                        .map(|winning_bid| {
3945                            contract_gen.generate_from_bid(
3946                                winning_bid,
3947                                Some(&rfx.sourcing_project_id),
3948                                &rfx.category_id,
3949                                owner_id,
3950                                start_date,
3951                            )
3952                        })
3953                })
3954            })
3955            .collect();
3956        stats.contract_count = contracts.len();
3957
3958        // Step 8: Catalog Items
3959        let mut catalog_gen = CatalogGenerator::new(seed + 7);
3960        let catalog_items = catalog_gen.generate(&contracts);
3961        stats.catalog_item_count = catalog_items.len();
3962
3963        // Step 9: Scorecards
3964        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
3965        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
3966            .iter()
3967            .fold(
3968                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
3969                |mut acc, c| {
3970                    acc.entry(c.vendor_id.clone()).or_default().push(c);
3971                    acc
3972                },
3973            )
3974            .into_iter()
3975            .collect();
3976        let scorecards = scorecard_gen.generate(
3977            company_code,
3978            &vendor_contracts,
3979            start_date,
3980            end_date,
3981            owner_id,
3982        );
3983        stats.scorecard_count = scorecards.len();
3984
3985        // Back-populate cross-references on sourcing projects (Task 35)
3986        // Link each project to its RFx events, contracts, and spend analyses
3987        let mut sourcing_projects = sourcing_projects;
3988        for project in &mut sourcing_projects {
3989            // Link RFx events generated for this project
3990            project.rfx_ids = rfx_events
3991                .iter()
3992                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
3993                .map(|rfx| rfx.rfx_id.clone())
3994                .collect();
3995
3996            // Link contract awarded from this project's RFx
3997            project.contract_id = contracts
3998                .iter()
3999                .find(|c| {
4000                    c.sourcing_project_id
4001                        .as_deref()
4002                        .is_some_and(|sp| sp == project.project_id)
4003                })
4004                .map(|c| c.contract_id.clone());
4005
4006            // Link spend analysis for matching category (use category_id as the reference)
4007            project.spend_analysis_id = spend_analyses
4008                .iter()
4009                .find(|sa| sa.category_id == project.category_id)
4010                .map(|sa| sa.category_id.clone());
4011        }
4012
4013        info!(
4014            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4015            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4016            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4017        );
4018        self.check_resources_with_log("post-sourcing")?;
4019
4020        Ok(SourcingSnapshot {
4021            spend_analyses,
4022            sourcing_projects,
4023            qualifications,
4024            rfx_events,
4025            bids: all_bids,
4026            bid_evaluations,
4027            contracts,
4028            catalog_items,
4029            scorecards,
4030        })
4031    }
4032
4033    /// Build a [`GroupStructure`] from the current company configuration.
4034    ///
4035    /// The first company in the configuration is treated as the ultimate parent.
4036    /// All remaining companies become wholly-owned (100 %) subsidiaries with
4037    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
4038    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4039        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4040
4041        let parent_code = self
4042            .config
4043            .companies
4044            .first()
4045            .map(|c| c.code.clone())
4046            .unwrap_or_else(|| "PARENT".to_string());
4047
4048        let mut group = GroupStructure::new(parent_code);
4049
4050        for company in self.config.companies.iter().skip(1) {
4051            let sub =
4052                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4053            group.add_subsidiary(sub);
4054        }
4055
4056        group
4057    }
4058
4059    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
4060    fn phase_intercompany(
4061        &mut self,
4062        journal_entries: &[JournalEntry],
4063        stats: &mut EnhancedGenerationStatistics,
4064    ) -> SynthResult<IntercompanySnapshot> {
4065        // Skip if intercompany is disabled in config
4066        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4067            debug!("Phase 14b: Skipped (intercompany generation disabled)");
4068            return Ok(IntercompanySnapshot::default());
4069        }
4070
4071        // Intercompany requires at least 2 companies
4072        if self.config.companies.len() < 2 {
4073            debug!(
4074                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4075                self.config.companies.len()
4076            );
4077            return Ok(IntercompanySnapshot::default());
4078        }
4079
4080        info!("Phase 14b: Generating Intercompany Transactions");
4081
4082        // Build the group structure early — used by ISA 600 component auditor scope
4083        // and consolidated financial statement generators downstream.
4084        let group_structure = self.build_group_structure();
4085        debug!(
4086            "Group structure built: parent={}, subsidiaries={}",
4087            group_structure.parent_entity,
4088            group_structure.subsidiaries.len()
4089        );
4090
4091        let seed = self.seed;
4092        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4093            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4094        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4095
4096        // Build ownership structure from company configs
4097        // First company is treated as the parent, remaining are subsidiaries
4098        let parent_code = self.config.companies[0].code.clone();
4099        let mut ownership_structure =
4100            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4101
4102        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4103            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4104                format!("REL{:03}", i + 1),
4105                parent_code.clone(),
4106                company.code.clone(),
4107                rust_decimal::Decimal::from(100), // Default 100% ownership
4108                start_date,
4109            );
4110            ownership_structure.add_relationship(relationship);
4111        }
4112
4113        // Convert config transfer pricing method to core model enum
4114        let tp_method = match self.config.intercompany.transfer_pricing_method {
4115            datasynth_config::schema::TransferPricingMethod::CostPlus => {
4116                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4117            }
4118            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4119                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4120            }
4121            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4122                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4123            }
4124            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4125                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4126            }
4127            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4128                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4129            }
4130        };
4131
4132        // Build IC generator config from schema config
4133        let ic_currency = self
4134            .config
4135            .companies
4136            .first()
4137            .map(|c| c.currency.clone())
4138            .unwrap_or_else(|| "USD".to_string());
4139        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4140            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4141            transfer_pricing_method: tp_method,
4142            markup_percent: rust_decimal::Decimal::from_f64_retain(
4143                self.config.intercompany.markup_percent,
4144            )
4145            .unwrap_or(rust_decimal::Decimal::from(5)),
4146            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4147            default_currency: ic_currency,
4148            ..Default::default()
4149        };
4150
4151        // Create IC generator
4152        let mut ic_generator = datasynth_generators::ICGenerator::new(
4153            ic_gen_config,
4154            ownership_structure.clone(),
4155            seed + 50,
4156        );
4157
4158        // Generate IC transactions for the period
4159        // Use ~3 transactions per day as a reasonable default
4160        let transactions_per_day = 3;
4161        let matched_pairs = ic_generator.generate_transactions_for_period(
4162            start_date,
4163            end_date,
4164            transactions_per_day,
4165        );
4166
4167        // Generate journal entries from matched pairs
4168        let mut seller_entries = Vec::new();
4169        let mut buyer_entries = Vec::new();
4170        let fiscal_year = start_date.year();
4171
4172        for pair in &matched_pairs {
4173            let fiscal_period = pair.posting_date.month();
4174            let (seller_je, buyer_je) =
4175                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4176            seller_entries.push(seller_je);
4177            buyer_entries.push(buyer_je);
4178        }
4179
4180        // Run matching engine
4181        let matching_config = datasynth_generators::ICMatchingConfig {
4182            base_currency: self
4183                .config
4184                .companies
4185                .first()
4186                .map(|c| c.currency.clone())
4187                .unwrap_or_else(|| "USD".to_string()),
4188            ..Default::default()
4189        };
4190        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4191        matching_engine.load_matched_pairs(&matched_pairs);
4192        let matching_result = matching_engine.run_matching(end_date);
4193
4194        // Generate elimination entries if configured
4195        let mut elimination_entries = Vec::new();
4196        if self.config.intercompany.generate_eliminations {
4197            let elim_config = datasynth_generators::EliminationConfig {
4198                consolidation_entity: "GROUP".to_string(),
4199                base_currency: self
4200                    .config
4201                    .companies
4202                    .first()
4203                    .map(|c| c.currency.clone())
4204                    .unwrap_or_else(|| "USD".to_string()),
4205                ..Default::default()
4206            };
4207
4208            let mut elim_generator =
4209                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4210
4211            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4212            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4213                matching_result
4214                    .matched_balances
4215                    .iter()
4216                    .chain(matching_result.unmatched_balances.iter())
4217                    .cloned()
4218                    .collect();
4219
4220            // Build investment and equity maps from the group structure so that the
4221            // elimination generator can produce equity-investment elimination entries
4222            // (parent's investment in subsidiary vs. subsidiary's equity capital).
4223            //
4224            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
4225            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
4226            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
4227            //
4228            // Net assets are derived from the journal entries using account-range heuristics:
4229            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
4230            // no JE data is available (IC phase runs early in the generation pipeline).
4231            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4232                std::collections::HashMap::new();
4233            let mut equity_amounts: std::collections::HashMap<
4234                String,
4235                std::collections::HashMap<String, rust_decimal::Decimal>,
4236            > = std::collections::HashMap::new();
4237            {
4238                use rust_decimal::Decimal;
4239                let hundred = Decimal::from(100u32);
4240                let ten_pct = Decimal::new(10, 2); // 0.10
4241                let thirty_pct = Decimal::new(30, 2); // 0.30
4242                let sixty_pct = Decimal::new(60, 2); // 0.60
4243                let parent_code = &group_structure.parent_entity;
4244                for sub in &group_structure.subsidiaries {
4245                    let net_assets = {
4246                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4247                        if na > Decimal::ZERO {
4248                            na
4249                        } else {
4250                            Decimal::from(1_000_000u64)
4251                        }
4252                    };
4253                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
4254                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4255                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4256
4257                    // Split subsidiary equity into conventional components:
4258                    // 10 % share capital / 30 % APIC / 60 % retained earnings
4259                    let mut eq_map = std::collections::HashMap::new();
4260                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4261                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4262                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4263                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
4264                }
4265            }
4266
4267            let journal = elim_generator.generate_eliminations(
4268                &fiscal_period,
4269                end_date,
4270                &all_balances,
4271                &matched_pairs,
4272                &investment_amounts,
4273                &equity_amounts,
4274            );
4275
4276            elimination_entries = journal.entries.clone();
4277        }
4278
4279        let matched_pair_count = matched_pairs.len();
4280        let elimination_entry_count = elimination_entries.len();
4281        let match_rate = matching_result.match_rate;
4282
4283        stats.ic_matched_pair_count = matched_pair_count;
4284        stats.ic_elimination_count = elimination_entry_count;
4285        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4286
4287        info!(
4288            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4289            matched_pair_count,
4290            stats.ic_transaction_count,
4291            seller_entries.len(),
4292            buyer_entries.len(),
4293            elimination_entry_count,
4294            match_rate * 100.0
4295        );
4296        self.check_resources_with_log("post-intercompany")?;
4297
4298        // ----------------------------------------------------------------
4299        // NCI measurements: derive from group structure ownership percentages
4300        // ----------------------------------------------------------------
4301        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4302            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4303            use rust_decimal::Decimal;
4304
4305            let eight_pct = Decimal::new(8, 2); // 0.08
4306
4307            group_structure
4308                .subsidiaries
4309                .iter()
4310                .filter(|sub| {
4311                    sub.nci_percentage > Decimal::ZERO
4312                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4313                })
4314                .map(|sub| {
4315                    // Compute net assets from actual journal entries for this subsidiary.
4316                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
4317                    // IC phase runs before the main JE batch has been populated).
4318                    let net_assets_from_jes =
4319                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4320
4321                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
4322                        net_assets_from_jes.round_dp(2)
4323                    } else {
4324                        // Fallback: use a plausible base amount
4325                        Decimal::from(1_000_000u64)
4326                    };
4327
4328                    // Net income approximated as 8% of net assets
4329                    let net_income = (net_assets * eight_pct).round_dp(2);
4330
4331                    NciMeasurement::compute(
4332                        sub.entity_code.clone(),
4333                        sub.nci_percentage,
4334                        net_assets,
4335                        net_income,
4336                    )
4337                })
4338                .collect()
4339        };
4340
4341        if !nci_measurements.is_empty() {
4342            info!(
4343                "NCI measurements: {} subsidiaries with non-controlling interests",
4344                nci_measurements.len()
4345            );
4346        }
4347
4348        Ok(IntercompanySnapshot {
4349            group_structure: Some(group_structure),
4350            matched_pairs,
4351            seller_journal_entries: seller_entries,
4352            buyer_journal_entries: buyer_entries,
4353            elimination_entries,
4354            nci_measurements,
4355            matched_pair_count,
4356            elimination_entry_count,
4357            match_rate,
4358        })
4359    }
4360
4361    /// Phase 15: Generate bank reconciliations and financial statements.
4362    fn phase_financial_reporting(
4363        &mut self,
4364        document_flows: &DocumentFlowSnapshot,
4365        journal_entries: &[JournalEntry],
4366        coa: &Arc<ChartOfAccounts>,
4367        _hr: &HrSnapshot,
4368        _audit: &AuditSnapshot,
4369        stats: &mut EnhancedGenerationStatistics,
4370    ) -> SynthResult<FinancialReportingSnapshot> {
4371        let fs_enabled = self.phase_config.generate_financial_statements
4372            || self.config.financial_reporting.enabled;
4373        let br_enabled = self.phase_config.generate_bank_reconciliation;
4374
4375        if !fs_enabled && !br_enabled {
4376            debug!("Phase 15: Skipped (financial reporting disabled)");
4377            return Ok(FinancialReportingSnapshot::default());
4378        }
4379
4380        info!("Phase 15: Generating Financial Reporting Data");
4381
4382        let seed = self.seed;
4383        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4384            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4385
4386        let mut financial_statements = Vec::new();
4387        let mut bank_reconciliations = Vec::new();
4388        let mut trial_balances = Vec::new();
4389        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4390        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4391            Vec::new();
4392        // Standalone statements keyed by entity code
4393        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
4394            std::collections::HashMap::new();
4395        // Consolidated statements (one per period)
4396        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
4397        // Consolidation schedules (one per period)
4398        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
4399
4400        // Generate financial statements from JE-derived trial balances.
4401        //
4402        // When journal entries are available, we use cumulative trial balances for
4403        // balance sheet accounts and current-period trial balances for income
4404        // statement accounts. We also track prior-period trial balances so the
4405        // generator can produce comparative amounts, and we build a proper
4406        // cash flow statement from working capital changes rather than random data.
4407        if fs_enabled {
4408            let has_journal_entries = !journal_entries.is_empty();
4409
4410            // Use FinancialStatementGenerator for balance sheet and income statement,
4411            // but build cash flow ourselves from TB data when JEs are available.
4412            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
4413            // Separate generator for consolidated statements (different seed offset)
4414            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
4415
4416            // Collect elimination JEs once (reused across periods)
4417            let elimination_entries: Vec<&JournalEntry> = journal_entries
4418                .iter()
4419                .filter(|je| je.header.is_elimination)
4420                .collect();
4421
4422            // Generate one set of statements per period, per entity
4423            for period in 0..self.config.global.period_months {
4424                let period_start = start_date + chrono::Months::new(period);
4425                let period_end =
4426                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4427                let fiscal_year = period_end.year() as u16;
4428                let fiscal_period = period_end.month() as u8;
4429                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4430
4431                // Build per-entity trial balances for this period (non-elimination JEs)
4432                // We accumulate them for the consolidation step.
4433                let mut entity_tb_map: std::collections::HashMap<
4434                    String,
4435                    std::collections::HashMap<String, rust_decimal::Decimal>,
4436                > = std::collections::HashMap::new();
4437
4438                // --- Standalone: one set of statements per company ---
4439                for (company_idx, company) in self.config.companies.iter().enumerate() {
4440                    let company_code = company.code.as_str();
4441                    let currency = company.currency.as_str();
4442                    // Use a unique seed offset per company to keep statements deterministic
4443                    // and distinct across companies
4444                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
4445                    let mut company_fs_gen =
4446                        FinancialStatementGenerator::new(seed + company_seed_offset);
4447
4448                    if has_journal_entries {
4449                        let tb_entries = Self::build_cumulative_trial_balance(
4450                            journal_entries,
4451                            coa,
4452                            company_code,
4453                            start_date,
4454                            period_end,
4455                            fiscal_year,
4456                            fiscal_period,
4457                        );
4458
4459                        // Accumulate per-entity category balances for consolidation
4460                        let entity_cat_map =
4461                            entity_tb_map.entry(company_code.to_string()).or_default();
4462                        for tb_entry in &tb_entries {
4463                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
4464                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
4465                        }
4466
4467                        let stmts = company_fs_gen.generate(
4468                            company_code,
4469                            currency,
4470                            &tb_entries,
4471                            period_start,
4472                            period_end,
4473                            fiscal_year,
4474                            fiscal_period,
4475                            None,
4476                            "SYS-AUTOCLOSE",
4477                        );
4478
4479                        let mut entity_stmts = Vec::new();
4480                        for stmt in stmts {
4481                            if stmt.statement_type == StatementType::CashFlowStatement {
4482                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
4483                                let cf_items = Self::build_cash_flow_from_trial_balances(
4484                                    &tb_entries,
4485                                    None,
4486                                    net_income,
4487                                );
4488                                entity_stmts.push(FinancialStatement {
4489                                    cash_flow_items: cf_items,
4490                                    ..stmt
4491                                });
4492                            } else {
4493                                entity_stmts.push(stmt);
4494                            }
4495                        }
4496
4497                        // Add to the flat financial_statements list (used by KPI/budget)
4498                        financial_statements.extend(entity_stmts.clone());
4499
4500                        // Store standalone per-entity
4501                        standalone_statements
4502                            .entry(company_code.to_string())
4503                            .or_default()
4504                            .extend(entity_stmts);
4505
4506                        // Only store trial balance for the first company in the period
4507                        // to avoid duplicates in the trial_balances list
4508                        if company_idx == 0 {
4509                            trial_balances.push(PeriodTrialBalance {
4510                                fiscal_year,
4511                                fiscal_period,
4512                                period_start,
4513                                period_end,
4514                                entries: tb_entries,
4515                            });
4516                        }
4517                    } else {
4518                        // Fallback: no JEs available
4519                        let tb_entries = Self::build_trial_balance_from_entries(
4520                            journal_entries,
4521                            coa,
4522                            company_code,
4523                            fiscal_year,
4524                            fiscal_period,
4525                        );
4526
4527                        let stmts = company_fs_gen.generate(
4528                            company_code,
4529                            currency,
4530                            &tb_entries,
4531                            period_start,
4532                            period_end,
4533                            fiscal_year,
4534                            fiscal_period,
4535                            None,
4536                            "SYS-AUTOCLOSE",
4537                        );
4538                        financial_statements.extend(stmts.clone());
4539                        standalone_statements
4540                            .entry(company_code.to_string())
4541                            .or_default()
4542                            .extend(stmts);
4543
4544                        if company_idx == 0 && !tb_entries.is_empty() {
4545                            trial_balances.push(PeriodTrialBalance {
4546                                fiscal_year,
4547                                fiscal_period,
4548                                period_start,
4549                                period_end,
4550                                entries: tb_entries,
4551                            });
4552                        }
4553                    }
4554                }
4555
4556                // --- Consolidated: aggregate all entities + apply eliminations ---
4557                // Use the primary (first) company's currency for the consolidated statement
4558                let group_currency = self
4559                    .config
4560                    .companies
4561                    .first()
4562                    .map(|c| c.currency.as_str())
4563                    .unwrap_or("USD");
4564
4565                // Build owned elimination entries for this period
4566                let period_eliminations: Vec<JournalEntry> = elimination_entries
4567                    .iter()
4568                    .filter(|je| {
4569                        je.header.fiscal_year == fiscal_year
4570                            && je.header.fiscal_period == fiscal_period
4571                    })
4572                    .map(|je| (*je).clone())
4573                    .collect();
4574
4575                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
4576                    &entity_tb_map,
4577                    &period_eliminations,
4578                    &period_label,
4579                );
4580
4581                // Build a pseudo trial balance from consolidated line items for the
4582                // FinancialStatementGenerator to use (only for cash flow direction).
4583                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
4584                    .line_items
4585                    .iter()
4586                    .map(|li| {
4587                        let net = li.post_elimination_total;
4588                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
4589                            (net, rust_decimal::Decimal::ZERO)
4590                        } else {
4591                            (rust_decimal::Decimal::ZERO, -net)
4592                        };
4593                        datasynth_generators::TrialBalanceEntry {
4594                            account_code: li.account_category.clone(),
4595                            account_name: li.account_category.clone(),
4596                            category: li.account_category.clone(),
4597                            debit_balance: debit,
4598                            credit_balance: credit,
4599                        }
4600                    })
4601                    .collect();
4602
4603                let mut cons_stmts = cons_gen.generate(
4604                    "GROUP",
4605                    group_currency,
4606                    &cons_tb,
4607                    period_start,
4608                    period_end,
4609                    fiscal_year,
4610                    fiscal_period,
4611                    None,
4612                    "SYS-AUTOCLOSE",
4613                );
4614
4615                // Split consolidated line items by statement type.
4616                // The consolidation generator returns BS items first, then IS items,
4617                // identified by their CONS- prefix and category.
4618                let bs_categories: &[&str] = &[
4619                    "CASH",
4620                    "RECEIVABLES",
4621                    "INVENTORY",
4622                    "FIXEDASSETS",
4623                    "PAYABLES",
4624                    "ACCRUEDLIABILITIES",
4625                    "LONGTERMDEBT",
4626                    "EQUITY",
4627                ];
4628                let (bs_items, is_items): (Vec<_>, Vec<_>) =
4629                    cons_line_items.into_iter().partition(|li| {
4630                        let upper = li.label.to_uppercase();
4631                        bs_categories.iter().any(|c| upper == *c)
4632                    });
4633
4634                for stmt in &mut cons_stmts {
4635                    stmt.is_consolidated = true;
4636                    match stmt.statement_type {
4637                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
4638                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
4639                        _ => {} // CF and equity change statements keep generator output
4640                    }
4641                }
4642
4643                consolidated_statements.extend(cons_stmts);
4644                consolidation_schedules.push(schedule);
4645            }
4646
4647            // Backward compat: if only 1 company, use existing code path logic
4648            // (prior_cumulative_tb for comparative amounts). Already handled above;
4649            // the prior_ref is omitted to keep this change minimal.
4650            let _ = &mut fs_gen; // suppress unused warning
4651
4652            stats.financial_statement_count = financial_statements.len();
4653            info!(
4654                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
4655                stats.financial_statement_count,
4656                consolidated_statements.len(),
4657                has_journal_entries
4658            );
4659
4660            // ----------------------------------------------------------------
4661            // IFRS 8 / ASC 280: Operating Segment Reporting
4662            // ----------------------------------------------------------------
4663            // Build entity seeds from the company configuration.
4664            let entity_seeds: Vec<SegmentSeed> = self
4665                .config
4666                .companies
4667                .iter()
4668                .map(|c| SegmentSeed {
4669                    code: c.code.clone(),
4670                    name: c.name.clone(),
4671                    currency: c.currency.clone(),
4672                })
4673                .collect();
4674
4675            let mut seg_gen = SegmentGenerator::new(seed + 30);
4676
4677            // Generate one set of segment reports per period.
4678            // We extract consolidated revenue / profit / assets from the consolidated
4679            // financial statements produced above, falling back to simple sums when
4680            // no consolidated statements were generated (single-entity path).
4681            for period in 0..self.config.global.period_months {
4682                let period_end =
4683                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4684                let fiscal_year = period_end.year() as u16;
4685                let fiscal_period = period_end.month() as u8;
4686                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4687
4688                use datasynth_core::models::StatementType;
4689
4690                // Try to find consolidated income statement for this period
4691                let cons_is = consolidated_statements.iter().find(|s| {
4692                    s.fiscal_year == fiscal_year
4693                        && s.fiscal_period == fiscal_period
4694                        && s.statement_type == StatementType::IncomeStatement
4695                });
4696                let cons_bs = consolidated_statements.iter().find(|s| {
4697                    s.fiscal_year == fiscal_year
4698                        && s.fiscal_period == fiscal_period
4699                        && s.statement_type == StatementType::BalanceSheet
4700                });
4701
4702                // If consolidated statements not available fall back to the flat list
4703                let is_stmt = cons_is.or_else(|| {
4704                    financial_statements.iter().find(|s| {
4705                        s.fiscal_year == fiscal_year
4706                            && s.fiscal_period == fiscal_period
4707                            && s.statement_type == StatementType::IncomeStatement
4708                    })
4709                });
4710                let bs_stmt = cons_bs.or_else(|| {
4711                    financial_statements.iter().find(|s| {
4712                        s.fiscal_year == fiscal_year
4713                            && s.fiscal_period == fiscal_period
4714                            && s.statement_type == StatementType::BalanceSheet
4715                    })
4716                });
4717
4718                let consolidated_revenue = is_stmt
4719                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
4720                    .map(|li| -li.amount) // revenue is stored as negative in IS
4721                    .unwrap_or(rust_decimal::Decimal::ZERO);
4722
4723                let consolidated_profit = is_stmt
4724                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
4725                    .map(|li| li.amount)
4726                    .unwrap_or(rust_decimal::Decimal::ZERO);
4727
4728                let consolidated_assets = bs_stmt
4729                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
4730                    .map(|li| li.amount)
4731                    .unwrap_or(rust_decimal::Decimal::ZERO);
4732
4733                // Skip periods where we have no financial data
4734                if consolidated_revenue == rust_decimal::Decimal::ZERO
4735                    && consolidated_assets == rust_decimal::Decimal::ZERO
4736                {
4737                    continue;
4738                }
4739
4740                let group_code = self
4741                    .config
4742                    .companies
4743                    .first()
4744                    .map(|c| c.code.as_str())
4745                    .unwrap_or("GROUP");
4746
4747                // Compute period depreciation from JEs with document type "CL" hitting account
4748                // 6000 (depreciation expense).  These are generated by phase_period_close.
4749                let total_depr: rust_decimal::Decimal = journal_entries
4750                    .iter()
4751                    .filter(|je| je.header.document_type == "CL")
4752                    .flat_map(|je| je.lines.iter())
4753                    .filter(|l| l.gl_account.starts_with("6000"))
4754                    .map(|l| l.debit_amount)
4755                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
4756                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
4757                    Some(total_depr)
4758                } else {
4759                    None
4760                };
4761
4762                let (segs, recon) = seg_gen.generate(
4763                    group_code,
4764                    &period_label,
4765                    consolidated_revenue,
4766                    consolidated_profit,
4767                    consolidated_assets,
4768                    &entity_seeds,
4769                    depr_param,
4770                );
4771                segment_reports.extend(segs);
4772                segment_reconciliations.push(recon);
4773            }
4774
4775            info!(
4776                "Segment reports generated: {} segments, {} reconciliations",
4777                segment_reports.len(),
4778                segment_reconciliations.len()
4779            );
4780        }
4781
4782        // Generate bank reconciliations from payment data
4783        if br_enabled && !document_flows.payments.is_empty() {
4784            let employee_ids: Vec<String> = self
4785                .master_data
4786                .employees
4787                .iter()
4788                .map(|e| e.employee_id.clone())
4789                .collect();
4790            let mut br_gen =
4791                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
4792
4793            // Group payments by company code and period
4794            for company in &self.config.companies {
4795                let company_payments: Vec<PaymentReference> = document_flows
4796                    .payments
4797                    .iter()
4798                    .filter(|p| p.header.company_code == company.code)
4799                    .map(|p| PaymentReference {
4800                        id: p.header.document_id.clone(),
4801                        amount: if p.is_vendor { p.amount } else { -p.amount },
4802                        date: p.header.document_date,
4803                        reference: p
4804                            .check_number
4805                            .clone()
4806                            .or_else(|| p.wire_reference.clone())
4807                            .unwrap_or_else(|| p.header.document_id.clone()),
4808                    })
4809                    .collect();
4810
4811                if company_payments.is_empty() {
4812                    continue;
4813                }
4814
4815                let bank_account_id = format!("{}-MAIN", company.code);
4816
4817                // Generate one reconciliation per period
4818                for period in 0..self.config.global.period_months {
4819                    let period_start = start_date + chrono::Months::new(period);
4820                    let period_end =
4821                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4822
4823                    let period_payments: Vec<PaymentReference> = company_payments
4824                        .iter()
4825                        .filter(|p| p.date >= period_start && p.date <= period_end)
4826                        .cloned()
4827                        .collect();
4828
4829                    let recon = br_gen.generate(
4830                        &company.code,
4831                        &bank_account_id,
4832                        period_start,
4833                        period_end,
4834                        &company.currency,
4835                        &period_payments,
4836                    );
4837                    bank_reconciliations.push(recon);
4838                }
4839            }
4840            info!(
4841                "Bank reconciliations generated: {} reconciliations",
4842                bank_reconciliations.len()
4843            );
4844        }
4845
4846        stats.bank_reconciliation_count = bank_reconciliations.len();
4847        self.check_resources_with_log("post-financial-reporting")?;
4848
4849        if !trial_balances.is_empty() {
4850            info!(
4851                "Period-close trial balances captured: {} periods",
4852                trial_balances.len()
4853            );
4854        }
4855
4856        // Notes to financial statements are generated in a separate post-processing step
4857        // (generate_notes_to_financial_statements) called after accounting_standards and tax
4858        // phases have completed, so that deferred tax and provision data can be wired in.
4859        let notes_to_financial_statements = Vec::new();
4860
4861        Ok(FinancialReportingSnapshot {
4862            financial_statements,
4863            standalone_statements,
4864            consolidated_statements,
4865            consolidation_schedules,
4866            bank_reconciliations,
4867            trial_balances,
4868            segment_reports,
4869            segment_reconciliations,
4870            notes_to_financial_statements,
4871        })
4872    }
4873
4874    /// Populate notes to financial statements using fully-resolved snapshots.
4875    ///
4876    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
4877    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
4878    /// can be wired into the notes context.  The method mutates
4879    /// `financial_reporting.notes_to_financial_statements` in-place.
4880    fn generate_notes_to_financial_statements(
4881        &self,
4882        financial_reporting: &mut FinancialReportingSnapshot,
4883        accounting_standards: &AccountingStandardsSnapshot,
4884        tax: &TaxSnapshot,
4885        hr: &HrSnapshot,
4886        audit: &AuditSnapshot,
4887    ) {
4888        use datasynth_config::schema::AccountingFrameworkConfig;
4889        use datasynth_core::models::StatementType;
4890        use datasynth_generators::period_close::notes_generator::{
4891            NotesGenerator, NotesGeneratorContext,
4892        };
4893
4894        let seed = self.seed;
4895        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4896        {
4897            Ok(d) => d,
4898            Err(_) => return,
4899        };
4900
4901        let mut notes_gen = NotesGenerator::new(seed + 4235);
4902
4903        for company in &self.config.companies {
4904            let last_period_end = start_date
4905                + chrono::Months::new(self.config.global.period_months)
4906                - chrono::Days::new(1);
4907            let fiscal_year = last_period_end.year() as u16;
4908
4909            // Extract relevant amounts from the already-generated financial statements
4910            let entity_is = financial_reporting
4911                .standalone_statements
4912                .get(&company.code)
4913                .and_then(|stmts| {
4914                    stmts.iter().find(|s| {
4915                        s.fiscal_year == fiscal_year
4916                            && s.statement_type == StatementType::IncomeStatement
4917                    })
4918                });
4919            let entity_bs = financial_reporting
4920                .standalone_statements
4921                .get(&company.code)
4922                .and_then(|stmts| {
4923                    stmts.iter().find(|s| {
4924                        s.fiscal_year == fiscal_year
4925                            && s.statement_type == StatementType::BalanceSheet
4926                    })
4927                });
4928
4929            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
4930            let revenue_amount = entity_is
4931                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
4932                .map(|li| li.amount);
4933            let ppe_gross = entity_bs
4934                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
4935                .map(|li| li.amount);
4936
4937            let framework = match self
4938                .config
4939                .accounting_standards
4940                .framework
4941                .unwrap_or_default()
4942            {
4943                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
4944                    "IFRS".to_string()
4945                }
4946                _ => "US GAAP".to_string(),
4947            };
4948
4949            // ---- Deferred tax (IAS 12 / ASC 740) ----
4950            // Sum closing DTA and DTL from rollforward entries for this entity.
4951            let (entity_dta, entity_dtl) = {
4952                let mut dta = rust_decimal::Decimal::ZERO;
4953                let mut dtl = rust_decimal::Decimal::ZERO;
4954                for rf in &tax.deferred_tax.rollforwards {
4955                    if rf.entity_code == company.code {
4956                        dta += rf.closing_dta;
4957                        dtl += rf.closing_dtl;
4958                    }
4959                }
4960                (
4961                    if dta > rust_decimal::Decimal::ZERO {
4962                        Some(dta)
4963                    } else {
4964                        None
4965                    },
4966                    if dtl > rust_decimal::Decimal::ZERO {
4967                        Some(dtl)
4968                    } else {
4969                        None
4970                    },
4971                )
4972            };
4973
4974            // ---- Provisions (IAS 37 / ASC 450) ----
4975            // Filter provisions to this entity; sum best_estimate amounts.
4976            let entity_provisions: Vec<_> = accounting_standards
4977                .provisions
4978                .iter()
4979                .filter(|p| p.entity_code == company.code)
4980                .collect();
4981            let provision_count = entity_provisions.len();
4982            let total_provisions = if provision_count > 0 {
4983                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
4984            } else {
4985                None
4986            };
4987
4988            // ---- Pension data from HR snapshot ----
4989            let entity_pension_plan_count = hr
4990                .pension_plans
4991                .iter()
4992                .filter(|p| p.entity_code == company.code)
4993                .count();
4994            let entity_total_dbo: Option<rust_decimal::Decimal> = {
4995                let sum: rust_decimal::Decimal = hr
4996                    .pension_disclosures
4997                    .iter()
4998                    .filter(|d| {
4999                        hr.pension_plans
5000                            .iter()
5001                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5002                    })
5003                    .map(|d| d.net_pension_liability)
5004                    .sum();
5005                let plan_assets_sum: rust_decimal::Decimal = hr
5006                    .pension_plan_assets
5007                    .iter()
5008                    .filter(|a| {
5009                        hr.pension_plans
5010                            .iter()
5011                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5012                    })
5013                    .map(|a| a.fair_value_closing)
5014                    .sum();
5015                if entity_pension_plan_count > 0 {
5016                    Some(sum + plan_assets_sum)
5017                } else {
5018                    None
5019                }
5020            };
5021            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5022                let sum: rust_decimal::Decimal = hr
5023                    .pension_plan_assets
5024                    .iter()
5025                    .filter(|a| {
5026                        hr.pension_plans
5027                            .iter()
5028                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5029                    })
5030                    .map(|a| a.fair_value_closing)
5031                    .sum();
5032                if entity_pension_plan_count > 0 {
5033                    Some(sum)
5034                } else {
5035                    None
5036                }
5037            };
5038
5039            // ---- Audit data: related parties + subsequent events ----
5040            // Audit snapshot covers all entities; use total counts (common case = single entity).
5041            let rp_count = audit.related_party_transactions.len();
5042            let se_count = audit.subsequent_events.len();
5043            let adjusting_count = audit
5044                .subsequent_events
5045                .iter()
5046                .filter(|e| {
5047                    matches!(
5048                        e.classification,
5049                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5050                    )
5051                })
5052                .count();
5053
5054            let ctx = NotesGeneratorContext {
5055                entity_code: company.code.clone(),
5056                framework,
5057                period: format!("FY{}", fiscal_year),
5058                period_end: last_period_end,
5059                currency: company.currency.clone(),
5060                revenue_amount,
5061                total_ppe_gross: ppe_gross,
5062                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5063                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
5064                deferred_tax_asset: entity_dta,
5065                deferred_tax_liability: entity_dtl,
5066                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
5067                provision_count,
5068                total_provisions,
5069                // Pension data from HR snapshot
5070                pension_plan_count: entity_pension_plan_count,
5071                total_dbo: entity_total_dbo,
5072                total_plan_assets: entity_total_plan_assets,
5073                // Audit data
5074                related_party_transaction_count: rp_count,
5075                subsequent_event_count: se_count,
5076                adjusting_event_count: adjusting_count,
5077                ..NotesGeneratorContext::default()
5078            };
5079
5080            let entity_notes = notes_gen.generate(&ctx);
5081            info!(
5082                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5083                company.code,
5084                entity_notes.len(),
5085                entity_dta,
5086                entity_dtl,
5087                provision_count,
5088            );
5089            financial_reporting
5090                .notes_to_financial_statements
5091                .extend(entity_notes);
5092        }
5093    }
5094
5095    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
5096    ///
5097    /// This ensures the trial balance is coherent with the JEs: every debit and credit
5098    /// posted in the journal entries flows through to the trial balance, using the real
5099    /// GL account numbers from the CoA.
5100    fn build_trial_balance_from_entries(
5101        journal_entries: &[JournalEntry],
5102        coa: &ChartOfAccounts,
5103        company_code: &str,
5104        fiscal_year: u16,
5105        fiscal_period: u8,
5106    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5107        use rust_decimal::Decimal;
5108
5109        // Accumulate total debits and credits per GL account
5110        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5111        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5112
5113        for je in journal_entries {
5114            // Filter to matching company, fiscal year, and period
5115            if je.header.company_code != company_code
5116                || je.header.fiscal_year != fiscal_year
5117                || je.header.fiscal_period != fiscal_period
5118            {
5119                continue;
5120            }
5121
5122            for line in &je.lines {
5123                let acct = &line.gl_account;
5124                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5125                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5126            }
5127        }
5128
5129        // Build a TrialBalanceEntry for each account that had activity
5130        let mut all_accounts: Vec<&String> = account_debits
5131            .keys()
5132            .chain(account_credits.keys())
5133            .collect::<std::collections::HashSet<_>>()
5134            .into_iter()
5135            .collect();
5136        all_accounts.sort();
5137
5138        let mut entries = Vec::new();
5139
5140        for acct_number in all_accounts {
5141            let debit = account_debits
5142                .get(acct_number)
5143                .copied()
5144                .unwrap_or(Decimal::ZERO);
5145            let credit = account_credits
5146                .get(acct_number)
5147                .copied()
5148                .unwrap_or(Decimal::ZERO);
5149
5150            if debit.is_zero() && credit.is_zero() {
5151                continue;
5152            }
5153
5154            // Look up account name from CoA, fall back to "Account {code}"
5155            let account_name = coa
5156                .get_account(acct_number)
5157                .map(|gl| gl.short_description.clone())
5158                .unwrap_or_else(|| format!("Account {acct_number}"));
5159
5160            // Map account code prefix to the category strings expected by
5161            // FinancialStatementGenerator (Cash, Receivables, Inventory,
5162            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
5163            // OperatingExpenses).
5164            let category = Self::category_from_account_code(acct_number);
5165
5166            entries.push(datasynth_generators::TrialBalanceEntry {
5167                account_code: acct_number.clone(),
5168                account_name,
5169                category,
5170                debit_balance: debit,
5171                credit_balance: credit,
5172            });
5173        }
5174
5175        entries
5176    }
5177
5178    /// Build a cumulative trial balance by aggregating all JEs from the start up to
5179    /// (and including) the given period end date.
5180    ///
5181    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
5182    /// while income statement accounts (revenue, expenses) show only the current period.
5183    /// The two are merged into a single Vec for the FinancialStatementGenerator.
5184    fn build_cumulative_trial_balance(
5185        journal_entries: &[JournalEntry],
5186        coa: &ChartOfAccounts,
5187        company_code: &str,
5188        start_date: NaiveDate,
5189        period_end: NaiveDate,
5190        fiscal_year: u16,
5191        fiscal_period: u8,
5192    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5193        use rust_decimal::Decimal;
5194
5195        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
5196        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5197        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5198
5199        // Accumulate debits/credits for income statement accounts (current period only)
5200        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5201        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5202
5203        for je in journal_entries {
5204            if je.header.company_code != company_code {
5205                continue;
5206            }
5207
5208            for line in &je.lines {
5209                let acct = &line.gl_account;
5210                let category = Self::category_from_account_code(acct);
5211                let is_bs_account = matches!(
5212                    category.as_str(),
5213                    "Cash"
5214                        | "Receivables"
5215                        | "Inventory"
5216                        | "FixedAssets"
5217                        | "Payables"
5218                        | "AccruedLiabilities"
5219                        | "LongTermDebt"
5220                        | "Equity"
5221                );
5222
5223                if is_bs_account {
5224                    // Balance sheet: accumulate from start through period_end
5225                    if je.header.document_date <= period_end
5226                        && je.header.document_date >= start_date
5227                    {
5228                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5229                            line.debit_amount;
5230                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5231                            line.credit_amount;
5232                    }
5233                } else {
5234                    // Income statement: current period only
5235                    if je.header.fiscal_year == fiscal_year
5236                        && je.header.fiscal_period == fiscal_period
5237                    {
5238                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5239                            line.debit_amount;
5240                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5241                            line.credit_amount;
5242                    }
5243                }
5244            }
5245        }
5246
5247        // Merge all accounts
5248        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5249        all_accounts.extend(bs_debits.keys().cloned());
5250        all_accounts.extend(bs_credits.keys().cloned());
5251        all_accounts.extend(is_debits.keys().cloned());
5252        all_accounts.extend(is_credits.keys().cloned());
5253
5254        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5255        sorted_accounts.sort();
5256
5257        let mut entries = Vec::new();
5258
5259        for acct_number in &sorted_accounts {
5260            let category = Self::category_from_account_code(acct_number);
5261            let is_bs_account = matches!(
5262                category.as_str(),
5263                "Cash"
5264                    | "Receivables"
5265                    | "Inventory"
5266                    | "FixedAssets"
5267                    | "Payables"
5268                    | "AccruedLiabilities"
5269                    | "LongTermDebt"
5270                    | "Equity"
5271            );
5272
5273            let (debit, credit) = if is_bs_account {
5274                (
5275                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5276                    bs_credits
5277                        .get(acct_number)
5278                        .copied()
5279                        .unwrap_or(Decimal::ZERO),
5280                )
5281            } else {
5282                (
5283                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5284                    is_credits
5285                        .get(acct_number)
5286                        .copied()
5287                        .unwrap_or(Decimal::ZERO),
5288                )
5289            };
5290
5291            if debit.is_zero() && credit.is_zero() {
5292                continue;
5293            }
5294
5295            let account_name = coa
5296                .get_account(acct_number)
5297                .map(|gl| gl.short_description.clone())
5298                .unwrap_or_else(|| format!("Account {acct_number}"));
5299
5300            entries.push(datasynth_generators::TrialBalanceEntry {
5301                account_code: acct_number.clone(),
5302                account_name,
5303                category,
5304                debit_balance: debit,
5305                credit_balance: credit,
5306            });
5307        }
5308
5309        entries
5310    }
5311
5312    /// Build a JE-derived cash flow statement using the indirect method.
5313    ///
5314    /// Compares current and prior cumulative trial balances to derive working capital
5315    /// changes, producing a coherent cash flow statement tied to actual journal entries.
5316    fn build_cash_flow_from_trial_balances(
5317        current_tb: &[datasynth_generators::TrialBalanceEntry],
5318        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
5319        net_income: rust_decimal::Decimal,
5320    ) -> Vec<CashFlowItem> {
5321        use rust_decimal::Decimal;
5322
5323        // Helper: aggregate a TB by category and return net (debit - credit)
5324        let aggregate =
5325            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
5326                let mut map: HashMap<String, Decimal> = HashMap::new();
5327                for entry in tb {
5328                    let net = entry.debit_balance - entry.credit_balance;
5329                    *map.entry(entry.category.clone()).or_default() += net;
5330                }
5331                map
5332            };
5333
5334        let current = aggregate(current_tb);
5335        let prior = prior_tb.map(aggregate);
5336
5337        // Get balance for a category, defaulting to zero
5338        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
5339            *map.get(key).unwrap_or(&Decimal::ZERO)
5340        };
5341
5342        // Compute change: current - prior (or current if no prior)
5343        let change = |key: &str| -> Decimal {
5344            let curr = get(&current, key);
5345            match &prior {
5346                Some(p) => curr - get(p, key),
5347                None => curr,
5348            }
5349        };
5350
5351        // Operating activities (indirect method)
5352        // Depreciation add-back: approximate from FixedAssets decrease
5353        let fixed_asset_change = change("FixedAssets");
5354        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
5355            -fixed_asset_change
5356        } else {
5357            Decimal::ZERO
5358        };
5359
5360        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
5361        let ar_change = change("Receivables");
5362        let inventory_change = change("Inventory");
5363        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
5364        let ap_change = change("Payables");
5365        let accrued_change = change("AccruedLiabilities");
5366
5367        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
5368            + (-ap_change)
5369            + (-accrued_change);
5370
5371        // Investing activities
5372        let capex = if fixed_asset_change > Decimal::ZERO {
5373            -fixed_asset_change
5374        } else {
5375            Decimal::ZERO
5376        };
5377        let investing_cf = capex;
5378
5379        // Financing activities
5380        let debt_change = -change("LongTermDebt");
5381        let equity_change = -change("Equity");
5382        let financing_cf = debt_change + equity_change;
5383
5384        let net_change = operating_cf + investing_cf + financing_cf;
5385
5386        vec![
5387            CashFlowItem {
5388                item_code: "CF-NI".to_string(),
5389                label: "Net Income".to_string(),
5390                category: CashFlowCategory::Operating,
5391                amount: net_income,
5392                amount_prior: None,
5393                sort_order: 1,
5394                is_total: false,
5395            },
5396            CashFlowItem {
5397                item_code: "CF-DEP".to_string(),
5398                label: "Depreciation & Amortization".to_string(),
5399                category: CashFlowCategory::Operating,
5400                amount: depreciation_addback,
5401                amount_prior: None,
5402                sort_order: 2,
5403                is_total: false,
5404            },
5405            CashFlowItem {
5406                item_code: "CF-AR".to_string(),
5407                label: "Change in Accounts Receivable".to_string(),
5408                category: CashFlowCategory::Operating,
5409                amount: -ar_change,
5410                amount_prior: None,
5411                sort_order: 3,
5412                is_total: false,
5413            },
5414            CashFlowItem {
5415                item_code: "CF-AP".to_string(),
5416                label: "Change in Accounts Payable".to_string(),
5417                category: CashFlowCategory::Operating,
5418                amount: -ap_change,
5419                amount_prior: None,
5420                sort_order: 4,
5421                is_total: false,
5422            },
5423            CashFlowItem {
5424                item_code: "CF-INV".to_string(),
5425                label: "Change in Inventory".to_string(),
5426                category: CashFlowCategory::Operating,
5427                amount: -inventory_change,
5428                amount_prior: None,
5429                sort_order: 5,
5430                is_total: false,
5431            },
5432            CashFlowItem {
5433                item_code: "CF-OP".to_string(),
5434                label: "Net Cash from Operating Activities".to_string(),
5435                category: CashFlowCategory::Operating,
5436                amount: operating_cf,
5437                amount_prior: None,
5438                sort_order: 6,
5439                is_total: true,
5440            },
5441            CashFlowItem {
5442                item_code: "CF-CAPEX".to_string(),
5443                label: "Capital Expenditures".to_string(),
5444                category: CashFlowCategory::Investing,
5445                amount: capex,
5446                amount_prior: None,
5447                sort_order: 7,
5448                is_total: false,
5449            },
5450            CashFlowItem {
5451                item_code: "CF-INV-T".to_string(),
5452                label: "Net Cash from Investing Activities".to_string(),
5453                category: CashFlowCategory::Investing,
5454                amount: investing_cf,
5455                amount_prior: None,
5456                sort_order: 8,
5457                is_total: true,
5458            },
5459            CashFlowItem {
5460                item_code: "CF-DEBT".to_string(),
5461                label: "Net Borrowings / (Repayments)".to_string(),
5462                category: CashFlowCategory::Financing,
5463                amount: debt_change,
5464                amount_prior: None,
5465                sort_order: 9,
5466                is_total: false,
5467            },
5468            CashFlowItem {
5469                item_code: "CF-EQ".to_string(),
5470                label: "Equity Changes".to_string(),
5471                category: CashFlowCategory::Financing,
5472                amount: equity_change,
5473                amount_prior: None,
5474                sort_order: 10,
5475                is_total: false,
5476            },
5477            CashFlowItem {
5478                item_code: "CF-FIN-T".to_string(),
5479                label: "Net Cash from Financing Activities".to_string(),
5480                category: CashFlowCategory::Financing,
5481                amount: financing_cf,
5482                amount_prior: None,
5483                sort_order: 11,
5484                is_total: true,
5485            },
5486            CashFlowItem {
5487                item_code: "CF-NET".to_string(),
5488                label: "Net Change in Cash".to_string(),
5489                category: CashFlowCategory::Operating,
5490                amount: net_change,
5491                amount_prior: None,
5492                sort_order: 12,
5493                is_total: true,
5494            },
5495        ]
5496    }
5497
5498    /// Calculate net income from a set of trial balance entries.
5499    ///
5500    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
5501    fn calculate_net_income_from_tb(
5502        tb: &[datasynth_generators::TrialBalanceEntry],
5503    ) -> rust_decimal::Decimal {
5504        use rust_decimal::Decimal;
5505
5506        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
5507        for entry in tb {
5508            let net = entry.debit_balance - entry.credit_balance;
5509            *aggregated.entry(entry.category.clone()).or_default() += net;
5510        }
5511
5512        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
5513        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
5514        let opex = *aggregated
5515            .get("OperatingExpenses")
5516            .unwrap_or(&Decimal::ZERO);
5517        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
5518        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
5519
5520        // revenue is negative (credit-normal), expenses are positive (debit-normal)
5521        // other_income is typically negative (credit), other_expenses is typically positive
5522        let operating_income = revenue - cogs - opex - other_expenses - other_income;
5523        let tax_rate = Decimal::new(25, 2); // 0.25
5524        let tax = operating_income * tax_rate;
5525        operating_income - tax
5526    }
5527
5528    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
5529    ///
5530    /// Uses the first two digits of the account code to classify into the categories
5531    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
5532    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
5533    /// OperatingExpenses, OtherIncome, OtherExpenses.
5534    fn category_from_account_code(code: &str) -> String {
5535        let prefix: String = code.chars().take(2).collect();
5536        match prefix.as_str() {
5537            "10" => "Cash",
5538            "11" => "Receivables",
5539            "12" | "13" | "14" => "Inventory",
5540            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
5541            "20" => "Payables",
5542            "21" | "22" | "23" | "24" => "AccruedLiabilities",
5543            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
5544            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
5545            "40" | "41" | "42" | "43" | "44" => "Revenue",
5546            "50" | "51" | "52" => "CostOfSales",
5547            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
5548                "OperatingExpenses"
5549            }
5550            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
5551            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
5552            _ => "OperatingExpenses",
5553        }
5554        .to_string()
5555    }
5556
5557    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
5558    fn phase_hr_data(
5559        &mut self,
5560        stats: &mut EnhancedGenerationStatistics,
5561    ) -> SynthResult<HrSnapshot> {
5562        if !self.phase_config.generate_hr {
5563            debug!("Phase 16: Skipped (HR generation disabled)");
5564            return Ok(HrSnapshot::default());
5565        }
5566
5567        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
5568
5569        let seed = self.seed;
5570        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5571            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5572        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5573        let company_code = self
5574            .config
5575            .companies
5576            .first()
5577            .map(|c| c.code.as_str())
5578            .unwrap_or("1000");
5579        let currency = self
5580            .config
5581            .companies
5582            .first()
5583            .map(|c| c.currency.as_str())
5584            .unwrap_or("USD");
5585
5586        let employee_ids: Vec<String> = self
5587            .master_data
5588            .employees
5589            .iter()
5590            .map(|e| e.employee_id.clone())
5591            .collect();
5592
5593        if employee_ids.is_empty() {
5594            debug!("Phase 16: Skipped (no employees available)");
5595            return Ok(HrSnapshot::default());
5596        }
5597
5598        // Extract cost-center pool from master data employees for cross-reference
5599        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
5600        let cost_center_ids: Vec<String> = self
5601            .master_data
5602            .employees
5603            .iter()
5604            .filter_map(|e| e.cost_center.clone())
5605            .collect::<std::collections::HashSet<_>>()
5606            .into_iter()
5607            .collect();
5608
5609        let mut snapshot = HrSnapshot::default();
5610
5611        // Generate payroll runs (one per month)
5612        if self.config.hr.payroll.enabled {
5613            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
5614                .with_pools(employee_ids.clone(), cost_center_ids.clone());
5615
5616            // Look up country pack for payroll deductions and labels
5617            let payroll_pack = self.primary_pack();
5618
5619            // Store the pack on the generator so generate() resolves
5620            // localized deduction rates and labels from it.
5621            payroll_gen.set_country_pack(payroll_pack.clone());
5622
5623            let employees_with_salary: Vec<(
5624                String,
5625                rust_decimal::Decimal,
5626                Option<String>,
5627                Option<String>,
5628            )> = self
5629                .master_data
5630                .employees
5631                .iter()
5632                .map(|e| {
5633                    // Use the employee's actual annual base salary.
5634                    // Fall back to $60,000 / yr if somehow zero.
5635                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
5636                        e.base_salary
5637                    } else {
5638                        rust_decimal::Decimal::from(60_000)
5639                    };
5640                    (
5641                        e.employee_id.clone(),
5642                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
5643                        e.cost_center.clone(),
5644                        e.department_id.clone(),
5645                    )
5646                })
5647                .collect();
5648
5649            for month in 0..self.config.global.period_months {
5650                let period_start = start_date + chrono::Months::new(month);
5651                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
5652                let (run, items) = payroll_gen.generate(
5653                    company_code,
5654                    &employees_with_salary,
5655                    period_start,
5656                    period_end,
5657                    currency,
5658                );
5659                snapshot.payroll_runs.push(run);
5660                snapshot.payroll_run_count += 1;
5661                snapshot.payroll_line_item_count += items.len();
5662                snapshot.payroll_line_items.extend(items);
5663            }
5664        }
5665
5666        // Generate time entries
5667        if self.config.hr.time_attendance.enabled {
5668            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
5669                .with_pools(employee_ids.clone(), cost_center_ids.clone());
5670            let entries = time_gen.generate(
5671                &employee_ids,
5672                start_date,
5673                end_date,
5674                &self.config.hr.time_attendance,
5675            );
5676            snapshot.time_entry_count = entries.len();
5677            snapshot.time_entries = entries;
5678        }
5679
5680        // Generate expense reports
5681        if self.config.hr.expenses.enabled {
5682            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
5683                .with_pools(employee_ids.clone(), cost_center_ids.clone());
5684            expense_gen.set_country_pack(self.primary_pack().clone());
5685            let company_currency = self
5686                .config
5687                .companies
5688                .first()
5689                .map(|c| c.currency.as_str())
5690                .unwrap_or("USD");
5691            let reports = expense_gen.generate_with_currency(
5692                &employee_ids,
5693                start_date,
5694                end_date,
5695                &self.config.hr.expenses,
5696                company_currency,
5697            );
5698            snapshot.expense_report_count = reports.len();
5699            snapshot.expense_reports = reports;
5700        }
5701
5702        // Generate benefit enrollments (gated on payroll, since benefits require employees)
5703        if self.config.hr.payroll.enabled {
5704            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
5705            let employee_pairs: Vec<(String, String)> = self
5706                .master_data
5707                .employees
5708                .iter()
5709                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
5710                .collect();
5711            let enrollments =
5712                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
5713            snapshot.benefit_enrollment_count = enrollments.len();
5714            snapshot.benefit_enrollments = enrollments;
5715        }
5716
5717        // Generate defined benefit pension plans (IAS 19 / ASC 715)
5718        if self.phase_config.generate_hr {
5719            let entity_name = self
5720                .config
5721                .companies
5722                .first()
5723                .map(|c| c.name.as_str())
5724                .unwrap_or("Entity");
5725            let period_months = self.config.global.period_months;
5726            let period_label = {
5727                let y = start_date.year();
5728                let m = start_date.month();
5729                if period_months >= 12 {
5730                    format!("FY{y}")
5731                } else {
5732                    format!("{y}-{m:02}")
5733                }
5734            };
5735            let reporting_date =
5736                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
5737
5738            // Compute average annual salary from actual payroll data when available.
5739            // PayrollRun.total_gross covers all employees for one pay period; we sum
5740            // across all runs and divide by employee_count to get per-employee total,
5741            // then annualise for sub-annual periods.
5742            let avg_salary: Option<rust_decimal::Decimal> = {
5743                let employee_count = employee_ids.len();
5744                if self.config.hr.payroll.enabled
5745                    && employee_count > 0
5746                    && !snapshot.payroll_runs.is_empty()
5747                {
5748                    // Sum total gross pay across all payroll runs for this company
5749                    let total_gross: rust_decimal::Decimal = snapshot
5750                        .payroll_runs
5751                        .iter()
5752                        .filter(|r| r.company_code == company_code)
5753                        .map(|r| r.total_gross)
5754                        .sum();
5755                    if total_gross > rust_decimal::Decimal::ZERO {
5756                        // Annualise: total_gross covers `period_months` months of pay
5757                        let annual_total = if period_months > 0 && period_months < 12 {
5758                            total_gross * rust_decimal::Decimal::from(12u32)
5759                                / rust_decimal::Decimal::from(period_months)
5760                        } else {
5761                            total_gross
5762                        };
5763                        Some(
5764                            (annual_total / rust_decimal::Decimal::from(employee_count))
5765                                .round_dp(2),
5766                        )
5767                    } else {
5768                        None
5769                    }
5770                } else {
5771                    None
5772                }
5773            };
5774
5775            let mut pension_gen =
5776                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
5777            let pension_snap = pension_gen.generate(
5778                company_code,
5779                entity_name,
5780                &period_label,
5781                reporting_date,
5782                employee_ids.len(),
5783                currency,
5784                avg_salary,
5785                period_months,
5786            );
5787            snapshot.pension_plan_count = pension_snap.plans.len();
5788            snapshot.pension_plans = pension_snap.plans;
5789            snapshot.pension_obligations = pension_snap.obligations;
5790            snapshot.pension_plan_assets = pension_snap.plan_assets;
5791            snapshot.pension_disclosures = pension_snap.disclosures;
5792            // Pension JEs are returned here so they can be added to entries
5793            // in the caller (stored temporarily on snapshot for transfer).
5794            // We embed them in the hr snapshot for simplicity; the orchestrator
5795            // will extract and extend `entries`.
5796            snapshot.pension_journal_entries = pension_snap.journal_entries;
5797        }
5798
5799        // Generate stock-based compensation (ASC 718 / IFRS 2)
5800        if self.phase_config.generate_hr && !employee_ids.is_empty() {
5801            let period_months = self.config.global.period_months;
5802            let period_label = {
5803                let y = start_date.year();
5804                let m = start_date.month();
5805                if period_months >= 12 {
5806                    format!("FY{y}")
5807                } else {
5808                    format!("{y}-{m:02}")
5809                }
5810            };
5811            let reporting_date =
5812                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
5813
5814            let mut stock_comp_gen =
5815                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
5816            let stock_snap = stock_comp_gen.generate(
5817                company_code,
5818                &employee_ids,
5819                start_date,
5820                &period_label,
5821                reporting_date,
5822                currency,
5823            );
5824            snapshot.stock_grant_count = stock_snap.grants.len();
5825            snapshot.stock_grants = stock_snap.grants;
5826            snapshot.stock_comp_expenses = stock_snap.expenses;
5827            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
5828        }
5829
5830        stats.payroll_run_count = snapshot.payroll_run_count;
5831        stats.time_entry_count = snapshot.time_entry_count;
5832        stats.expense_report_count = snapshot.expense_report_count;
5833        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
5834        stats.pension_plan_count = snapshot.pension_plan_count;
5835        stats.stock_grant_count = snapshot.stock_grant_count;
5836
5837        info!(
5838            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
5839            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
5840            snapshot.time_entry_count, snapshot.expense_report_count,
5841            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
5842            snapshot.stock_grant_count
5843        );
5844        self.check_resources_with_log("post-hr")?;
5845
5846        Ok(snapshot)
5847    }
5848
5849    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
5850    fn phase_accounting_standards(
5851        &mut self,
5852        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
5853        journal_entries: &[JournalEntry],
5854        stats: &mut EnhancedGenerationStatistics,
5855    ) -> SynthResult<AccountingStandardsSnapshot> {
5856        if !self.phase_config.generate_accounting_standards {
5857            debug!("Phase 17: Skipped (accounting standards generation disabled)");
5858            return Ok(AccountingStandardsSnapshot::default());
5859        }
5860        info!("Phase 17: Generating Accounting Standards Data");
5861
5862        let seed = self.seed;
5863        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5864            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5865        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5866        let company_code = self
5867            .config
5868            .companies
5869            .first()
5870            .map(|c| c.code.as_str())
5871            .unwrap_or("1000");
5872        let currency = self
5873            .config
5874            .companies
5875            .first()
5876            .map(|c| c.currency.as_str())
5877            .unwrap_or("USD");
5878
5879        // Convert config framework to standards framework.
5880        // If the user explicitly set a framework in the YAML config, use that.
5881        // Otherwise, fall back to the country pack's accounting.framework field,
5882        // and if that is also absent or unrecognised, default to US GAAP.
5883        let framework = match self.config.accounting_standards.framework {
5884            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
5885                datasynth_standards::framework::AccountingFramework::UsGaap
5886            }
5887            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
5888                datasynth_standards::framework::AccountingFramework::Ifrs
5889            }
5890            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
5891                datasynth_standards::framework::AccountingFramework::DualReporting
5892            }
5893            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
5894                datasynth_standards::framework::AccountingFramework::FrenchGaap
5895            }
5896            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
5897                datasynth_standards::framework::AccountingFramework::GermanGaap
5898            }
5899            None => {
5900                // Derive framework from the primary company's country pack
5901                let pack = self.primary_pack();
5902                let pack_fw = pack.accounting.framework.as_str();
5903                match pack_fw {
5904                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
5905                    "dual_reporting" => {
5906                        datasynth_standards::framework::AccountingFramework::DualReporting
5907                    }
5908                    "french_gaap" => {
5909                        datasynth_standards::framework::AccountingFramework::FrenchGaap
5910                    }
5911                    "german_gaap" | "hgb" => {
5912                        datasynth_standards::framework::AccountingFramework::GermanGaap
5913                    }
5914                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
5915                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
5916                }
5917            }
5918        };
5919
5920        let mut snapshot = AccountingStandardsSnapshot::default();
5921
5922        // Revenue recognition
5923        if self.config.accounting_standards.revenue_recognition.enabled {
5924            let customer_ids: Vec<String> = self
5925                .master_data
5926                .customers
5927                .iter()
5928                .map(|c| c.customer_id.clone())
5929                .collect();
5930
5931            if !customer_ids.is_empty() {
5932                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
5933                let contracts = rev_gen.generate(
5934                    company_code,
5935                    &customer_ids,
5936                    start_date,
5937                    end_date,
5938                    currency,
5939                    &self.config.accounting_standards.revenue_recognition,
5940                    framework,
5941                );
5942                snapshot.revenue_contract_count = contracts.len();
5943                snapshot.contracts = contracts;
5944            }
5945        }
5946
5947        // Impairment testing
5948        if self.config.accounting_standards.impairment.enabled {
5949            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
5950                .master_data
5951                .assets
5952                .iter()
5953                .map(|a| {
5954                    (
5955                        a.asset_id.clone(),
5956                        a.description.clone(),
5957                        a.acquisition_cost,
5958                    )
5959                })
5960                .collect();
5961
5962            if !asset_data.is_empty() {
5963                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
5964                let tests = imp_gen.generate(
5965                    company_code,
5966                    &asset_data,
5967                    end_date,
5968                    &self.config.accounting_standards.impairment,
5969                    framework,
5970                );
5971                snapshot.impairment_test_count = tests.len();
5972                snapshot.impairment_tests = tests;
5973            }
5974        }
5975
5976        // Business combinations (IFRS 3 / ASC 805)
5977        if self
5978            .config
5979            .accounting_standards
5980            .business_combinations
5981            .enabled
5982        {
5983            let bc_config = &self.config.accounting_standards.business_combinations;
5984            let framework_str = match framework {
5985                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
5986                _ => "US_GAAP",
5987            };
5988            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
5989            let bc_snap = bc_gen.generate(
5990                company_code,
5991                currency,
5992                start_date,
5993                end_date,
5994                bc_config.acquisition_count,
5995                framework_str,
5996            );
5997            snapshot.business_combination_count = bc_snap.combinations.len();
5998            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
5999            snapshot.business_combinations = bc_snap.combinations;
6000        }
6001
6002        // Expected Credit Loss (IFRS 9 / ASC 326)
6003        if self
6004            .config
6005            .accounting_standards
6006            .expected_credit_loss
6007            .enabled
6008        {
6009            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6010            let framework_str = match framework {
6011                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6012                _ => "ASC_326",
6013            };
6014
6015            // Use AR aging data from the subledger snapshot if available;
6016            // otherwise generate synthetic bucket exposures.
6017            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6018
6019            let mut ecl_gen = EclGenerator::new(seed + 43);
6020
6021            // Collect combined bucket totals across all company AR aging reports.
6022            let bucket_exposures: Vec<(
6023                datasynth_core::models::subledger::ar::AgingBucket,
6024                rust_decimal::Decimal,
6025            )> = if ar_aging_reports.is_empty() {
6026                // No AR aging data — synthesise plausible bucket exposures.
6027                use datasynth_core::models::subledger::ar::AgingBucket;
6028                vec![
6029                    (
6030                        AgingBucket::Current,
6031                        rust_decimal::Decimal::from(500_000_u32),
6032                    ),
6033                    (
6034                        AgingBucket::Days1To30,
6035                        rust_decimal::Decimal::from(120_000_u32),
6036                    ),
6037                    (
6038                        AgingBucket::Days31To60,
6039                        rust_decimal::Decimal::from(45_000_u32),
6040                    ),
6041                    (
6042                        AgingBucket::Days61To90,
6043                        rust_decimal::Decimal::from(15_000_u32),
6044                    ),
6045                    (
6046                        AgingBucket::Over90Days,
6047                        rust_decimal::Decimal::from(8_000_u32),
6048                    ),
6049                ]
6050            } else {
6051                use datasynth_core::models::subledger::ar::AgingBucket;
6052                // Sum bucket totals from all reports.
6053                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6054                    std::collections::HashMap::new();
6055                for report in ar_aging_reports {
6056                    for (bucket, amount) in &report.bucket_totals {
6057                        *totals.entry(*bucket).or_default() += amount;
6058                    }
6059                }
6060                AgingBucket::all()
6061                    .into_iter()
6062                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6063                    .collect()
6064            };
6065
6066            let ecl_snap = ecl_gen.generate(
6067                company_code,
6068                end_date,
6069                &bucket_exposures,
6070                ecl_config,
6071                &period_label,
6072                framework_str,
6073            );
6074
6075            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6076            snapshot.ecl_models = ecl_snap.ecl_models;
6077            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6078            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6079        }
6080
6081        // Provisions and contingencies (IAS 37 / ASC 450)
6082        {
6083            let framework_str = match framework {
6084                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6085                _ => "US_GAAP",
6086            };
6087
6088            // Compute actual revenue from the journal entries generated so far.
6089            // The `journal_entries` slice passed to this phase contains all GL entries
6090            // up to and including Period Close. Fall back to a minimum of 100_000 to
6091            // avoid degenerate zero-based provision amounts on first-period datasets.
6092            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6093                .max(rust_decimal::Decimal::from(100_000_u32));
6094
6095            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6096
6097            let mut prov_gen = ProvisionGenerator::new(seed + 44);
6098            let prov_snap = prov_gen.generate(
6099                company_code,
6100                currency,
6101                revenue_proxy,
6102                end_date,
6103                &period_label,
6104                framework_str,
6105                None, // prior_opening: no carry-forward data in single-period runs
6106            );
6107
6108            snapshot.provision_count = prov_snap.provisions.len();
6109            snapshot.provisions = prov_snap.provisions;
6110            snapshot.provision_movements = prov_snap.movements;
6111            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6112            snapshot.provision_journal_entries = prov_snap.journal_entries;
6113        }
6114
6115        // IAS 21 Functional Currency Translation
6116        // For each company whose functional currency differs from the presentation
6117        // currency, generate a CurrencyTranslationResult with CTA (OCI).
6118        {
6119            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6120
6121            let presentation_currency = self
6122                .config
6123                .global
6124                .presentation_currency
6125                .clone()
6126                .unwrap_or_else(|| self.config.global.group_currency.clone());
6127
6128            // Build a minimal rate table populated with approximate rates from
6129            // the FX model base rates (USD-based) so we can do the translation.
6130            let mut rate_table = FxRateTable::new(&presentation_currency);
6131
6132            // Populate with base rates against USD; if presentation_currency is
6133            // not USD we do a best-effort two-step conversion using the table's
6134            // triangulation support.
6135            let base_rates = base_rates_usd();
6136            for (ccy, rate) in &base_rates {
6137                rate_table.add_rate(FxRate::new(
6138                    ccy,
6139                    "USD",
6140                    RateType::Closing,
6141                    end_date,
6142                    *rate,
6143                    "SYNTHETIC",
6144                ));
6145                // Average rate = 98% of closing (approximation).
6146                // 0.98 = 98/100 = Decimal::new(98, 2)
6147                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6148                rate_table.add_rate(FxRate::new(
6149                    ccy,
6150                    "USD",
6151                    RateType::Average,
6152                    end_date,
6153                    avg,
6154                    "SYNTHETIC",
6155                ));
6156            }
6157
6158            let mut translation_results = Vec::new();
6159            for company in &self.config.companies {
6160                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
6161                // to ensure the translation produces non-trivial CTA amounts.
6162                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6163                    .max(rust_decimal::Decimal::from(100_000_u32));
6164
6165                let func_ccy = company
6166                    .functional_currency
6167                    .clone()
6168                    .unwrap_or_else(|| company.currency.clone());
6169
6170                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6171                    &company.code,
6172                    &func_ccy,
6173                    &presentation_currency,
6174                    &ias21_period_label,
6175                    end_date,
6176                    company_revenue,
6177                    &rate_table,
6178                );
6179                translation_results.push(result);
6180            }
6181
6182            snapshot.currency_translation_count = translation_results.len();
6183            snapshot.currency_translation_results = translation_results;
6184        }
6185
6186        stats.revenue_contract_count = snapshot.revenue_contract_count;
6187        stats.impairment_test_count = snapshot.impairment_test_count;
6188        stats.business_combination_count = snapshot.business_combination_count;
6189        stats.ecl_model_count = snapshot.ecl_model_count;
6190        stats.provision_count = snapshot.provision_count;
6191
6192        info!(
6193            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6194            snapshot.revenue_contract_count,
6195            snapshot.impairment_test_count,
6196            snapshot.business_combination_count,
6197            snapshot.ecl_model_count,
6198            snapshot.provision_count,
6199            snapshot.currency_translation_count
6200        );
6201        self.check_resources_with_log("post-accounting-standards")?;
6202
6203        Ok(snapshot)
6204    }
6205
6206    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
6207    fn phase_manufacturing(
6208        &mut self,
6209        stats: &mut EnhancedGenerationStatistics,
6210    ) -> SynthResult<ManufacturingSnapshot> {
6211        if !self.phase_config.generate_manufacturing {
6212            debug!("Phase 18: Skipped (manufacturing generation disabled)");
6213            return Ok(ManufacturingSnapshot::default());
6214        }
6215        info!("Phase 18: Generating Manufacturing Data");
6216
6217        let seed = self.seed;
6218        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6219            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6220        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6221        let company_code = self
6222            .config
6223            .companies
6224            .first()
6225            .map(|c| c.code.as_str())
6226            .unwrap_or("1000");
6227
6228        let material_data: Vec<(String, String)> = self
6229            .master_data
6230            .materials
6231            .iter()
6232            .map(|m| (m.material_id.clone(), m.description.clone()))
6233            .collect();
6234
6235        if material_data.is_empty() {
6236            debug!("Phase 18: Skipped (no materials available)");
6237            return Ok(ManufacturingSnapshot::default());
6238        }
6239
6240        let mut snapshot = ManufacturingSnapshot::default();
6241
6242        // Generate production orders
6243        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
6244        let production_orders = prod_gen.generate(
6245            company_code,
6246            &material_data,
6247            start_date,
6248            end_date,
6249            &self.config.manufacturing.production_orders,
6250            &self.config.manufacturing.costing,
6251            &self.config.manufacturing.routing,
6252        );
6253        snapshot.production_order_count = production_orders.len();
6254
6255        // Generate quality inspections from production orders
6256        let inspection_data: Vec<(String, String, String)> = production_orders
6257            .iter()
6258            .map(|po| {
6259                (
6260                    po.order_id.clone(),
6261                    po.material_id.clone(),
6262                    po.material_description.clone(),
6263                )
6264            })
6265            .collect();
6266
6267        snapshot.production_orders = production_orders;
6268
6269        if !inspection_data.is_empty() {
6270            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
6271            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
6272            snapshot.quality_inspection_count = inspections.len();
6273            snapshot.quality_inspections = inspections;
6274        }
6275
6276        // Generate cycle counts (one per month)
6277        let storage_locations: Vec<(String, String)> = material_data
6278            .iter()
6279            .enumerate()
6280            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
6281            .collect();
6282
6283        let employee_ids: Vec<String> = self
6284            .master_data
6285            .employees
6286            .iter()
6287            .map(|e| e.employee_id.clone())
6288            .collect();
6289        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
6290            .with_employee_pool(employee_ids);
6291        let mut cycle_count_total = 0usize;
6292        for month in 0..self.config.global.period_months {
6293            let count_date = start_date + chrono::Months::new(month);
6294            let items_per_count = storage_locations.len().clamp(10, 50);
6295            let cc = cc_gen.generate(
6296                company_code,
6297                &storage_locations,
6298                count_date,
6299                items_per_count,
6300            );
6301            snapshot.cycle_counts.push(cc);
6302            cycle_count_total += 1;
6303        }
6304        snapshot.cycle_count_count = cycle_count_total;
6305
6306        // Generate BOM components
6307        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 53);
6308        let bom_components = bom_gen.generate(company_code, &material_data);
6309        snapshot.bom_component_count = bom_components.len();
6310        snapshot.bom_components = bom_components;
6311
6312        // Generate inventory movements — link GoodsIssue movements to real production order IDs
6313        let currency = self
6314            .config
6315            .companies
6316            .first()
6317            .map(|c| c.currency.as_str())
6318            .unwrap_or("USD");
6319        let production_order_ids: Vec<String> = snapshot
6320            .production_orders
6321            .iter()
6322            .map(|po| po.order_id.clone())
6323            .collect();
6324        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 54);
6325        let inventory_movements = inv_mov_gen.generate_with_production_orders(
6326            company_code,
6327            &material_data,
6328            start_date,
6329            end_date,
6330            2,
6331            currency,
6332            &production_order_ids,
6333        );
6334        snapshot.inventory_movement_count = inventory_movements.len();
6335        snapshot.inventory_movements = inventory_movements;
6336
6337        stats.production_order_count = snapshot.production_order_count;
6338        stats.quality_inspection_count = snapshot.quality_inspection_count;
6339        stats.cycle_count_count = snapshot.cycle_count_count;
6340        stats.bom_component_count = snapshot.bom_component_count;
6341        stats.inventory_movement_count = snapshot.inventory_movement_count;
6342
6343        info!(
6344            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
6345            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
6346            snapshot.bom_component_count, snapshot.inventory_movement_count
6347        );
6348        self.check_resources_with_log("post-manufacturing")?;
6349
6350        Ok(snapshot)
6351    }
6352
6353    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
6354    fn phase_sales_kpi_budgets(
6355        &mut self,
6356        coa: &Arc<ChartOfAccounts>,
6357        financial_reporting: &FinancialReportingSnapshot,
6358        stats: &mut EnhancedGenerationStatistics,
6359    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
6360        if !self.phase_config.generate_sales_kpi_budgets {
6361            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
6362            return Ok(SalesKpiBudgetsSnapshot::default());
6363        }
6364        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
6365
6366        let seed = self.seed;
6367        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6368            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6369        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6370        let company_code = self
6371            .config
6372            .companies
6373            .first()
6374            .map(|c| c.code.as_str())
6375            .unwrap_or("1000");
6376
6377        let mut snapshot = SalesKpiBudgetsSnapshot::default();
6378
6379        // Sales Quotes
6380        if self.config.sales_quotes.enabled {
6381            let customer_data: Vec<(String, String)> = self
6382                .master_data
6383                .customers
6384                .iter()
6385                .map(|c| (c.customer_id.clone(), c.name.clone()))
6386                .collect();
6387            let material_data: Vec<(String, String)> = self
6388                .master_data
6389                .materials
6390                .iter()
6391                .map(|m| (m.material_id.clone(), m.description.clone()))
6392                .collect();
6393
6394            if !customer_data.is_empty() && !material_data.is_empty() {
6395                let employee_ids: Vec<String> = self
6396                    .master_data
6397                    .employees
6398                    .iter()
6399                    .map(|e| e.employee_id.clone())
6400                    .collect();
6401                let customer_ids: Vec<String> = self
6402                    .master_data
6403                    .customers
6404                    .iter()
6405                    .map(|c| c.customer_id.clone())
6406                    .collect();
6407                let company_currency = self
6408                    .config
6409                    .companies
6410                    .first()
6411                    .map(|c| c.currency.as_str())
6412                    .unwrap_or("USD");
6413
6414                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
6415                    .with_pools(employee_ids, customer_ids);
6416                let quotes = quote_gen.generate_with_currency(
6417                    company_code,
6418                    &customer_data,
6419                    &material_data,
6420                    start_date,
6421                    end_date,
6422                    &self.config.sales_quotes,
6423                    company_currency,
6424                );
6425                snapshot.sales_quote_count = quotes.len();
6426                snapshot.sales_quotes = quotes;
6427            }
6428        }
6429
6430        // Management KPIs
6431        if self.config.financial_reporting.management_kpis.enabled {
6432            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
6433            let mut kpis = kpi_gen.generate(
6434                company_code,
6435                start_date,
6436                end_date,
6437                &self.config.financial_reporting.management_kpis,
6438            );
6439
6440            // Override financial KPIs with actual data from financial statements
6441            {
6442                use rust_decimal::Decimal;
6443
6444                if let Some(income_stmt) =
6445                    financial_reporting.financial_statements.iter().find(|fs| {
6446                        fs.statement_type == StatementType::IncomeStatement
6447                            && fs.company_code == company_code
6448                    })
6449                {
6450                    // Extract revenue and COGS from income statement line items
6451                    let total_revenue: Decimal = income_stmt
6452                        .line_items
6453                        .iter()
6454                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
6455                        .map(|li| li.amount)
6456                        .sum();
6457                    let total_cogs: Decimal = income_stmt
6458                        .line_items
6459                        .iter()
6460                        .filter(|li| {
6461                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
6462                                && !li.is_total
6463                        })
6464                        .map(|li| li.amount.abs())
6465                        .sum();
6466                    let total_opex: Decimal = income_stmt
6467                        .line_items
6468                        .iter()
6469                        .filter(|li| {
6470                            li.section.contains("Expense")
6471                                && !li.is_total
6472                                && !li.section.contains("Cost")
6473                        })
6474                        .map(|li| li.amount.abs())
6475                        .sum();
6476
6477                    if total_revenue > Decimal::ZERO {
6478                        let hundred = Decimal::from(100);
6479                        let gross_margin_pct =
6480                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
6481                        let operating_income = total_revenue - total_cogs - total_opex;
6482                        let op_margin_pct =
6483                            (operating_income * hundred / total_revenue).round_dp(2);
6484
6485                        // Override gross margin and operating margin KPIs
6486                        for kpi in &mut kpis {
6487                            if kpi.name == "Gross Margin" {
6488                                kpi.value = gross_margin_pct;
6489                            } else if kpi.name == "Operating Margin" {
6490                                kpi.value = op_margin_pct;
6491                            }
6492                        }
6493                    }
6494                }
6495
6496                // Override Current Ratio from balance sheet
6497                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
6498                    fs.statement_type == StatementType::BalanceSheet
6499                        && fs.company_code == company_code
6500                }) {
6501                    let current_assets: Decimal = bs
6502                        .line_items
6503                        .iter()
6504                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
6505                        .map(|li| li.amount)
6506                        .sum();
6507                    let current_liabilities: Decimal = bs
6508                        .line_items
6509                        .iter()
6510                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
6511                        .map(|li| li.amount.abs())
6512                        .sum();
6513
6514                    if current_liabilities > Decimal::ZERO {
6515                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
6516                        for kpi in &mut kpis {
6517                            if kpi.name == "Current Ratio" {
6518                                kpi.value = current_ratio;
6519                            }
6520                        }
6521                    }
6522                }
6523            }
6524
6525            snapshot.kpi_count = kpis.len();
6526            snapshot.kpis = kpis;
6527        }
6528
6529        // Budgets
6530        if self.config.financial_reporting.budgets.enabled {
6531            let account_data: Vec<(String, String)> = coa
6532                .accounts
6533                .iter()
6534                .map(|a| (a.account_number.clone(), a.short_description.clone()))
6535                .collect();
6536
6537            if !account_data.is_empty() {
6538                let fiscal_year = start_date.year() as u32;
6539                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
6540                let budget = budget_gen.generate(
6541                    company_code,
6542                    fiscal_year,
6543                    &account_data,
6544                    &self.config.financial_reporting.budgets,
6545                );
6546                snapshot.budget_line_count = budget.line_items.len();
6547                snapshot.budgets.push(budget);
6548            }
6549        }
6550
6551        stats.sales_quote_count = snapshot.sales_quote_count;
6552        stats.kpi_count = snapshot.kpi_count;
6553        stats.budget_line_count = snapshot.budget_line_count;
6554
6555        info!(
6556            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
6557            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
6558        );
6559        self.check_resources_with_log("post-sales-kpi-budgets")?;
6560
6561        Ok(snapshot)
6562    }
6563
6564    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
6565    fn phase_tax_generation(
6566        &mut self,
6567        document_flows: &DocumentFlowSnapshot,
6568        journal_entries: &[JournalEntry],
6569        stats: &mut EnhancedGenerationStatistics,
6570    ) -> SynthResult<TaxSnapshot> {
6571        if !self.phase_config.generate_tax {
6572            debug!("Phase 20: Skipped (tax generation disabled)");
6573            return Ok(TaxSnapshot::default());
6574        }
6575        info!("Phase 20: Generating Tax Data");
6576
6577        let seed = self.seed;
6578        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6579            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6580        let fiscal_year = start_date.year();
6581        let company_code = self
6582            .config
6583            .companies
6584            .first()
6585            .map(|c| c.code.as_str())
6586            .unwrap_or("1000");
6587
6588        let mut gen =
6589            datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
6590
6591        let pack = self.primary_pack().clone();
6592        let (jurisdictions, codes) =
6593            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
6594
6595        // Generate tax provisions for each company
6596        let mut provisions = Vec::new();
6597        if self.config.tax.provisions.enabled {
6598            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
6599            for company in &self.config.companies {
6600                let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
6601                let statutory_rate = rust_decimal::Decimal::new(
6602                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
6603                    2,
6604                );
6605                let provision = provision_gen.generate(
6606                    &company.code,
6607                    start_date,
6608                    pre_tax_income,
6609                    statutory_rate,
6610                );
6611                provisions.push(provision);
6612            }
6613        }
6614
6615        // Generate tax lines from document invoices
6616        let mut tax_lines = Vec::new();
6617        if !codes.is_empty() {
6618            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
6619                datasynth_generators::TaxLineGeneratorConfig::default(),
6620                codes.clone(),
6621                seed + 72,
6622            );
6623
6624            // Tax lines from vendor invoices (input tax)
6625            // Use the first company's country as buyer country
6626            let buyer_country = self
6627                .config
6628                .companies
6629                .first()
6630                .map(|c| c.country.as_str())
6631                .unwrap_or("US");
6632            for vi in &document_flows.vendor_invoices {
6633                let lines = tax_line_gen.generate_for_document(
6634                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
6635                    &vi.header.document_id,
6636                    buyer_country, // seller approx same country
6637                    buyer_country,
6638                    vi.payable_amount,
6639                    vi.header.document_date,
6640                    None,
6641                );
6642                tax_lines.extend(lines);
6643            }
6644
6645            // Tax lines from customer invoices (output tax)
6646            for ci in &document_flows.customer_invoices {
6647                let lines = tax_line_gen.generate_for_document(
6648                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
6649                    &ci.header.document_id,
6650                    buyer_country, // seller is the company
6651                    buyer_country,
6652                    ci.total_gross_amount,
6653                    ci.header.document_date,
6654                    None,
6655                );
6656                tax_lines.extend(lines);
6657            }
6658        }
6659
6660        // Generate deferred tax data (IAS 12 / ASC 740) for each company
6661        let deferred_tax = {
6662            let companies: Vec<(&str, &str)> = self
6663                .config
6664                .companies
6665                .iter()
6666                .map(|c| (c.code.as_str(), c.country.as_str()))
6667                .collect();
6668            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 73);
6669            deferred_gen.generate(&companies, start_date, journal_entries)
6670        };
6671
6672        let snapshot = TaxSnapshot {
6673            jurisdiction_count: jurisdictions.len(),
6674            code_count: codes.len(),
6675            jurisdictions,
6676            codes,
6677            tax_provisions: provisions,
6678            tax_lines,
6679            tax_returns: Vec::new(),
6680            withholding_records: Vec::new(),
6681            tax_anomaly_labels: Vec::new(),
6682            deferred_tax,
6683        };
6684
6685        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
6686        stats.tax_code_count = snapshot.code_count;
6687        stats.tax_provision_count = snapshot.tax_provisions.len();
6688        stats.tax_line_count = snapshot.tax_lines.len();
6689
6690        info!(
6691            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs",
6692            snapshot.jurisdiction_count,
6693            snapshot.code_count,
6694            snapshot.tax_provisions.len(),
6695            snapshot.deferred_tax.temporary_differences.len(),
6696            snapshot.deferred_tax.journal_entries.len(),
6697        );
6698        self.check_resources_with_log("post-tax")?;
6699
6700        Ok(snapshot)
6701    }
6702
6703    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
6704    fn phase_esg_generation(
6705        &mut self,
6706        document_flows: &DocumentFlowSnapshot,
6707        stats: &mut EnhancedGenerationStatistics,
6708    ) -> SynthResult<EsgSnapshot> {
6709        if !self.phase_config.generate_esg {
6710            debug!("Phase 21: Skipped (ESG generation disabled)");
6711            return Ok(EsgSnapshot::default());
6712        }
6713        let degradation = self.check_resources()?;
6714        if degradation >= DegradationLevel::Reduced {
6715            debug!(
6716                "Phase skipped due to resource pressure (degradation: {:?})",
6717                degradation
6718            );
6719            return Ok(EsgSnapshot::default());
6720        }
6721        info!("Phase 21: Generating ESG Data");
6722
6723        let seed = self.seed;
6724        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6725            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6726        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6727        let entity_id = self
6728            .config
6729            .companies
6730            .first()
6731            .map(|c| c.code.as_str())
6732            .unwrap_or("1000");
6733
6734        let esg_cfg = &self.config.esg;
6735        let mut snapshot = EsgSnapshot::default();
6736
6737        // Energy consumption (feeds into scope 1 & 2 emissions)
6738        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
6739            esg_cfg.environmental.energy.clone(),
6740            seed + 80,
6741        );
6742        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
6743
6744        // Water usage
6745        let facility_count = esg_cfg.environmental.energy.facility_count;
6746        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
6747        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
6748
6749        // Waste
6750        let mut waste_gen = datasynth_generators::WasteGenerator::new(
6751            seed + 82,
6752            esg_cfg.environmental.waste.diversion_target,
6753            facility_count,
6754        );
6755        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
6756
6757        // Emissions (scope 1, 2, 3)
6758        let mut emission_gen =
6759            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
6760
6761        // Build EnergyInput from energy_records
6762        let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
6763            .iter()
6764            .map(|e| datasynth_generators::EnergyInput {
6765                facility_id: e.facility_id.clone(),
6766                energy_type: match e.energy_source {
6767                    EnergySourceType::NaturalGas => {
6768                        datasynth_generators::EnergyInputType::NaturalGas
6769                    }
6770                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
6771                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
6772                    _ => datasynth_generators::EnergyInputType::Electricity,
6773                },
6774                consumption_kwh: e.consumption_kwh,
6775                period: e.period,
6776            })
6777            .collect();
6778
6779        let mut emissions = Vec::new();
6780        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
6781        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
6782
6783        // Scope 3: use vendor spend data from actual payments
6784        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
6785            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
6786            for payment in &document_flows.payments {
6787                if payment.is_vendor {
6788                    *totals
6789                        .entry(payment.business_partner_id.clone())
6790                        .or_default() += payment.amount;
6791                }
6792            }
6793            totals
6794        };
6795        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
6796            .master_data
6797            .vendors
6798            .iter()
6799            .map(|v| {
6800                let spend = vendor_payment_totals
6801                    .get(&v.vendor_id)
6802                    .copied()
6803                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
6804                datasynth_generators::VendorSpendInput {
6805                    vendor_id: v.vendor_id.clone(),
6806                    category: format!("{:?}", v.vendor_type).to_lowercase(),
6807                    spend,
6808                    country: v.country.clone(),
6809                }
6810            })
6811            .collect();
6812        if !vendor_spend.is_empty() {
6813            emissions.extend(emission_gen.generate_scope3_purchased_goods(
6814                entity_id,
6815                &vendor_spend,
6816                start_date,
6817                end_date,
6818            ));
6819        }
6820
6821        // Business travel & commuting (scope 3)
6822        let headcount = self.master_data.employees.len() as u32;
6823        if headcount > 0 {
6824            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
6825            emissions.extend(emission_gen.generate_scope3_business_travel(
6826                entity_id,
6827                travel_spend,
6828                start_date,
6829            ));
6830            emissions
6831                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
6832        }
6833
6834        snapshot.emission_count = emissions.len();
6835        snapshot.emissions = emissions;
6836        snapshot.energy = energy_records;
6837
6838        // Social: Workforce diversity, pay equity, safety
6839        let mut workforce_gen =
6840            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
6841        let total_headcount = headcount.max(100);
6842        snapshot.diversity =
6843            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
6844        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
6845        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
6846            entity_id,
6847            facility_count,
6848            start_date,
6849            end_date,
6850        );
6851
6852        // Compute safety metrics
6853        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
6854        let safety_metric = workforce_gen.compute_safety_metrics(
6855            entity_id,
6856            &snapshot.safety_incidents,
6857            total_hours,
6858            start_date,
6859        );
6860        snapshot.safety_metrics = vec![safety_metric];
6861
6862        // Governance
6863        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
6864            seed + 85,
6865            esg_cfg.governance.board_size,
6866            esg_cfg.governance.independence_target,
6867        );
6868        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
6869
6870        // Supplier ESG assessments
6871        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
6872            esg_cfg.supply_chain_esg.clone(),
6873            seed + 86,
6874        );
6875        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
6876            .master_data
6877            .vendors
6878            .iter()
6879            .map(|v| datasynth_generators::VendorInput {
6880                vendor_id: v.vendor_id.clone(),
6881                country: v.country.clone(),
6882                industry: format!("{:?}", v.vendor_type).to_lowercase(),
6883                quality_score: None,
6884            })
6885            .collect();
6886        snapshot.supplier_assessments =
6887            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
6888
6889        // Disclosures
6890        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
6891            seed + 87,
6892            esg_cfg.reporting.clone(),
6893            esg_cfg.climate_scenarios.clone(),
6894        );
6895        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
6896        snapshot.disclosures = disclosure_gen.generate_disclosures(
6897            entity_id,
6898            &snapshot.materiality,
6899            start_date,
6900            end_date,
6901        );
6902        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
6903        snapshot.disclosure_count = snapshot.disclosures.len();
6904
6905        // Anomaly injection
6906        if esg_cfg.anomaly_rate > 0.0 {
6907            let mut anomaly_injector =
6908                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
6909            let mut labels = Vec::new();
6910            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
6911            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
6912            labels.extend(
6913                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
6914            );
6915            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
6916            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
6917            snapshot.anomaly_labels = labels;
6918        }
6919
6920        stats.esg_emission_count = snapshot.emission_count;
6921        stats.esg_disclosure_count = snapshot.disclosure_count;
6922
6923        info!(
6924            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
6925            snapshot.emission_count,
6926            snapshot.disclosure_count,
6927            snapshot.supplier_assessments.len()
6928        );
6929        self.check_resources_with_log("post-esg")?;
6930
6931        Ok(snapshot)
6932    }
6933
6934    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
6935    fn phase_treasury_data(
6936        &mut self,
6937        document_flows: &DocumentFlowSnapshot,
6938        subledger: &SubledgerSnapshot,
6939        intercompany: &IntercompanySnapshot,
6940        stats: &mut EnhancedGenerationStatistics,
6941    ) -> SynthResult<TreasurySnapshot> {
6942        if !self.phase_config.generate_treasury {
6943            debug!("Phase 22: Skipped (treasury generation disabled)");
6944            return Ok(TreasurySnapshot::default());
6945        }
6946        let degradation = self.check_resources()?;
6947        if degradation >= DegradationLevel::Reduced {
6948            debug!(
6949                "Phase skipped due to resource pressure (degradation: {:?})",
6950                degradation
6951            );
6952            return Ok(TreasurySnapshot::default());
6953        }
6954        info!("Phase 22: Generating Treasury Data");
6955
6956        let seed = self.seed;
6957        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6958            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6959        let currency = self
6960            .config
6961            .companies
6962            .first()
6963            .map(|c| c.currency.as_str())
6964            .unwrap_or("USD");
6965        let entity_id = self
6966            .config
6967            .companies
6968            .first()
6969            .map(|c| c.code.as_str())
6970            .unwrap_or("1000");
6971
6972        let mut snapshot = TreasurySnapshot::default();
6973
6974        // Generate debt instruments
6975        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
6976            self.config.treasury.debt.clone(),
6977            seed + 90,
6978        );
6979        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
6980
6981        // Generate hedging instruments (IR swaps for floating-rate debt)
6982        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
6983            self.config.treasury.hedging.clone(),
6984            seed + 91,
6985        );
6986        for debt in &snapshot.debt_instruments {
6987            if debt.rate_type == InterestRateType::Variable {
6988                let swap = hedge_gen.generate_ir_swap(
6989                    currency,
6990                    debt.principal,
6991                    debt.origination_date,
6992                    debt.maturity_date,
6993                );
6994                snapshot.hedging_instruments.push(swap);
6995            }
6996        }
6997
6998        // Build FX exposures from foreign-currency payments and generate
6999        // FX forwards + hedge relationship designations via generate() API.
7000        {
7001            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7002            for payment in &document_flows.payments {
7003                if payment.currency != currency {
7004                    let entry = fx_map
7005                        .entry(payment.currency.clone())
7006                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7007                    entry.0 += payment.amount;
7008                    // Use the latest settlement date among grouped payments
7009                    if payment.header.document_date > entry.1 {
7010                        entry.1 = payment.header.document_date;
7011                    }
7012                }
7013            }
7014            if !fx_map.is_empty() {
7015                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7016                    .into_iter()
7017                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
7018                        datasynth_generators::treasury::FxExposure {
7019                            currency_pair: format!("{foreign_ccy}/{currency}"),
7020                            foreign_currency: foreign_ccy,
7021                            net_amount,
7022                            settlement_date,
7023                            description: "AP payment FX exposure".to_string(),
7024                        }
7025                    })
7026                    .collect();
7027                let (fx_instruments, fx_relationships) =
7028                    hedge_gen.generate(start_date, &fx_exposures);
7029                snapshot.hedging_instruments.extend(fx_instruments);
7030                snapshot.hedge_relationships.extend(fx_relationships);
7031            }
7032        }
7033
7034        // Inject anomalies if configured
7035        if self.config.treasury.anomaly_rate > 0.0 {
7036            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7037                seed + 92,
7038                self.config.treasury.anomaly_rate,
7039            );
7040            let mut labels = Vec::new();
7041            labels.extend(
7042                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7043            );
7044            snapshot.treasury_anomaly_labels = labels;
7045        }
7046
7047        // Generate cash positions from payment flows
7048        if self.config.treasury.cash_positioning.enabled {
7049            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7050
7051            // AP payments as outflows
7052            for payment in &document_flows.payments {
7053                cash_flows.push(datasynth_generators::treasury::CashFlow {
7054                    date: payment.header.document_date,
7055                    account_id: format!("{entity_id}-MAIN"),
7056                    amount: payment.amount,
7057                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7058                });
7059            }
7060
7061            // Customer receipts (from O2C chains) as inflows
7062            for chain in &document_flows.o2c_chains {
7063                if let Some(ref receipt) = chain.customer_receipt {
7064                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7065                        date: receipt.header.document_date,
7066                        account_id: format!("{entity_id}-MAIN"),
7067                        amount: receipt.amount,
7068                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7069                    });
7070                }
7071                // Remainder receipts (follow-up to partial payments)
7072                for receipt in &chain.remainder_receipts {
7073                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7074                        date: receipt.header.document_date,
7075                        account_id: format!("{entity_id}-MAIN"),
7076                        amount: receipt.amount,
7077                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7078                    });
7079                }
7080            }
7081
7082            if !cash_flows.is_empty() {
7083                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7084                    self.config.treasury.cash_positioning.clone(),
7085                    seed + 93,
7086                );
7087                let account_id = format!("{entity_id}-MAIN");
7088                snapshot.cash_positions = cash_gen.generate(
7089                    entity_id,
7090                    &account_id,
7091                    currency,
7092                    &cash_flows,
7093                    start_date,
7094                    start_date + chrono::Months::new(self.config.global.period_months),
7095                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
7096                );
7097            }
7098        }
7099
7100        // Generate cash forecasts from AR/AP aging
7101        if self.config.treasury.cash_forecasting.enabled {
7102            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7103
7104            // Build AR aging items from subledger AR invoices
7105            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7106                .ar_invoices
7107                .iter()
7108                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7109                .map(|inv| {
7110                    let days_past_due = if inv.due_date < end_date {
7111                        (end_date - inv.due_date).num_days().max(0) as u32
7112                    } else {
7113                        0
7114                    };
7115                    datasynth_generators::treasury::ArAgingItem {
7116                        expected_date: inv.due_date,
7117                        amount: inv.amount_remaining,
7118                        days_past_due,
7119                        document_id: inv.invoice_number.clone(),
7120                    }
7121                })
7122                .collect();
7123
7124            // Build AP aging items from subledger AP invoices
7125            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7126                .ap_invoices
7127                .iter()
7128                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7129                .map(|inv| datasynth_generators::treasury::ApAgingItem {
7130                    payment_date: inv.due_date,
7131                    amount: inv.amount_remaining,
7132                    document_id: inv.invoice_number.clone(),
7133                })
7134                .collect();
7135
7136            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7137                self.config.treasury.cash_forecasting.clone(),
7138                seed + 94,
7139            );
7140            let forecast = forecast_gen.generate(
7141                entity_id,
7142                currency,
7143                end_date,
7144                &ar_items,
7145                &ap_items,
7146                &[], // scheduled disbursements - empty for now
7147            );
7148            snapshot.cash_forecasts.push(forecast);
7149        }
7150
7151        // Generate cash pools and sweeps
7152        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
7153            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7154            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
7155                self.config.treasury.cash_pooling.clone(),
7156                seed + 95,
7157            );
7158
7159            // Create a pool from available accounts
7160            let account_ids: Vec<String> = snapshot
7161                .cash_positions
7162                .iter()
7163                .map(|cp| cp.bank_account_id.clone())
7164                .collect::<std::collections::HashSet<_>>()
7165                .into_iter()
7166                .collect();
7167
7168            if let Some(pool) =
7169                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
7170            {
7171                // Generate sweeps - build participant balances from last cash position per account
7172                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7173                for cp in &snapshot.cash_positions {
7174                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
7175                }
7176
7177                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
7178                    latest_balances
7179                        .into_iter()
7180                        .filter(|(id, _)| pool.participant_accounts.contains(id))
7181                        .map(
7182                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
7183                                account_id: id,
7184                                balance,
7185                            },
7186                        )
7187                        .collect();
7188
7189                let sweeps =
7190                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
7191                snapshot.cash_pool_sweeps = sweeps;
7192                snapshot.cash_pools.push(pool);
7193            }
7194        }
7195
7196        // Generate bank guarantees
7197        if self.config.treasury.bank_guarantees.enabled {
7198            let vendor_names: Vec<String> = self
7199                .master_data
7200                .vendors
7201                .iter()
7202                .map(|v| v.name.clone())
7203                .collect();
7204            if !vendor_names.is_empty() {
7205                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
7206                    self.config.treasury.bank_guarantees.clone(),
7207                    seed + 96,
7208                );
7209                snapshot.bank_guarantees =
7210                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
7211            }
7212        }
7213
7214        // Generate netting runs from intercompany matched pairs
7215        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
7216            let entity_ids: Vec<String> = self
7217                .config
7218                .companies
7219                .iter()
7220                .map(|c| c.code.clone())
7221                .collect();
7222            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
7223                .matched_pairs
7224                .iter()
7225                .map(|mp| {
7226                    (
7227                        mp.seller_company.clone(),
7228                        mp.buyer_company.clone(),
7229                        mp.amount,
7230                    )
7231                })
7232                .collect();
7233            if entity_ids.len() >= 2 {
7234                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
7235                    self.config.treasury.netting.clone(),
7236                    seed + 97,
7237                );
7238                snapshot.netting_runs = netting_gen.generate(
7239                    &entity_ids,
7240                    currency,
7241                    start_date,
7242                    self.config.global.period_months,
7243                    &ic_amounts,
7244                );
7245            }
7246        }
7247
7248        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
7249        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
7250        stats.cash_position_count = snapshot.cash_positions.len();
7251        stats.cash_forecast_count = snapshot.cash_forecasts.len();
7252        stats.cash_pool_count = snapshot.cash_pools.len();
7253
7254        info!(
7255            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs",
7256            snapshot.debt_instruments.len(),
7257            snapshot.hedging_instruments.len(),
7258            snapshot.cash_positions.len(),
7259            snapshot.cash_forecasts.len(),
7260            snapshot.cash_pools.len(),
7261            snapshot.bank_guarantees.len(),
7262            snapshot.netting_runs.len(),
7263        );
7264        self.check_resources_with_log("post-treasury")?;
7265
7266        Ok(snapshot)
7267    }
7268
7269    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
7270    fn phase_project_accounting(
7271        &mut self,
7272        document_flows: &DocumentFlowSnapshot,
7273        hr: &HrSnapshot,
7274        stats: &mut EnhancedGenerationStatistics,
7275    ) -> SynthResult<ProjectAccountingSnapshot> {
7276        if !self.phase_config.generate_project_accounting {
7277            debug!("Phase 23: Skipped (project accounting disabled)");
7278            return Ok(ProjectAccountingSnapshot::default());
7279        }
7280        let degradation = self.check_resources()?;
7281        if degradation >= DegradationLevel::Reduced {
7282            debug!(
7283                "Phase skipped due to resource pressure (degradation: {:?})",
7284                degradation
7285            );
7286            return Ok(ProjectAccountingSnapshot::default());
7287        }
7288        info!("Phase 23: Generating Project Accounting Data");
7289
7290        let seed = self.seed;
7291        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7292            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7293        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7294        let company_code = self
7295            .config
7296            .companies
7297            .first()
7298            .map(|c| c.code.as_str())
7299            .unwrap_or("1000");
7300
7301        let mut snapshot = ProjectAccountingSnapshot::default();
7302
7303        // Generate projects with WBS hierarchies
7304        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
7305            self.config.project_accounting.clone(),
7306            seed + 95,
7307        );
7308        let pool = project_gen.generate(company_code, start_date, end_date);
7309        snapshot.projects = pool.projects.clone();
7310
7311        // Link source documents to projects for cost allocation
7312        {
7313            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
7314                Vec::new();
7315
7316            // Time entries
7317            for te in &hr.time_entries {
7318                let total_hours = te.hours_regular + te.hours_overtime;
7319                if total_hours > 0.0 {
7320                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7321                        id: te.entry_id.clone(),
7322                        entity_id: company_code.to_string(),
7323                        date: te.date,
7324                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
7325                            .unwrap_or(rust_decimal::Decimal::ZERO),
7326                        source_type: CostSourceType::TimeEntry,
7327                        hours: Some(
7328                            rust_decimal::Decimal::from_f64_retain(total_hours)
7329                                .unwrap_or(rust_decimal::Decimal::ZERO),
7330                        ),
7331                    });
7332                }
7333            }
7334
7335            // Expense reports
7336            for er in &hr.expense_reports {
7337                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7338                    id: er.report_id.clone(),
7339                    entity_id: company_code.to_string(),
7340                    date: er.submission_date,
7341                    amount: er.total_amount,
7342                    source_type: CostSourceType::ExpenseReport,
7343                    hours: None,
7344                });
7345            }
7346
7347            // Purchase orders
7348            for po in &document_flows.purchase_orders {
7349                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7350                    id: po.header.document_id.clone(),
7351                    entity_id: company_code.to_string(),
7352                    date: po.header.document_date,
7353                    amount: po.total_net_amount,
7354                    source_type: CostSourceType::PurchaseOrder,
7355                    hours: None,
7356                });
7357            }
7358
7359            // Vendor invoices
7360            for vi in &document_flows.vendor_invoices {
7361                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7362                    id: vi.header.document_id.clone(),
7363                    entity_id: company_code.to_string(),
7364                    date: vi.header.document_date,
7365                    amount: vi.payable_amount,
7366                    source_type: CostSourceType::VendorInvoice,
7367                    hours: None,
7368                });
7369            }
7370
7371            if !source_docs.is_empty() && !pool.projects.is_empty() {
7372                let mut cost_gen =
7373                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
7374                        self.config.project_accounting.cost_allocation.clone(),
7375                        seed + 99,
7376                    );
7377                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
7378            }
7379        }
7380
7381        // Generate change orders
7382        if self.config.project_accounting.change_orders.enabled {
7383            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
7384                self.config.project_accounting.change_orders.clone(),
7385                seed + 96,
7386            );
7387            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
7388        }
7389
7390        // Generate milestones
7391        if self.config.project_accounting.milestones.enabled {
7392            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
7393                self.config.project_accounting.milestones.clone(),
7394                seed + 97,
7395            );
7396            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
7397        }
7398
7399        // Generate earned value metrics (needs cost lines, so only if we have projects)
7400        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
7401            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
7402                self.config.project_accounting.earned_value.clone(),
7403                seed + 98,
7404            );
7405            snapshot.earned_value_metrics =
7406                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
7407        }
7408
7409        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
7410        if self.config.project_accounting.revenue_recognition.enabled
7411            && !snapshot.projects.is_empty()
7412            && !snapshot.cost_lines.is_empty()
7413        {
7414            use datasynth_generators::project_accounting::RevenueGenerator;
7415            let rev_config = self.config.project_accounting.revenue_recognition.clone();
7416            let avg_contract_value =
7417                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
7418                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
7419
7420            // Build contract value tuples: only customer-type projects get revenue recognition.
7421            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
7422            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
7423                snapshot
7424                    .projects
7425                    .iter()
7426                    .filter(|p| {
7427                        matches!(
7428                            p.project_type,
7429                            datasynth_core::models::ProjectType::Customer
7430                        )
7431                    })
7432                    .map(|p| {
7433                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
7434                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
7435                        // budget × 1.25 → contract value
7436                        } else {
7437                            avg_contract_value
7438                        };
7439                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
7440                        (p.project_id.clone(), cv, etc)
7441                    })
7442                    .collect();
7443
7444            if !contract_values.is_empty() {
7445                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
7446                snapshot.revenue_records = rev_gen.generate(
7447                    &snapshot.projects,
7448                    &snapshot.cost_lines,
7449                    &contract_values,
7450                    start_date,
7451                    end_date,
7452                );
7453                debug!(
7454                    "Generated {} revenue recognition records for {} customer projects",
7455                    snapshot.revenue_records.len(),
7456                    contract_values.len()
7457                );
7458            }
7459        }
7460
7461        stats.project_count = snapshot.projects.len();
7462        stats.project_change_order_count = snapshot.change_orders.len();
7463        stats.project_cost_line_count = snapshot.cost_lines.len();
7464
7465        info!(
7466            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
7467            snapshot.projects.len(),
7468            snapshot.change_orders.len(),
7469            snapshot.milestones.len(),
7470            snapshot.earned_value_metrics.len()
7471        );
7472        self.check_resources_with_log("post-project-accounting")?;
7473
7474        Ok(snapshot)
7475    }
7476
7477    /// Phase 24: Generate process evolution and organizational events.
7478    fn phase_evolution_events(
7479        &mut self,
7480        stats: &mut EnhancedGenerationStatistics,
7481    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
7482        if !self.phase_config.generate_evolution_events {
7483            debug!("Phase 24: Skipped (evolution events disabled)");
7484            return Ok((Vec::new(), Vec::new()));
7485        }
7486        info!("Phase 24: Generating Process Evolution + Organizational Events");
7487
7488        let seed = self.seed;
7489        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7490            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7491        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7492
7493        // Process evolution events
7494        let mut proc_gen =
7495            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
7496                seed + 100,
7497            );
7498        let process_events = proc_gen.generate_events(start_date, end_date);
7499
7500        // Organizational events
7501        let company_codes: Vec<String> = self
7502            .config
7503            .companies
7504            .iter()
7505            .map(|c| c.code.clone())
7506            .collect();
7507        let mut org_gen =
7508            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
7509                seed + 101,
7510            );
7511        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
7512
7513        stats.process_evolution_event_count = process_events.len();
7514        stats.organizational_event_count = org_events.len();
7515
7516        info!(
7517            "Evolution events generated: {} process evolution, {} organizational",
7518            process_events.len(),
7519            org_events.len()
7520        );
7521        self.check_resources_with_log("post-evolution-events")?;
7522
7523        Ok((process_events, org_events))
7524    }
7525
7526    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
7527    /// data recovery, and regulatory changes).
7528    fn phase_disruption_events(
7529        &self,
7530        stats: &mut EnhancedGenerationStatistics,
7531    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
7532        if !self.config.organizational_events.enabled {
7533            debug!("Phase 24b: Skipped (organizational events disabled)");
7534            return Ok(Vec::new());
7535        }
7536        info!("Phase 24b: Generating Disruption Events");
7537
7538        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7539            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7540        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7541
7542        let company_codes: Vec<String> = self
7543            .config
7544            .companies
7545            .iter()
7546            .map(|c| c.code.clone())
7547            .collect();
7548
7549        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
7550        let events = gen.generate(start_date, end_date, &company_codes);
7551
7552        stats.disruption_event_count = events.len();
7553        info!("Disruption events generated: {} events", events.len());
7554        self.check_resources_with_log("post-disruption-events")?;
7555
7556        Ok(events)
7557    }
7558
7559    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
7560    ///
7561    /// Produces paired examples where each pair contains the original clean JE
7562    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
7563    /// split transaction). Useful for training anomaly detection models with
7564    /// known ground truth.
7565    fn phase_counterfactuals(
7566        &self,
7567        journal_entries: &[JournalEntry],
7568        stats: &mut EnhancedGenerationStatistics,
7569    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
7570        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
7571            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
7572            return Ok(Vec::new());
7573        }
7574        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
7575
7576        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
7577
7578        let mut gen = CounterfactualGenerator::new(self.seed + 110);
7579
7580        // Rotating set of specs to produce diverse mutation types
7581        let specs = [
7582            CounterfactualSpec::ScaleAmount { factor: 2.5 },
7583            CounterfactualSpec::ShiftDate { days: -14 },
7584            CounterfactualSpec::SelfApprove,
7585            CounterfactualSpec::SplitTransaction { split_count: 3 },
7586        ];
7587
7588        let pairs: Vec<_> = journal_entries
7589            .iter()
7590            .enumerate()
7591            .map(|(i, je)| {
7592                let spec = &specs[i % specs.len()];
7593                gen.generate(je, spec)
7594            })
7595            .collect();
7596
7597        stats.counterfactual_pair_count = pairs.len();
7598        info!(
7599            "Counterfactual pairs generated: {} pairs from {} journal entries",
7600            pairs.len(),
7601            journal_entries.len()
7602        );
7603        self.check_resources_with_log("post-counterfactuals")?;
7604
7605        Ok(pairs)
7606    }
7607
7608    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
7609    ///
7610    /// Uses the anomaly labels (from Phase 8) to determine which documents are
7611    /// fraudulent, then generates probabilistic red flags on all chain documents.
7612    /// Non-fraud documents also receive red flags at a lower rate (false positives)
7613    /// to produce realistic ML training data.
7614    fn phase_red_flags(
7615        &self,
7616        anomaly_labels: &AnomalyLabels,
7617        document_flows: &DocumentFlowSnapshot,
7618        stats: &mut EnhancedGenerationStatistics,
7619    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
7620        if !self.config.fraud.enabled {
7621            debug!("Phase 26: Skipped (fraud generation disabled)");
7622            return Ok(Vec::new());
7623        }
7624        info!("Phase 26: Generating Fraud Red-Flag Indicators");
7625
7626        use datasynth_generators::fraud::RedFlagGenerator;
7627
7628        let generator = RedFlagGenerator::new();
7629        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
7630
7631        // Build a set of document IDs that are known-fraudulent from anomaly labels.
7632        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
7633            .labels
7634            .iter()
7635            .filter(|label| label.anomaly_type.is_intentional())
7636            .map(|label| label.document_id.as_str())
7637            .collect();
7638
7639        let mut flags = Vec::new();
7640
7641        // Iterate P2P chains: use the purchase order document ID as the chain key.
7642        for chain in &document_flows.p2p_chains {
7643            let doc_id = &chain.purchase_order.header.document_id;
7644            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
7645            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
7646        }
7647
7648        // Iterate O2C chains: use the sales order document ID as the chain key.
7649        for chain in &document_flows.o2c_chains {
7650            let doc_id = &chain.sales_order.header.document_id;
7651            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
7652            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
7653        }
7654
7655        stats.red_flag_count = flags.len();
7656        info!(
7657            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
7658            flags.len(),
7659            document_flows.p2p_chains.len(),
7660            document_flows.o2c_chains.len(),
7661            fraud_doc_ids.len()
7662        );
7663        self.check_resources_with_log("post-red-flags")?;
7664
7665        Ok(flags)
7666    }
7667
7668    /// Phase 26b: Generate collusion rings from employee/vendor pools.
7669    ///
7670    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
7671    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
7672    /// advance them over the simulation period.
7673    fn phase_collusion_rings(
7674        &mut self,
7675        stats: &mut EnhancedGenerationStatistics,
7676    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
7677        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
7678            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
7679            return Ok(Vec::new());
7680        }
7681        info!("Phase 26b: Generating Collusion Rings");
7682
7683        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7684            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7685        let months = self.config.global.period_months;
7686
7687        let employee_ids: Vec<String> = self
7688            .master_data
7689            .employees
7690            .iter()
7691            .map(|e| e.employee_id.clone())
7692            .collect();
7693        let vendor_ids: Vec<String> = self
7694            .master_data
7695            .vendors
7696            .iter()
7697            .map(|v| v.vendor_id.clone())
7698            .collect();
7699
7700        let mut generator =
7701            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
7702        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
7703
7704        stats.collusion_ring_count = rings.len();
7705        info!(
7706            "Collusion rings generated: {} rings, total members: {}",
7707            rings.len(),
7708            rings
7709                .iter()
7710                .map(datasynth_generators::fraud::CollusionRing::size)
7711                .sum::<usize>()
7712        );
7713        self.check_resources_with_log("post-collusion-rings")?;
7714
7715        Ok(rings)
7716    }
7717
7718    /// Phase 27: Generate bi-temporal version chains for vendor entities.
7719    ///
7720    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
7721    /// master data changes over time, supporting bi-temporal audit queries.
7722    fn phase_temporal_attributes(
7723        &mut self,
7724        stats: &mut EnhancedGenerationStatistics,
7725    ) -> SynthResult<
7726        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
7727    > {
7728        if !self.config.temporal_attributes.enabled {
7729            debug!("Phase 27: Skipped (temporal attributes disabled)");
7730            return Ok(Vec::new());
7731        }
7732        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
7733
7734        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7735            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7736
7737        // Build a TemporalAttributeConfig from the user's config.
7738        // Since Phase 27 is already gated on temporal_attributes.enabled,
7739        // default to enabling version chains so users get actual mutations.
7740        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
7741            || self.config.temporal_attributes.enabled;
7742        let temporal_config = {
7743            let ta = &self.config.temporal_attributes;
7744            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
7745                .enabled(ta.enabled)
7746                .closed_probability(ta.valid_time.closed_probability)
7747                .avg_validity_days(ta.valid_time.avg_validity_days)
7748                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
7749                .with_version_chains(if generate_version_chains {
7750                    ta.avg_versions_per_entity
7751                } else {
7752                    1.0
7753                })
7754                .build()
7755        };
7756        // Apply backdating settings if configured
7757        let temporal_config = if self
7758            .config
7759            .temporal_attributes
7760            .transaction_time
7761            .allow_backdating
7762        {
7763            let mut c = temporal_config;
7764            c.transaction_time.allow_backdating = true;
7765            c.transaction_time.backdating_probability = self
7766                .config
7767                .temporal_attributes
7768                .transaction_time
7769                .backdating_probability;
7770            c.transaction_time.max_backdate_days = self
7771                .config
7772                .temporal_attributes
7773                .transaction_time
7774                .max_backdate_days;
7775            c
7776        } else {
7777            temporal_config
7778        };
7779        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
7780            temporal_config,
7781            self.seed + 130,
7782            start_date,
7783        );
7784
7785        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
7786            self.seed + 130,
7787            datasynth_core::GeneratorType::Vendor,
7788        );
7789
7790        let chains: Vec<_> = self
7791            .master_data
7792            .vendors
7793            .iter()
7794            .map(|vendor| {
7795                let id = uuid_factory.next();
7796                gen.generate_version_chain(vendor.clone(), id)
7797            })
7798            .collect();
7799
7800        stats.temporal_version_chain_count = chains.len();
7801        info!("Temporal version chains generated: {} chains", chains.len());
7802        self.check_resources_with_log("post-temporal-attributes")?;
7803
7804        Ok(chains)
7805    }
7806
7807    /// Phase 28: Build entity relationship graph and cross-process links.
7808    ///
7809    /// Part 1 (gated on `relationship_strength.enabled`): builds an
7810    /// `EntityGraph` from master-data vendor/customer entities and
7811    /// journal-entry-derived transaction summaries.
7812    ///
7813    /// Part 2 (gated on `cross_process_links.enabled`): extracts
7814    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
7815    /// generates inventory-movement cross-process links.
7816    fn phase_entity_relationships(
7817        &self,
7818        journal_entries: &[JournalEntry],
7819        document_flows: &DocumentFlowSnapshot,
7820        stats: &mut EnhancedGenerationStatistics,
7821    ) -> SynthResult<(
7822        Option<datasynth_core::models::EntityGraph>,
7823        Vec<datasynth_core::models::CrossProcessLink>,
7824    )> {
7825        use datasynth_generators::relationships::{
7826            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
7827            TransactionSummary,
7828        };
7829
7830        let rs_enabled = self.config.relationship_strength.enabled;
7831        let cpl_enabled = self.config.cross_process_links.enabled
7832            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
7833
7834        if !rs_enabled && !cpl_enabled {
7835            debug!(
7836                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
7837            );
7838            return Ok((None, Vec::new()));
7839        }
7840
7841        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
7842
7843        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7844            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7845
7846        let company_code = self
7847            .config
7848            .companies
7849            .first()
7850            .map(|c| c.code.as_str())
7851            .unwrap_or("1000");
7852
7853        // Build the generator with matching config flags
7854        let gen_config = EntityGraphConfig {
7855            enabled: rs_enabled,
7856            cross_process: datasynth_generators::relationships::CrossProcessConfig {
7857                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
7858                enable_return_flows: false,
7859                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
7860                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
7861                // Use higher link rate for small datasets to avoid probabilistic empty results
7862                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
7863                    1.0
7864                } else {
7865                    0.30
7866                },
7867                ..Default::default()
7868            },
7869            strength_config: datasynth_generators::relationships::StrengthConfig {
7870                transaction_volume_weight: self
7871                    .config
7872                    .relationship_strength
7873                    .calculation
7874                    .transaction_volume_weight,
7875                transaction_count_weight: self
7876                    .config
7877                    .relationship_strength
7878                    .calculation
7879                    .transaction_count_weight,
7880                duration_weight: self
7881                    .config
7882                    .relationship_strength
7883                    .calculation
7884                    .relationship_duration_weight,
7885                recency_weight: self.config.relationship_strength.calculation.recency_weight,
7886                mutual_connections_weight: self
7887                    .config
7888                    .relationship_strength
7889                    .calculation
7890                    .mutual_connections_weight,
7891                recency_half_life_days: self
7892                    .config
7893                    .relationship_strength
7894                    .calculation
7895                    .recency_half_life_days,
7896            },
7897            ..Default::default()
7898        };
7899
7900        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
7901
7902        // --- Part 1: Entity Relationship Graph ---
7903        let entity_graph = if rs_enabled {
7904            // Build EntitySummary lists from master data
7905            let vendor_summaries: Vec<EntitySummary> = self
7906                .master_data
7907                .vendors
7908                .iter()
7909                .map(|v| {
7910                    EntitySummary::new(
7911                        &v.vendor_id,
7912                        &v.name,
7913                        datasynth_core::models::GraphEntityType::Vendor,
7914                        start_date,
7915                    )
7916                })
7917                .collect();
7918
7919            let customer_summaries: Vec<EntitySummary> = self
7920                .master_data
7921                .customers
7922                .iter()
7923                .map(|c| {
7924                    EntitySummary::new(
7925                        &c.customer_id,
7926                        &c.name,
7927                        datasynth_core::models::GraphEntityType::Customer,
7928                        start_date,
7929                    )
7930                })
7931                .collect();
7932
7933            // Build transaction summaries from journal entries.
7934            // Key = (company_code, trading_partner) for entries that have a
7935            // trading partner.  This captures intercompany flows and any JE
7936            // whose line items carry a trading_partner reference.
7937            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
7938                std::collections::HashMap::new();
7939
7940            for je in journal_entries {
7941                let cc = je.header.company_code.clone();
7942                let posting_date = je.header.posting_date;
7943                for line in &je.lines {
7944                    if let Some(ref tp) = line.trading_partner {
7945                        let amount = if line.debit_amount > line.credit_amount {
7946                            line.debit_amount
7947                        } else {
7948                            line.credit_amount
7949                        };
7950                        let entry = txn_summaries
7951                            .entry((cc.clone(), tp.clone()))
7952                            .or_insert_with(|| TransactionSummary {
7953                                total_volume: rust_decimal::Decimal::ZERO,
7954                                transaction_count: 0,
7955                                first_transaction_date: posting_date,
7956                                last_transaction_date: posting_date,
7957                                related_entities: std::collections::HashSet::new(),
7958                            });
7959                        entry.total_volume += amount;
7960                        entry.transaction_count += 1;
7961                        if posting_date < entry.first_transaction_date {
7962                            entry.first_transaction_date = posting_date;
7963                        }
7964                        if posting_date > entry.last_transaction_date {
7965                            entry.last_transaction_date = posting_date;
7966                        }
7967                        entry.related_entities.insert(cc.clone());
7968                    }
7969                }
7970            }
7971
7972            // Also extract transaction relationships from document flow chains.
7973            // P2P chains: Company → Vendor relationships
7974            for chain in &document_flows.p2p_chains {
7975                let cc = chain.purchase_order.header.company_code.clone();
7976                let vendor_id = chain.purchase_order.vendor_id.clone();
7977                let po_date = chain.purchase_order.header.document_date;
7978                let amount = chain.purchase_order.total_net_amount;
7979
7980                let entry = txn_summaries
7981                    .entry((cc.clone(), vendor_id))
7982                    .or_insert_with(|| TransactionSummary {
7983                        total_volume: rust_decimal::Decimal::ZERO,
7984                        transaction_count: 0,
7985                        first_transaction_date: po_date,
7986                        last_transaction_date: po_date,
7987                        related_entities: std::collections::HashSet::new(),
7988                    });
7989                entry.total_volume += amount;
7990                entry.transaction_count += 1;
7991                if po_date < entry.first_transaction_date {
7992                    entry.first_transaction_date = po_date;
7993                }
7994                if po_date > entry.last_transaction_date {
7995                    entry.last_transaction_date = po_date;
7996                }
7997                entry.related_entities.insert(cc);
7998            }
7999
8000            // O2C chains: Company → Customer relationships
8001            for chain in &document_flows.o2c_chains {
8002                let cc = chain.sales_order.header.company_code.clone();
8003                let customer_id = chain.sales_order.customer_id.clone();
8004                let so_date = chain.sales_order.header.document_date;
8005                let amount = chain.sales_order.total_net_amount;
8006
8007                let entry = txn_summaries
8008                    .entry((cc.clone(), customer_id))
8009                    .or_insert_with(|| TransactionSummary {
8010                        total_volume: rust_decimal::Decimal::ZERO,
8011                        transaction_count: 0,
8012                        first_transaction_date: so_date,
8013                        last_transaction_date: so_date,
8014                        related_entities: std::collections::HashSet::new(),
8015                    });
8016                entry.total_volume += amount;
8017                entry.transaction_count += 1;
8018                if so_date < entry.first_transaction_date {
8019                    entry.first_transaction_date = so_date;
8020                }
8021                if so_date > entry.last_transaction_date {
8022                    entry.last_transaction_date = so_date;
8023                }
8024                entry.related_entities.insert(cc);
8025            }
8026
8027            let as_of_date = journal_entries
8028                .last()
8029                .map(|je| je.header.posting_date)
8030                .unwrap_or(start_date);
8031
8032            let graph = gen.generate_entity_graph(
8033                company_code,
8034                as_of_date,
8035                &vendor_summaries,
8036                &customer_summaries,
8037                &txn_summaries,
8038            );
8039
8040            info!(
8041                "Entity relationship graph: {} nodes, {} edges",
8042                graph.nodes.len(),
8043                graph.edges.len()
8044            );
8045            stats.entity_relationship_node_count = graph.nodes.len();
8046            stats.entity_relationship_edge_count = graph.edges.len();
8047            Some(graph)
8048        } else {
8049            None
8050        };
8051
8052        // --- Part 2: Cross-Process Links ---
8053        let cross_process_links = if cpl_enabled {
8054            // Build GoodsReceiptRef from P2P chains
8055            let gr_refs: Vec<GoodsReceiptRef> = document_flows
8056                .p2p_chains
8057                .iter()
8058                .flat_map(|chain| {
8059                    let vendor_id = chain.purchase_order.vendor_id.clone();
8060                    let cc = chain.purchase_order.header.company_code.clone();
8061                    chain.goods_receipts.iter().flat_map(move |gr| {
8062                        gr.items.iter().filter_map({
8063                            let doc_id = gr.header.document_id.clone();
8064                            let v_id = vendor_id.clone();
8065                            let company = cc.clone();
8066                            let receipt_date = gr.header.document_date;
8067                            move |item| {
8068                                item.base
8069                                    .material_id
8070                                    .as_ref()
8071                                    .map(|mat_id| GoodsReceiptRef {
8072                                        document_id: doc_id.clone(),
8073                                        material_id: mat_id.clone(),
8074                                        quantity: item.base.quantity,
8075                                        receipt_date,
8076                                        vendor_id: v_id.clone(),
8077                                        company_code: company.clone(),
8078                                    })
8079                            }
8080                        })
8081                    })
8082                })
8083                .collect();
8084
8085            // Build DeliveryRef from O2C chains
8086            let del_refs: Vec<DeliveryRef> = document_flows
8087                .o2c_chains
8088                .iter()
8089                .flat_map(|chain| {
8090                    let customer_id = chain.sales_order.customer_id.clone();
8091                    let cc = chain.sales_order.header.company_code.clone();
8092                    chain.deliveries.iter().flat_map(move |del| {
8093                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8094                        del.items.iter().filter_map({
8095                            let doc_id = del.header.document_id.clone();
8096                            let c_id = customer_id.clone();
8097                            let company = cc.clone();
8098                            move |item| {
8099                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8100                                    document_id: doc_id.clone(),
8101                                    material_id: mat_id.clone(),
8102                                    quantity: item.base.quantity,
8103                                    delivery_date,
8104                                    customer_id: c_id.clone(),
8105                                    company_code: company.clone(),
8106                                })
8107                            }
8108                        })
8109                    })
8110                })
8111                .collect();
8112
8113            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
8114            info!("Cross-process links generated: {} links", links.len());
8115            stats.cross_process_link_count = links.len();
8116            links
8117        } else {
8118            Vec::new()
8119        };
8120
8121        self.check_resources_with_log("post-entity-relationships")?;
8122        Ok((entity_graph, cross_process_links))
8123    }
8124
8125    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
8126    fn phase_industry_data(
8127        &self,
8128        stats: &mut EnhancedGenerationStatistics,
8129    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
8130        if !self.config.industry_specific.enabled {
8131            return None;
8132        }
8133        info!("Phase 29: Generating industry-specific data");
8134        let output = datasynth_generators::industry::factory::generate_industry_output(
8135            self.config.global.industry,
8136        );
8137        stats.industry_gl_account_count = output.gl_accounts.len();
8138        info!(
8139            "Industry data generated: {} GL accounts for {:?}",
8140            output.gl_accounts.len(),
8141            self.config.global.industry
8142        );
8143        Some(output)
8144    }
8145
8146    /// Phase 3b: Generate opening balances for each company.
8147    fn phase_opening_balances(
8148        &mut self,
8149        coa: &Arc<ChartOfAccounts>,
8150        stats: &mut EnhancedGenerationStatistics,
8151    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
8152        if !self.config.balance.generate_opening_balances {
8153            debug!("Phase 3b: Skipped (opening balance generation disabled)");
8154            return Ok(Vec::new());
8155        }
8156        info!("Phase 3b: Generating Opening Balances");
8157
8158        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8159            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8160        let fiscal_year = start_date.year();
8161
8162        let industry = match self.config.global.industry {
8163            IndustrySector::Manufacturing => IndustryType::Manufacturing,
8164            IndustrySector::Retail => IndustryType::Retail,
8165            IndustrySector::FinancialServices => IndustryType::Financial,
8166            IndustrySector::Healthcare => IndustryType::Healthcare,
8167            IndustrySector::Technology => IndustryType::Technology,
8168            _ => IndustryType::Manufacturing,
8169        };
8170
8171        let config = datasynth_generators::OpeningBalanceConfig {
8172            industry,
8173            ..Default::default()
8174        };
8175        let mut gen =
8176            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
8177
8178        let mut results = Vec::new();
8179        for company in &self.config.companies {
8180            let spec = OpeningBalanceSpec::new(
8181                company.code.clone(),
8182                start_date,
8183                fiscal_year,
8184                company.currency.clone(),
8185                rust_decimal::Decimal::new(10_000_000, 0),
8186                industry,
8187            );
8188            let ob = gen.generate(&spec, coa, start_date, &company.code);
8189            results.push(ob);
8190        }
8191
8192        stats.opening_balance_count = results.len();
8193        info!("Opening balances generated: {} companies", results.len());
8194        self.check_resources_with_log("post-opening-balances")?;
8195
8196        Ok(results)
8197    }
8198
8199    /// Phase 9b: Reconcile GL control accounts to subledger balances.
8200    fn phase_subledger_reconciliation(
8201        &mut self,
8202        subledger: &SubledgerSnapshot,
8203        entries: &[JournalEntry],
8204        stats: &mut EnhancedGenerationStatistics,
8205    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
8206        if !self.config.balance.reconcile_subledgers {
8207            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
8208            return Ok(Vec::new());
8209        }
8210        info!("Phase 9b: Reconciling GL to subledger balances");
8211
8212        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8213            .map(|d| d + chrono::Months::new(self.config.global.period_months))
8214            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8215
8216        // Build GL balance map from journal entries using a balance tracker
8217        let tracker_config = BalanceTrackerConfig {
8218            validate_on_each_entry: false,
8219            track_history: false,
8220            fail_on_validation_error: false,
8221            ..Default::default()
8222        };
8223        let recon_currency = self
8224            .config
8225            .companies
8226            .first()
8227            .map(|c| c.currency.clone())
8228            .unwrap_or_else(|| "USD".to_string());
8229        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
8230        let validation_errors = tracker.apply_entries(entries);
8231        if !validation_errors.is_empty() {
8232            warn!(
8233                error_count = validation_errors.len(),
8234                "Balance tracker encountered validation errors during subledger reconciliation"
8235            );
8236            for err in &validation_errors {
8237                debug!("Balance validation error: {:?}", err);
8238            }
8239        }
8240
8241        let mut engine = datasynth_generators::ReconciliationEngine::new(
8242            datasynth_generators::ReconciliationConfig::default(),
8243        );
8244
8245        let mut results = Vec::new();
8246        let company_code = self
8247            .config
8248            .companies
8249            .first()
8250            .map(|c| c.code.as_str())
8251            .unwrap_or("1000");
8252
8253        // Reconcile AR
8254        if !subledger.ar_invoices.is_empty() {
8255            let gl_balance = tracker
8256                .get_account_balance(
8257                    company_code,
8258                    datasynth_core::accounts::control_accounts::AR_CONTROL,
8259                )
8260                .map(|b| b.closing_balance)
8261                .unwrap_or_default();
8262            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
8263            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
8264        }
8265
8266        // Reconcile AP
8267        if !subledger.ap_invoices.is_empty() {
8268            let gl_balance = tracker
8269                .get_account_balance(
8270                    company_code,
8271                    datasynth_core::accounts::control_accounts::AP_CONTROL,
8272                )
8273                .map(|b| b.closing_balance)
8274                .unwrap_or_default();
8275            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
8276            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
8277        }
8278
8279        // Reconcile FA
8280        if !subledger.fa_records.is_empty() {
8281            let gl_asset_balance = tracker
8282                .get_account_balance(
8283                    company_code,
8284                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
8285                )
8286                .map(|b| b.closing_balance)
8287                .unwrap_or_default();
8288            let gl_accum_depr_balance = tracker
8289                .get_account_balance(
8290                    company_code,
8291                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
8292                )
8293                .map(|b| b.closing_balance)
8294                .unwrap_or_default();
8295            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
8296                subledger.fa_records.iter().collect();
8297            let (asset_recon, depr_recon) = engine.reconcile_fa(
8298                company_code,
8299                end_date,
8300                gl_asset_balance,
8301                gl_accum_depr_balance,
8302                &fa_refs,
8303            );
8304            results.push(asset_recon);
8305            results.push(depr_recon);
8306        }
8307
8308        // Reconcile Inventory
8309        if !subledger.inventory_positions.is_empty() {
8310            let gl_balance = tracker
8311                .get_account_balance(
8312                    company_code,
8313                    datasynth_core::accounts::control_accounts::INVENTORY,
8314                )
8315                .map(|b| b.closing_balance)
8316                .unwrap_or_default();
8317            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
8318                subledger.inventory_positions.iter().collect();
8319            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
8320        }
8321
8322        stats.subledger_reconciliation_count = results.len();
8323        let passed = results.iter().filter(|r| r.is_balanced()).count();
8324        let failed = results.len() - passed;
8325        info!(
8326            "Subledger reconciliation: {} checks, {} passed, {} failed",
8327            results.len(),
8328            passed,
8329            failed
8330        );
8331        self.check_resources_with_log("post-subledger-reconciliation")?;
8332
8333        Ok(results)
8334    }
8335
8336    /// Generate the chart of accounts.
8337    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
8338        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
8339
8340        let coa_framework = self.resolve_coa_framework();
8341
8342        let mut gen = ChartOfAccountsGenerator::new(
8343            self.config.chart_of_accounts.complexity,
8344            self.config.global.industry,
8345            self.seed,
8346        )
8347        .with_coa_framework(coa_framework);
8348
8349        let coa = Arc::new(gen.generate());
8350        self.coa = Some(Arc::clone(&coa));
8351
8352        if let Some(pb) = pb {
8353            pb.finish_with_message("Chart of Accounts complete");
8354        }
8355
8356        Ok(coa)
8357    }
8358
8359    /// Generate master data entities.
8360    fn generate_master_data(&mut self) -> SynthResult<()> {
8361        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8362            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8363        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8364
8365        let total = self.config.companies.len() as u64 * 5; // 5 entity types
8366        let pb = self.create_progress_bar(total, "Generating Master Data");
8367
8368        // Resolve country pack once for all companies (uses primary company's country)
8369        let pack = self.primary_pack().clone();
8370
8371        // Capture config values needed inside the parallel closure
8372        let vendors_per_company = self.phase_config.vendors_per_company;
8373        let customers_per_company = self.phase_config.customers_per_company;
8374        let materials_per_company = self.phase_config.materials_per_company;
8375        let assets_per_company = self.phase_config.assets_per_company;
8376        let coa_framework = self.resolve_coa_framework();
8377
8378        // Generate all master data in parallel across companies.
8379        // Each company's data is independent, making this embarrassingly parallel.
8380        let per_company_results: Vec<_> = self
8381            .config
8382            .companies
8383            .par_iter()
8384            .enumerate()
8385            .map(|(i, company)| {
8386                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
8387                let pack = pack.clone();
8388
8389                // Generate vendors (offset counter so IDs are globally unique across companies)
8390                let mut vendor_gen = VendorGenerator::new(company_seed);
8391                vendor_gen.set_country_pack(pack.clone());
8392                vendor_gen.set_coa_framework(coa_framework);
8393                vendor_gen.set_counter_offset(i * vendors_per_company);
8394                // Wire vendor network config when enabled
8395                if self.config.vendor_network.enabled {
8396                    let vn = &self.config.vendor_network;
8397                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
8398                        enabled: true,
8399                        depth: vn.depth,
8400                        tier1_count: datasynth_generators::TierCountConfig::new(
8401                            vn.tier1.min,
8402                            vn.tier1.max,
8403                        ),
8404                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
8405                            vn.tier2_per_parent.min,
8406                            vn.tier2_per_parent.max,
8407                        ),
8408                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
8409                            vn.tier3_per_parent.min,
8410                            vn.tier3_per_parent.max,
8411                        ),
8412                        cluster_distribution: datasynth_generators::ClusterDistribution {
8413                            reliable_strategic: vn.clusters.reliable_strategic,
8414                            standard_operational: vn.clusters.standard_operational,
8415                            transactional: vn.clusters.transactional,
8416                            problematic: vn.clusters.problematic,
8417                        },
8418                        concentration_limits: datasynth_generators::ConcentrationLimits {
8419                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
8420                            max_top5: vn.dependencies.top_5_concentration,
8421                        },
8422                        ..datasynth_generators::VendorNetworkConfig::default()
8423                    });
8424                }
8425                let vendor_pool =
8426                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
8427
8428                // Generate customers (offset counter so IDs are globally unique across companies)
8429                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
8430                customer_gen.set_country_pack(pack.clone());
8431                customer_gen.set_coa_framework(coa_framework);
8432                customer_gen.set_counter_offset(i * customers_per_company);
8433                // Wire customer segmentation config when enabled
8434                if self.config.customer_segmentation.enabled {
8435                    let cs = &self.config.customer_segmentation;
8436                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
8437                        enabled: true,
8438                        segment_distribution: datasynth_generators::SegmentDistribution {
8439                            enterprise: cs.value_segments.enterprise.customer_share,
8440                            mid_market: cs.value_segments.mid_market.customer_share,
8441                            smb: cs.value_segments.smb.customer_share,
8442                            consumer: cs.value_segments.consumer.customer_share,
8443                        },
8444                        referral_config: datasynth_generators::ReferralConfig {
8445                            enabled: cs.networks.referrals.enabled,
8446                            referral_rate: cs.networks.referrals.referral_rate,
8447                            ..Default::default()
8448                        },
8449                        hierarchy_config: datasynth_generators::HierarchyConfig {
8450                            enabled: cs.networks.corporate_hierarchies.enabled,
8451                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
8452                            ..Default::default()
8453                        },
8454                        ..Default::default()
8455                    };
8456                    customer_gen.set_segmentation_config(seg_cfg);
8457                }
8458                let customer_pool = customer_gen.generate_customer_pool(
8459                    customers_per_company,
8460                    &company.code,
8461                    start_date,
8462                );
8463
8464                // Generate materials (offset counter so IDs are globally unique across companies)
8465                let mut material_gen = MaterialGenerator::new(company_seed + 200);
8466                material_gen.set_country_pack(pack.clone());
8467                material_gen.set_counter_offset(i * materials_per_company);
8468                let material_pool = material_gen.generate_material_pool(
8469                    materials_per_company,
8470                    &company.code,
8471                    start_date,
8472                );
8473
8474                // Generate fixed assets
8475                let mut asset_gen = AssetGenerator::new(company_seed + 300);
8476                let asset_pool = asset_gen.generate_asset_pool(
8477                    assets_per_company,
8478                    &company.code,
8479                    (start_date, end_date),
8480                );
8481
8482                // Generate employees
8483                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
8484                employee_gen.set_country_pack(pack);
8485                let employee_pool =
8486                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
8487
8488                // Generate employee change history (2-5 events per employee)
8489                let employee_change_history =
8490                    employee_gen.generate_all_change_history(&employee_pool, end_date);
8491
8492                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
8493                let employee_ids: Vec<String> = employee_pool
8494                    .employees
8495                    .iter()
8496                    .map(|e| e.employee_id.clone())
8497                    .collect();
8498                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
8499                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
8500
8501                (
8502                    vendor_pool.vendors,
8503                    customer_pool.customers,
8504                    material_pool.materials,
8505                    asset_pool.assets,
8506                    employee_pool.employees,
8507                    employee_change_history,
8508                    cost_centers,
8509                )
8510            })
8511            .collect();
8512
8513        // Aggregate results from all companies
8514        for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
8515            per_company_results
8516        {
8517            self.master_data.vendors.extend(vendors);
8518            self.master_data.customers.extend(customers);
8519            self.master_data.materials.extend(materials);
8520            self.master_data.assets.extend(assets);
8521            self.master_data.employees.extend(employees);
8522            self.master_data.cost_centers.extend(cost_centers);
8523            self.master_data
8524                .employee_change_history
8525                .extend(change_history);
8526        }
8527
8528        if let Some(pb) = &pb {
8529            pb.inc(total);
8530        }
8531        if let Some(pb) = pb {
8532            pb.finish_with_message("Master data generation complete");
8533        }
8534
8535        Ok(())
8536    }
8537
8538    /// Generate document flows (P2P and O2C).
8539    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
8540        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8541            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8542
8543        // Generate P2P chains
8544        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
8545        let months = (self.config.global.period_months as usize).max(1);
8546        let p2p_count = self
8547            .phase_config
8548            .p2p_chains
8549            .min(self.master_data.vendors.len() * 2 * months);
8550        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
8551
8552        // Convert P2P config from schema to generator config
8553        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
8554        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
8555        p2p_gen.set_country_pack(self.primary_pack().clone());
8556
8557        for i in 0..p2p_count {
8558            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
8559            let materials: Vec<&Material> = self
8560                .master_data
8561                .materials
8562                .iter()
8563                .skip(i % self.master_data.materials.len().max(1))
8564                .take(2.min(self.master_data.materials.len()))
8565                .collect();
8566
8567            if materials.is_empty() {
8568                continue;
8569            }
8570
8571            let company = &self.config.companies[i % self.config.companies.len()];
8572            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
8573            let fiscal_period = po_date.month() as u8;
8574            let created_by = if self.master_data.employees.is_empty() {
8575                "SYSTEM"
8576            } else {
8577                self.master_data.employees[i % self.master_data.employees.len()]
8578                    .user_id
8579                    .as_str()
8580            };
8581
8582            let chain = p2p_gen.generate_chain(
8583                &company.code,
8584                vendor,
8585                &materials,
8586                po_date,
8587                start_date.year() as u16,
8588                fiscal_period,
8589                created_by,
8590            );
8591
8592            // Flatten documents
8593            flows.purchase_orders.push(chain.purchase_order.clone());
8594            flows.goods_receipts.extend(chain.goods_receipts.clone());
8595            if let Some(vi) = &chain.vendor_invoice {
8596                flows.vendor_invoices.push(vi.clone());
8597            }
8598            if let Some(payment) = &chain.payment {
8599                flows.payments.push(payment.clone());
8600            }
8601            for remainder in &chain.remainder_payments {
8602                flows.payments.push(remainder.clone());
8603            }
8604            flows.p2p_chains.push(chain);
8605
8606            if let Some(pb) = &pb {
8607                pb.inc(1);
8608            }
8609        }
8610
8611        if let Some(pb) = pb {
8612            pb.finish_with_message("P2P document flows complete");
8613        }
8614
8615        // Generate O2C chains
8616        // Cap at ~2 SOs per customer per month to keep order volume realistic
8617        let o2c_count = self
8618            .phase_config
8619            .o2c_chains
8620            .min(self.master_data.customers.len() * 2 * months);
8621        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
8622
8623        // Convert O2C config from schema to generator config
8624        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
8625        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
8626        o2c_gen.set_country_pack(self.primary_pack().clone());
8627
8628        for i in 0..o2c_count {
8629            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
8630            let materials: Vec<&Material> = self
8631                .master_data
8632                .materials
8633                .iter()
8634                .skip(i % self.master_data.materials.len().max(1))
8635                .take(2.min(self.master_data.materials.len()))
8636                .collect();
8637
8638            if materials.is_empty() {
8639                continue;
8640            }
8641
8642            let company = &self.config.companies[i % self.config.companies.len()];
8643            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
8644            let fiscal_period = so_date.month() as u8;
8645            let created_by = if self.master_data.employees.is_empty() {
8646                "SYSTEM"
8647            } else {
8648                self.master_data.employees[i % self.master_data.employees.len()]
8649                    .user_id
8650                    .as_str()
8651            };
8652
8653            let chain = o2c_gen.generate_chain(
8654                &company.code,
8655                customer,
8656                &materials,
8657                so_date,
8658                start_date.year() as u16,
8659                fiscal_period,
8660                created_by,
8661            );
8662
8663            // Flatten documents
8664            flows.sales_orders.push(chain.sales_order.clone());
8665            flows.deliveries.extend(chain.deliveries.clone());
8666            if let Some(ci) = &chain.customer_invoice {
8667                flows.customer_invoices.push(ci.clone());
8668            }
8669            if let Some(receipt) = &chain.customer_receipt {
8670                flows.payments.push(receipt.clone());
8671            }
8672            // Extract remainder receipts (follow-up to partial payments)
8673            for receipt in &chain.remainder_receipts {
8674                flows.payments.push(receipt.clone());
8675            }
8676            flows.o2c_chains.push(chain);
8677
8678            if let Some(pb) = &pb {
8679                pb.inc(1);
8680            }
8681        }
8682
8683        if let Some(pb) = pb {
8684            pb.finish_with_message("O2C document flows complete");
8685        }
8686
8687        // Collect all document cross-references from document headers.
8688        // Each document embeds references to its predecessor(s) via add_reference(); here we
8689        // denormalise them into a flat list for the document_references.json output file.
8690        {
8691            let mut refs = Vec::new();
8692            for doc in &flows.purchase_orders {
8693                refs.extend(doc.header.document_references.iter().cloned());
8694            }
8695            for doc in &flows.goods_receipts {
8696                refs.extend(doc.header.document_references.iter().cloned());
8697            }
8698            for doc in &flows.vendor_invoices {
8699                refs.extend(doc.header.document_references.iter().cloned());
8700            }
8701            for doc in &flows.sales_orders {
8702                refs.extend(doc.header.document_references.iter().cloned());
8703            }
8704            for doc in &flows.deliveries {
8705                refs.extend(doc.header.document_references.iter().cloned());
8706            }
8707            for doc in &flows.customer_invoices {
8708                refs.extend(doc.header.document_references.iter().cloned());
8709            }
8710            for doc in &flows.payments {
8711                refs.extend(doc.header.document_references.iter().cloned());
8712            }
8713            debug!(
8714                "Collected {} document cross-references from document headers",
8715                refs.len()
8716            );
8717            flows.document_references = refs;
8718        }
8719
8720        Ok(())
8721    }
8722
8723    /// Generate journal entries using parallel generation across multiple cores.
8724    fn generate_journal_entries(
8725        &mut self,
8726        coa: &Arc<ChartOfAccounts>,
8727    ) -> SynthResult<Vec<JournalEntry>> {
8728        use datasynth_core::traits::ParallelGenerator;
8729
8730        let total = self.calculate_total_transactions();
8731        let pb = self.create_progress_bar(total, "Generating Journal Entries");
8732
8733        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8734            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8735        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8736
8737        let company_codes: Vec<String> = self
8738            .config
8739            .companies
8740            .iter()
8741            .map(|c| c.code.clone())
8742            .collect();
8743
8744        let generator = JournalEntryGenerator::new_with_params(
8745            self.config.transactions.clone(),
8746            Arc::clone(coa),
8747            company_codes,
8748            start_date,
8749            end_date,
8750            self.seed,
8751        );
8752
8753        // Connect generated master data to ensure JEs reference real entities
8754        // Enable persona-based error injection for realistic human behavior
8755        // Pass fraud configuration for fraud injection
8756        let je_pack = self.primary_pack();
8757
8758        let mut generator = generator
8759            .with_master_data(
8760                &self.master_data.vendors,
8761                &self.master_data.customers,
8762                &self.master_data.materials,
8763            )
8764            .with_country_pack_names(je_pack)
8765            .with_country_pack_temporal(
8766                self.config.temporal_patterns.clone(),
8767                self.seed + 200,
8768                je_pack,
8769            )
8770            .with_persona_errors(true)
8771            .with_fraud_config(self.config.fraud.clone());
8772
8773        // Apply temporal drift if configured
8774        if self.config.temporal.enabled {
8775            let drift_config = self.config.temporal.to_core_config();
8776            generator = generator.with_drift_config(drift_config, self.seed + 100);
8777        }
8778
8779        // Check memory limit at start
8780        self.check_memory_limit()?;
8781
8782        // Determine parallelism: use available cores, but cap at total entries
8783        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
8784
8785        // Use parallel generation for datasets with 10K+ entries.
8786        // Below this threshold, the statistical properties of a single-seeded
8787        // generator (e.g. Benford compliance) are better preserved.
8788        let entries = if total >= 10_000 && num_threads > 1 {
8789            // Parallel path: split the generator across cores and generate in parallel.
8790            // Each sub-generator gets a unique seed for deterministic, independent generation.
8791            let sub_generators = generator.split(num_threads);
8792            let entries_per_thread = total as usize / num_threads;
8793            let remainder = total as usize % num_threads;
8794
8795            let batches: Vec<Vec<JournalEntry>> = sub_generators
8796                .into_par_iter()
8797                .enumerate()
8798                .map(|(i, mut gen)| {
8799                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
8800                    gen.generate_batch(count)
8801                })
8802                .collect();
8803
8804            // Merge all batches into a single Vec
8805            let entries = JournalEntryGenerator::merge_results(batches);
8806
8807            if let Some(pb) = &pb {
8808                pb.inc(total);
8809            }
8810            entries
8811        } else {
8812            // Sequential path for small datasets (< 1000 entries)
8813            let mut entries = Vec::with_capacity(total as usize);
8814            for _ in 0..total {
8815                let entry = generator.generate();
8816                entries.push(entry);
8817                if let Some(pb) = &pb {
8818                    pb.inc(1);
8819                }
8820            }
8821            entries
8822        };
8823
8824        if let Some(pb) = pb {
8825            pb.finish_with_message("Journal entries complete");
8826        }
8827
8828        Ok(entries)
8829    }
8830
8831    /// Generate journal entries from document flows.
8832    ///
8833    /// This creates proper GL entries for each document in the P2P and O2C flows,
8834    /// ensuring that document activity is reflected in the general ledger.
8835    fn generate_jes_from_document_flows(
8836        &mut self,
8837        flows: &DocumentFlowSnapshot,
8838    ) -> SynthResult<Vec<JournalEntry>> {
8839        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
8840        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
8841
8842        let je_config = match self.resolve_coa_framework() {
8843            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
8844            CoAFramework::GermanSkr04 => {
8845                let fa = datasynth_core::FrameworkAccounts::german_gaap();
8846                DocumentFlowJeConfig::from(&fa)
8847            }
8848            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
8849        };
8850
8851        let populate_fec = je_config.populate_fec_fields;
8852        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
8853
8854        // Build auxiliary account lookup from vendor/customer master data so that
8855        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
8856        // PCG "4010001") instead of raw partner IDs.
8857        if populate_fec {
8858            let mut aux_lookup = std::collections::HashMap::new();
8859            for vendor in &self.master_data.vendors {
8860                if let Some(ref aux) = vendor.auxiliary_gl_account {
8861                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
8862                }
8863            }
8864            for customer in &self.master_data.customers {
8865                if let Some(ref aux) = customer.auxiliary_gl_account {
8866                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
8867                }
8868            }
8869            if !aux_lookup.is_empty() {
8870                generator.set_auxiliary_account_lookup(aux_lookup);
8871            }
8872        }
8873
8874        let mut entries = Vec::new();
8875
8876        // Generate JEs from P2P chains
8877        for chain in &flows.p2p_chains {
8878            let chain_entries = generator.generate_from_p2p_chain(chain);
8879            entries.extend(chain_entries);
8880            if let Some(pb) = &pb {
8881                pb.inc(1);
8882            }
8883        }
8884
8885        // Generate JEs from O2C chains
8886        for chain in &flows.o2c_chains {
8887            let chain_entries = generator.generate_from_o2c_chain(chain);
8888            entries.extend(chain_entries);
8889            if let Some(pb) = &pb {
8890                pb.inc(1);
8891            }
8892        }
8893
8894        if let Some(pb) = pb {
8895            pb.finish_with_message(format!(
8896                "Generated {} JEs from document flows",
8897                entries.len()
8898            ));
8899        }
8900
8901        Ok(entries)
8902    }
8903
8904    /// Generate journal entries from payroll runs.
8905    ///
8906    /// Creates one JE per payroll run:
8907    /// - DR Salaries & Wages (6100) for gross pay
8908    /// - CR Payroll Clearing (9100) for gross pay
8909    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
8910        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
8911
8912        let mut jes = Vec::with_capacity(payroll_runs.len());
8913
8914        for run in payroll_runs {
8915            let mut je = JournalEntry::new_simple(
8916                format!("JE-PAYROLL-{}", run.payroll_id),
8917                run.company_code.clone(),
8918                run.run_date,
8919                format!("Payroll {}", run.payroll_id),
8920            );
8921
8922            // Debit Salaries & Wages for gross pay
8923            je.add_line(JournalEntryLine {
8924                line_number: 1,
8925                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
8926                debit_amount: run.total_gross,
8927                reference: Some(run.payroll_id.clone()),
8928                text: Some(format!(
8929                    "Payroll {} ({} employees)",
8930                    run.payroll_id, run.employee_count
8931                )),
8932                ..Default::default()
8933            });
8934
8935            // Credit Payroll Clearing for gross pay
8936            je.add_line(JournalEntryLine {
8937                line_number: 2,
8938                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
8939                credit_amount: run.total_gross,
8940                reference: Some(run.payroll_id.clone()),
8941                ..Default::default()
8942            });
8943
8944            jes.push(je);
8945        }
8946
8947        jes
8948    }
8949
8950    /// Generate journal entries from production orders.
8951    ///
8952    /// Creates one JE per completed production order:
8953    /// - DR Raw Materials (5100) for material consumption (actual_cost)
8954    /// - CR Inventory (1200) for material consumption
8955    fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
8956        use datasynth_core::accounts::{control_accounts, expense_accounts};
8957        use datasynth_core::models::ProductionOrderStatus;
8958
8959        let mut jes = Vec::new();
8960
8961        for order in production_orders {
8962            // Only generate JEs for completed or closed orders
8963            if !matches!(
8964                order.status,
8965                ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
8966            ) {
8967                continue;
8968            }
8969
8970            let mut je = JournalEntry::new_simple(
8971                format!("JE-MFG-{}", order.order_id),
8972                order.company_code.clone(),
8973                order.actual_end.unwrap_or(order.planned_end),
8974                format!(
8975                    "Production Order {} - {}",
8976                    order.order_id, order.material_description
8977                ),
8978            );
8979
8980            // Debit Raw Materials / Manufacturing expense for actual cost
8981            je.add_line(JournalEntryLine {
8982                line_number: 1,
8983                gl_account: expense_accounts::RAW_MATERIALS.to_string(),
8984                debit_amount: order.actual_cost,
8985                reference: Some(order.order_id.clone()),
8986                text: Some(format!(
8987                    "Material consumption for {}",
8988                    order.material_description
8989                )),
8990                quantity: Some(order.actual_quantity),
8991                unit: Some("EA".to_string()),
8992                ..Default::default()
8993            });
8994
8995            // Credit Inventory for material consumption
8996            je.add_line(JournalEntryLine {
8997                line_number: 2,
8998                gl_account: control_accounts::INVENTORY.to_string(),
8999                credit_amount: order.actual_cost,
9000                reference: Some(order.order_id.clone()),
9001                ..Default::default()
9002            });
9003
9004            jes.push(je);
9005        }
9006
9007        jes
9008    }
9009
9010    /// Link document flows to subledger records.
9011    ///
9012    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
9013    /// ensuring subledger data is coherent with document flow data.
9014    fn link_document_flows_to_subledgers(
9015        &mut self,
9016        flows: &DocumentFlowSnapshot,
9017    ) -> SynthResult<SubledgerSnapshot> {
9018        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9019        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9020
9021        // Build vendor/customer name maps from master data for realistic subledger names
9022        let vendor_names: std::collections::HashMap<String, String> = self
9023            .master_data
9024            .vendors
9025            .iter()
9026            .map(|v| (v.vendor_id.clone(), v.name.clone()))
9027            .collect();
9028        let customer_names: std::collections::HashMap<String, String> = self
9029            .master_data
9030            .customers
9031            .iter()
9032            .map(|c| (c.customer_id.clone(), c.name.clone()))
9033            .collect();
9034
9035        let mut linker = DocumentFlowLinker::new()
9036            .with_vendor_names(vendor_names)
9037            .with_customer_names(customer_names);
9038
9039        // Convert vendor invoices to AP invoices
9040        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9041        if let Some(pb) = &pb {
9042            pb.inc(flows.vendor_invoices.len() as u64);
9043        }
9044
9045        // Convert customer invoices to AR invoices
9046        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9047        if let Some(pb) = &pb {
9048            pb.inc(flows.customer_invoices.len() as u64);
9049        }
9050
9051        if let Some(pb) = pb {
9052            pb.finish_with_message(format!(
9053                "Linked {} AP and {} AR invoices",
9054                ap_invoices.len(),
9055                ar_invoices.len()
9056            ));
9057        }
9058
9059        Ok(SubledgerSnapshot {
9060            ap_invoices,
9061            ar_invoices,
9062            fa_records: Vec::new(),
9063            inventory_positions: Vec::new(),
9064            inventory_movements: Vec::new(),
9065            // Aging reports are computed after payment settlement in phase_document_flows.
9066            ar_aging_reports: Vec::new(),
9067            ap_aging_reports: Vec::new(),
9068            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
9069            depreciation_runs: Vec::new(),
9070            inventory_valuations: Vec::new(),
9071            // Dunning runs and letters are populated in phase_document_flows after AR aging.
9072            dunning_runs: Vec::new(),
9073            dunning_letters: Vec::new(),
9074        })
9075    }
9076
9077    /// Generate OCPM events from document flows.
9078    ///
9079    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
9080    /// capturing the object-centric process perspective.
9081    #[allow(clippy::too_many_arguments)]
9082    fn generate_ocpm_events(
9083        &mut self,
9084        flows: &DocumentFlowSnapshot,
9085        sourcing: &SourcingSnapshot,
9086        hr: &HrSnapshot,
9087        manufacturing: &ManufacturingSnapshot,
9088        banking: &BankingSnapshot,
9089        audit: &AuditSnapshot,
9090        financial_reporting: &FinancialReportingSnapshot,
9091    ) -> SynthResult<OcpmSnapshot> {
9092        let total_chains = flows.p2p_chains.len()
9093            + flows.o2c_chains.len()
9094            + sourcing.sourcing_projects.len()
9095            + hr.payroll_runs.len()
9096            + manufacturing.production_orders.len()
9097            + banking.customers.len()
9098            + audit.engagements.len()
9099            + financial_reporting.bank_reconciliations.len();
9100        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9101
9102        // Create OCPM event log with standard types
9103        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9104        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9105
9106        // Configure the OCPM generator
9107        let ocpm_config = OcpmGeneratorConfig {
9108            generate_p2p: true,
9109            generate_o2c: true,
9110            generate_s2c: !sourcing.sourcing_projects.is_empty(),
9111            generate_h2r: !hr.payroll_runs.is_empty(),
9112            generate_mfg: !manufacturing.production_orders.is_empty(),
9113            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9114            generate_bank: !banking.customers.is_empty(),
9115            generate_audit: !audit.engagements.is_empty(),
9116            happy_path_rate: 0.75,
9117            exception_path_rate: 0.20,
9118            error_path_rate: 0.05,
9119            add_duration_variability: true,
9120            duration_std_dev_factor: 0.3,
9121        };
9122        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9123        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9124
9125        // Get available users for resource assignment
9126        let available_users: Vec<String> = self
9127            .master_data
9128            .employees
9129            .iter()
9130            .take(20)
9131            .map(|e| e.user_id.clone())
9132            .collect();
9133
9134        // Deterministic base date from config (avoids Utc::now() non-determinism)
9135        let fallback_date =
9136            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9137        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9138            .unwrap_or(fallback_date);
9139        let base_midnight = base_date
9140            .and_hms_opt(0, 0, 0)
9141            .expect("midnight is always valid");
9142        let base_datetime =
9143            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9144
9145        // Helper closure to add case results to event log
9146        let add_result = |event_log: &mut OcpmEventLog,
9147                          result: datasynth_ocpm::CaseGenerationResult| {
9148            for event in result.events {
9149                event_log.add_event(event);
9150            }
9151            for object in result.objects {
9152                event_log.add_object(object);
9153            }
9154            for relationship in result.relationships {
9155                event_log.add_relationship(relationship);
9156            }
9157            for corr in result.correlation_events {
9158                event_log.add_correlation_event(corr);
9159            }
9160            event_log.add_case(result.case_trace);
9161        };
9162
9163        // Generate events from P2P chains
9164        for chain in &flows.p2p_chains {
9165            let po = &chain.purchase_order;
9166            let documents = P2pDocuments::new(
9167                &po.header.document_id,
9168                &po.vendor_id,
9169                &po.header.company_code,
9170                po.total_net_amount,
9171                &po.header.currency,
9172                &ocpm_uuid_factory,
9173            )
9174            .with_goods_receipt(
9175                chain
9176                    .goods_receipts
9177                    .first()
9178                    .map(|gr| gr.header.document_id.as_str())
9179                    .unwrap_or(""),
9180                &ocpm_uuid_factory,
9181            )
9182            .with_invoice(
9183                chain
9184                    .vendor_invoice
9185                    .as_ref()
9186                    .map(|vi| vi.header.document_id.as_str())
9187                    .unwrap_or(""),
9188                &ocpm_uuid_factory,
9189            )
9190            .with_payment(
9191                chain
9192                    .payment
9193                    .as_ref()
9194                    .map(|p| p.header.document_id.as_str())
9195                    .unwrap_or(""),
9196                &ocpm_uuid_factory,
9197            );
9198
9199            let start_time =
9200                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
9201            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
9202            add_result(&mut event_log, result);
9203
9204            if let Some(pb) = &pb {
9205                pb.inc(1);
9206            }
9207        }
9208
9209        // Generate events from O2C chains
9210        for chain in &flows.o2c_chains {
9211            let so = &chain.sales_order;
9212            let documents = O2cDocuments::new(
9213                &so.header.document_id,
9214                &so.customer_id,
9215                &so.header.company_code,
9216                so.total_net_amount,
9217                &so.header.currency,
9218                &ocpm_uuid_factory,
9219            )
9220            .with_delivery(
9221                chain
9222                    .deliveries
9223                    .first()
9224                    .map(|d| d.header.document_id.as_str())
9225                    .unwrap_or(""),
9226                &ocpm_uuid_factory,
9227            )
9228            .with_invoice(
9229                chain
9230                    .customer_invoice
9231                    .as_ref()
9232                    .map(|ci| ci.header.document_id.as_str())
9233                    .unwrap_or(""),
9234                &ocpm_uuid_factory,
9235            )
9236            .with_receipt(
9237                chain
9238                    .customer_receipt
9239                    .as_ref()
9240                    .map(|r| r.header.document_id.as_str())
9241                    .unwrap_or(""),
9242                &ocpm_uuid_factory,
9243            );
9244
9245            let start_time =
9246                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
9247            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
9248            add_result(&mut event_log, result);
9249
9250            if let Some(pb) = &pb {
9251                pb.inc(1);
9252            }
9253        }
9254
9255        // Generate events from S2C sourcing projects
9256        for project in &sourcing.sourcing_projects {
9257            // Find vendor from contracts or qualifications
9258            let vendor_id = sourcing
9259                .contracts
9260                .iter()
9261                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9262                .map(|c| c.vendor_id.clone())
9263                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
9264                .or_else(|| {
9265                    self.master_data
9266                        .vendors
9267                        .first()
9268                        .map(|v| v.vendor_id.clone())
9269                })
9270                .unwrap_or_else(|| "V000".to_string());
9271            let mut docs = S2cDocuments::new(
9272                &project.project_id,
9273                &vendor_id,
9274                &project.company_code,
9275                project.estimated_annual_spend,
9276                &ocpm_uuid_factory,
9277            );
9278            // Link RFx if available
9279            if let Some(rfx) = sourcing
9280                .rfx_events
9281                .iter()
9282                .find(|r| r.sourcing_project_id == project.project_id)
9283            {
9284                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
9285                // Link winning bid (status == Accepted)
9286                if let Some(bid) = sourcing.bids.iter().find(|b| {
9287                    b.rfx_id == rfx.rfx_id
9288                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
9289                }) {
9290                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
9291                }
9292            }
9293            // Link contract
9294            if let Some(contract) = sourcing
9295                .contracts
9296                .iter()
9297                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9298            {
9299                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
9300            }
9301            let start_time = base_datetime - chrono::Duration::days(90);
9302            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
9303            add_result(&mut event_log, result);
9304
9305            if let Some(pb) = &pb {
9306                pb.inc(1);
9307            }
9308        }
9309
9310        // Generate events from H2R payroll runs
9311        for run in &hr.payroll_runs {
9312            // Use first matching payroll line item's employee, or fallback
9313            let employee_id = hr
9314                .payroll_line_items
9315                .iter()
9316                .find(|li| li.payroll_id == run.payroll_id)
9317                .map(|li| li.employee_id.as_str())
9318                .unwrap_or("EMP000");
9319            let docs = H2rDocuments::new(
9320                &run.payroll_id,
9321                employee_id,
9322                &run.company_code,
9323                run.total_gross,
9324                &ocpm_uuid_factory,
9325            )
9326            .with_time_entries(
9327                hr.time_entries
9328                    .iter()
9329                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
9330                    .take(5)
9331                    .map(|t| t.entry_id.as_str())
9332                    .collect(),
9333            );
9334            let start_time = base_datetime - chrono::Duration::days(30);
9335            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
9336            add_result(&mut event_log, result);
9337
9338            if let Some(pb) = &pb {
9339                pb.inc(1);
9340            }
9341        }
9342
9343        // Generate events from MFG production orders
9344        for order in &manufacturing.production_orders {
9345            let mut docs = MfgDocuments::new(
9346                &order.order_id,
9347                &order.material_id,
9348                &order.company_code,
9349                order.planned_quantity,
9350                &ocpm_uuid_factory,
9351            )
9352            .with_operations(
9353                order
9354                    .operations
9355                    .iter()
9356                    .map(|o| format!("OP-{:04}", o.operation_number))
9357                    .collect::<Vec<_>>()
9358                    .iter()
9359                    .map(std::string::String::as_str)
9360                    .collect(),
9361            );
9362            // Link quality inspection if available (via reference_id matching order_id)
9363            if let Some(insp) = manufacturing
9364                .quality_inspections
9365                .iter()
9366                .find(|i| i.reference_id == order.order_id)
9367            {
9368                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
9369            }
9370            // Link cycle count if available (match by material_id in items)
9371            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
9372                cc.items
9373                    .iter()
9374                    .any(|item| item.material_id == order.material_id)
9375            }) {
9376                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
9377            }
9378            let start_time = base_datetime - chrono::Duration::days(60);
9379            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
9380            add_result(&mut event_log, result);
9381
9382            if let Some(pb) = &pb {
9383                pb.inc(1);
9384            }
9385        }
9386
9387        // Generate events from Banking customers
9388        for customer in &banking.customers {
9389            let customer_id_str = customer.customer_id.to_string();
9390            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
9391            // Link accounts (primary_owner_id matches customer_id)
9392            if let Some(account) = banking
9393                .accounts
9394                .iter()
9395                .find(|a| a.primary_owner_id == customer.customer_id)
9396            {
9397                let account_id_str = account.account_id.to_string();
9398                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
9399                // Link transactions for this account
9400                let txn_strs: Vec<String> = banking
9401                    .transactions
9402                    .iter()
9403                    .filter(|t| t.account_id == account.account_id)
9404                    .take(10)
9405                    .map(|t| t.transaction_id.to_string())
9406                    .collect();
9407                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
9408                let txn_amounts: Vec<rust_decimal::Decimal> = banking
9409                    .transactions
9410                    .iter()
9411                    .filter(|t| t.account_id == account.account_id)
9412                    .take(10)
9413                    .map(|t| t.amount)
9414                    .collect();
9415                if !txn_ids.is_empty() {
9416                    docs = docs.with_transactions(txn_ids, txn_amounts);
9417                }
9418            }
9419            let start_time = base_datetime - chrono::Duration::days(180);
9420            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
9421            add_result(&mut event_log, result);
9422
9423            if let Some(pb) = &pb {
9424                pb.inc(1);
9425            }
9426        }
9427
9428        // Generate events from Audit engagements
9429        for engagement in &audit.engagements {
9430            let engagement_id_str = engagement.engagement_id.to_string();
9431            let docs = AuditDocuments::new(
9432                &engagement_id_str,
9433                &engagement.client_entity_id,
9434                &ocpm_uuid_factory,
9435            )
9436            .with_workpapers(
9437                audit
9438                    .workpapers
9439                    .iter()
9440                    .filter(|w| w.engagement_id == engagement.engagement_id)
9441                    .take(10)
9442                    .map(|w| w.workpaper_id.to_string())
9443                    .collect::<Vec<_>>()
9444                    .iter()
9445                    .map(std::string::String::as_str)
9446                    .collect(),
9447            )
9448            .with_evidence(
9449                audit
9450                    .evidence
9451                    .iter()
9452                    .filter(|e| e.engagement_id == engagement.engagement_id)
9453                    .take(10)
9454                    .map(|e| e.evidence_id.to_string())
9455                    .collect::<Vec<_>>()
9456                    .iter()
9457                    .map(std::string::String::as_str)
9458                    .collect(),
9459            )
9460            .with_risks(
9461                audit
9462                    .risk_assessments
9463                    .iter()
9464                    .filter(|r| r.engagement_id == engagement.engagement_id)
9465                    .take(5)
9466                    .map(|r| r.risk_id.to_string())
9467                    .collect::<Vec<_>>()
9468                    .iter()
9469                    .map(std::string::String::as_str)
9470                    .collect(),
9471            )
9472            .with_findings(
9473                audit
9474                    .findings
9475                    .iter()
9476                    .filter(|f| f.engagement_id == engagement.engagement_id)
9477                    .take(5)
9478                    .map(|f| f.finding_id.to_string())
9479                    .collect::<Vec<_>>()
9480                    .iter()
9481                    .map(std::string::String::as_str)
9482                    .collect(),
9483            )
9484            .with_judgments(
9485                audit
9486                    .judgments
9487                    .iter()
9488                    .filter(|j| j.engagement_id == engagement.engagement_id)
9489                    .take(5)
9490                    .map(|j| j.judgment_id.to_string())
9491                    .collect::<Vec<_>>()
9492                    .iter()
9493                    .map(std::string::String::as_str)
9494                    .collect(),
9495            );
9496            let start_time = base_datetime - chrono::Duration::days(120);
9497            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
9498            add_result(&mut event_log, result);
9499
9500            if let Some(pb) = &pb {
9501                pb.inc(1);
9502            }
9503        }
9504
9505        // Generate events from Bank Reconciliations
9506        for recon in &financial_reporting.bank_reconciliations {
9507            let docs = BankReconDocuments::new(
9508                &recon.reconciliation_id,
9509                &recon.bank_account_id,
9510                &recon.company_code,
9511                recon.bank_ending_balance,
9512                &ocpm_uuid_factory,
9513            )
9514            .with_statement_lines(
9515                recon
9516                    .statement_lines
9517                    .iter()
9518                    .take(20)
9519                    .map(|l| l.line_id.as_str())
9520                    .collect(),
9521            )
9522            .with_reconciling_items(
9523                recon
9524                    .reconciling_items
9525                    .iter()
9526                    .take(10)
9527                    .map(|i| i.item_id.as_str())
9528                    .collect(),
9529            );
9530            let start_time = base_datetime - chrono::Duration::days(30);
9531            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
9532            add_result(&mut event_log, result);
9533
9534            if let Some(pb) = &pb {
9535                pb.inc(1);
9536            }
9537        }
9538
9539        // Compute process variants
9540        event_log.compute_variants();
9541
9542        let summary = event_log.summary();
9543
9544        if let Some(pb) = pb {
9545            pb.finish_with_message(format!(
9546                "Generated {} OCPM events, {} objects",
9547                summary.event_count, summary.object_count
9548            ));
9549        }
9550
9551        Ok(OcpmSnapshot {
9552            event_count: summary.event_count,
9553            object_count: summary.object_count,
9554            case_count: summary.case_count,
9555            event_log: Some(event_log),
9556        })
9557    }
9558
9559    /// Inject anomalies into journal entries.
9560    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
9561        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
9562
9563        // Read anomaly rates from config instead of using hardcoded values.
9564        // Priority: anomaly_injection config > fraud config > default 0.02
9565        let total_rate = if self.config.anomaly_injection.enabled {
9566            self.config.anomaly_injection.rates.total_rate
9567        } else if self.config.fraud.enabled {
9568            self.config.fraud.fraud_rate
9569        } else {
9570            0.02
9571        };
9572
9573        let fraud_rate = if self.config.anomaly_injection.enabled {
9574            self.config.anomaly_injection.rates.fraud_rate
9575        } else {
9576            AnomalyRateConfig::default().fraud_rate
9577        };
9578
9579        let error_rate = if self.config.anomaly_injection.enabled {
9580            self.config.anomaly_injection.rates.error_rate
9581        } else {
9582            AnomalyRateConfig::default().error_rate
9583        };
9584
9585        let process_issue_rate = if self.config.anomaly_injection.enabled {
9586            self.config.anomaly_injection.rates.process_rate
9587        } else {
9588            AnomalyRateConfig::default().process_issue_rate
9589        };
9590
9591        let anomaly_config = AnomalyInjectorConfig {
9592            rates: AnomalyRateConfig {
9593                total_rate,
9594                fraud_rate,
9595                error_rate,
9596                process_issue_rate,
9597                ..Default::default()
9598            },
9599            seed: self.seed + 5000,
9600            ..Default::default()
9601        };
9602
9603        let mut injector = AnomalyInjector::new(anomaly_config);
9604        let result = injector.process_entries(entries);
9605
9606        if let Some(pb) = &pb {
9607            pb.inc(entries.len() as u64);
9608            pb.finish_with_message("Anomaly injection complete");
9609        }
9610
9611        let mut by_type = HashMap::new();
9612        for label in &result.labels {
9613            *by_type
9614                .entry(format!("{:?}", label.anomaly_type))
9615                .or_insert(0) += 1;
9616        }
9617
9618        Ok(AnomalyLabels {
9619            labels: result.labels,
9620            summary: Some(result.summary),
9621            by_type,
9622        })
9623    }
9624
9625    /// Validate journal entries using running balance tracker.
9626    ///
9627    /// Applies all entries to the balance tracker and validates:
9628    /// - Each entry is internally balanced (debits = credits)
9629    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
9630    ///
9631    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
9632    /// excluded from balance validation as they may be intentionally unbalanced.
9633    fn validate_journal_entries(
9634        &mut self,
9635        entries: &[JournalEntry],
9636    ) -> SynthResult<BalanceValidationResult> {
9637        // Filter out entries with human errors as they may be intentionally unbalanced
9638        let clean_entries: Vec<&JournalEntry> = entries
9639            .iter()
9640            .filter(|e| {
9641                e.header
9642                    .header_text
9643                    .as_ref()
9644                    .map(|t| !t.contains("[HUMAN_ERROR:"))
9645                    .unwrap_or(true)
9646            })
9647            .collect();
9648
9649        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
9650
9651        // Configure tracker to not fail on errors (collect them instead)
9652        let config = BalanceTrackerConfig {
9653            validate_on_each_entry: false,   // We'll validate at the end
9654            track_history: false,            // Skip history for performance
9655            fail_on_validation_error: false, // Collect errors, don't fail
9656            ..Default::default()
9657        };
9658        let validation_currency = self
9659            .config
9660            .companies
9661            .first()
9662            .map(|c| c.currency.clone())
9663            .unwrap_or_else(|| "USD".to_string());
9664
9665        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
9666
9667        // Apply clean entries (without human errors)
9668        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
9669        let errors = tracker.apply_entries(&clean_refs);
9670
9671        if let Some(pb) = &pb {
9672            pb.inc(entries.len() as u64);
9673        }
9674
9675        // Check if any entries were unbalanced
9676        // Note: When fail_on_validation_error is false, errors are stored in tracker
9677        let has_unbalanced = tracker
9678            .get_validation_errors()
9679            .iter()
9680            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
9681
9682        // Validate balance sheet for each company
9683        // Include both returned errors and collected validation errors
9684        let mut all_errors = errors;
9685        all_errors.extend(tracker.get_validation_errors().iter().cloned());
9686        let company_codes: Vec<String> = self
9687            .config
9688            .companies
9689            .iter()
9690            .map(|c| c.code.clone())
9691            .collect();
9692
9693        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9694            .map(|d| d + chrono::Months::new(self.config.global.period_months))
9695            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9696
9697        for company_code in &company_codes {
9698            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
9699                all_errors.push(e);
9700            }
9701        }
9702
9703        // Get statistics after all mutable operations are done
9704        let stats = tracker.get_statistics();
9705
9706        // Determine if balanced overall
9707        let is_balanced = all_errors.is_empty();
9708
9709        if let Some(pb) = pb {
9710            let msg = if is_balanced {
9711                "Balance validation passed"
9712            } else {
9713                "Balance validation completed with errors"
9714            };
9715            pb.finish_with_message(msg);
9716        }
9717
9718        Ok(BalanceValidationResult {
9719            validated: true,
9720            is_balanced,
9721            entries_processed: stats.entries_processed,
9722            total_debits: stats.total_debits,
9723            total_credits: stats.total_credits,
9724            accounts_tracked: stats.accounts_tracked,
9725            companies_tracked: stats.companies_tracked,
9726            validation_errors: all_errors,
9727            has_unbalanced_entries: has_unbalanced,
9728        })
9729    }
9730
9731    /// Inject data quality variations into journal entries.
9732    ///
9733    /// Applies typos, missing values, and format variations to make
9734    /// the synthetic data more realistic for testing data cleaning pipelines.
9735    fn inject_data_quality(
9736        &mut self,
9737        entries: &mut [JournalEntry],
9738    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
9739        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
9740
9741        // Build config from user-specified schema settings when data_quality is enabled;
9742        // otherwise fall back to the low-rate minimal() preset.
9743        let config = if self.config.data_quality.enabled {
9744            let dq = &self.config.data_quality;
9745            DataQualityConfig {
9746                enable_missing_values: dq.missing_values.enabled,
9747                missing_values: datasynth_generators::MissingValueConfig {
9748                    global_rate: dq.effective_missing_rate(),
9749                    ..Default::default()
9750                },
9751                enable_format_variations: dq.format_variations.enabled,
9752                format_variations: datasynth_generators::FormatVariationConfig {
9753                    date_variation_rate: dq.format_variations.dates.rate,
9754                    amount_variation_rate: dq.format_variations.amounts.rate,
9755                    identifier_variation_rate: dq.format_variations.identifiers.rate,
9756                    ..Default::default()
9757                },
9758                enable_duplicates: dq.duplicates.enabled,
9759                duplicates: datasynth_generators::DuplicateConfig {
9760                    duplicate_rate: dq.effective_duplicate_rate(),
9761                    ..Default::default()
9762                },
9763                enable_typos: dq.typos.enabled,
9764                typos: datasynth_generators::TypoConfig {
9765                    char_error_rate: dq.effective_typo_rate(),
9766                    ..Default::default()
9767                },
9768                enable_encoding_issues: dq.encoding_issues.enabled,
9769                encoding_issue_rate: dq.encoding_issues.rate,
9770                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
9771                track_statistics: true,
9772            }
9773        } else {
9774            DataQualityConfig::minimal()
9775        };
9776        let mut injector = DataQualityInjector::new(config);
9777
9778        // Wire country pack for locale-aware format baselines
9779        injector.set_country_pack(self.primary_pack().clone());
9780
9781        // Build context for missing value decisions
9782        let context = HashMap::new();
9783
9784        for entry in entries.iter_mut() {
9785            // Process header_text field (common target for typos)
9786            if let Some(text) = &entry.header.header_text {
9787                let processed = injector.process_text_field(
9788                    "header_text",
9789                    text,
9790                    &entry.header.document_id.to_string(),
9791                    &context,
9792                );
9793                match processed {
9794                    Some(new_text) if new_text != *text => {
9795                        entry.header.header_text = Some(new_text);
9796                    }
9797                    None => {
9798                        entry.header.header_text = None; // Missing value
9799                    }
9800                    _ => {}
9801                }
9802            }
9803
9804            // Process reference field
9805            if let Some(ref_text) = &entry.header.reference {
9806                let processed = injector.process_text_field(
9807                    "reference",
9808                    ref_text,
9809                    &entry.header.document_id.to_string(),
9810                    &context,
9811                );
9812                match processed {
9813                    Some(new_text) if new_text != *ref_text => {
9814                        entry.header.reference = Some(new_text);
9815                    }
9816                    None => {
9817                        entry.header.reference = None;
9818                    }
9819                    _ => {}
9820                }
9821            }
9822
9823            // Process user_persona field (potential for typos in user IDs)
9824            let user_persona = entry.header.user_persona.clone();
9825            if let Some(processed) = injector.process_text_field(
9826                "user_persona",
9827                &user_persona,
9828                &entry.header.document_id.to_string(),
9829                &context,
9830            ) {
9831                if processed != user_persona {
9832                    entry.header.user_persona = processed;
9833                }
9834            }
9835
9836            // Process line items
9837            for line in &mut entry.lines {
9838                // Process line description if present
9839                if let Some(ref text) = line.line_text {
9840                    let processed = injector.process_text_field(
9841                        "line_text",
9842                        text,
9843                        &entry.header.document_id.to_string(),
9844                        &context,
9845                    );
9846                    match processed {
9847                        Some(new_text) if new_text != *text => {
9848                            line.line_text = Some(new_text);
9849                        }
9850                        None => {
9851                            line.line_text = None;
9852                        }
9853                        _ => {}
9854                    }
9855                }
9856
9857                // Process cost_center if present
9858                if let Some(cc) = &line.cost_center {
9859                    let processed = injector.process_text_field(
9860                        "cost_center",
9861                        cc,
9862                        &entry.header.document_id.to_string(),
9863                        &context,
9864                    );
9865                    match processed {
9866                        Some(new_cc) if new_cc != *cc => {
9867                            line.cost_center = Some(new_cc);
9868                        }
9869                        None => {
9870                            line.cost_center = None;
9871                        }
9872                        _ => {}
9873                    }
9874                }
9875            }
9876
9877            if let Some(pb) = &pb {
9878                pb.inc(1);
9879            }
9880        }
9881
9882        if let Some(pb) = pb {
9883            pb.finish_with_message("Data quality injection complete");
9884        }
9885
9886        let quality_issues = injector.issues().to_vec();
9887        Ok((injector.stats().clone(), quality_issues))
9888    }
9889
9890    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
9891    ///
9892    /// Creates complete audit documentation for each company in the configuration,
9893    /// following ISA standards:
9894    /// - ISA 210/220: Engagement acceptance and terms
9895    /// - ISA 230: Audit documentation (workpapers)
9896    /// - ISA 265: Control deficiencies (findings)
9897    /// - ISA 315/330: Risk assessment and response
9898    /// - ISA 500: Audit evidence
9899    /// - ISA 200: Professional judgment
9900    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
9901        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9902            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9903        let fiscal_year = start_date.year() as u16;
9904        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
9905
9906        // Calculate rough total revenue from entries for materiality
9907        let total_revenue: rust_decimal::Decimal = entries
9908            .iter()
9909            .flat_map(|e| e.lines.iter())
9910            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
9911            .map(|l| l.credit_amount)
9912            .sum();
9913
9914        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
9915        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
9916
9917        let mut snapshot = AuditSnapshot::default();
9918
9919        // Initialize generators
9920        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
9921        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
9922        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
9923        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
9924        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
9925        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
9926        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
9927        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
9928        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
9929        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
9930        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
9931        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
9932
9933        // Get list of accounts from CoA for risk assessment
9934        let accounts: Vec<String> = self
9935            .coa
9936            .as_ref()
9937            .map(|coa| {
9938                coa.get_postable_accounts()
9939                    .iter()
9940                    .map(|acc| acc.account_code().to_string())
9941                    .collect()
9942            })
9943            .unwrap_or_default();
9944
9945        // Generate engagements for each company
9946        for (i, company) in self.config.companies.iter().enumerate() {
9947            // Calculate company-specific revenue (proportional to volume weight)
9948            let company_revenue = total_revenue
9949                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
9950
9951            // Generate engagements for this company
9952            let engagements_for_company =
9953                self.phase_config.audit_engagements / self.config.companies.len().max(1);
9954            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
9955                1
9956            } else {
9957                0
9958            };
9959
9960            for _eng_idx in 0..(engagements_for_company + extra) {
9961                // Generate the engagement
9962                let mut engagement = engagement_gen.generate_engagement(
9963                    &company.code,
9964                    &company.name,
9965                    fiscal_year,
9966                    period_end,
9967                    company_revenue,
9968                    None, // Use default engagement type
9969                );
9970
9971                // Replace synthetic team IDs with real employee IDs from master data
9972                if !self.master_data.employees.is_empty() {
9973                    let emp_count = self.master_data.employees.len();
9974                    // Use employee IDs deterministically based on engagement index
9975                    let base = (i * 10 + _eng_idx) % emp_count;
9976                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
9977                        .employee_id
9978                        .clone();
9979                    engagement.engagement_manager_id = self.master_data.employees
9980                        [(base + 1) % emp_count]
9981                        .employee_id
9982                        .clone();
9983                    let real_team: Vec<String> = engagement
9984                        .team_member_ids
9985                        .iter()
9986                        .enumerate()
9987                        .map(|(j, _)| {
9988                            self.master_data.employees[(base + 2 + j) % emp_count]
9989                                .employee_id
9990                                .clone()
9991                        })
9992                        .collect();
9993                    engagement.team_member_ids = real_team;
9994                }
9995
9996                if let Some(pb) = &pb {
9997                    pb.inc(1);
9998                }
9999
10000                // Get team members from the engagement
10001                let team_members: Vec<String> = engagement.team_member_ids.clone();
10002
10003                // Generate workpapers for the engagement
10004                let workpapers =
10005                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10006
10007                for wp in &workpapers {
10008                    if let Some(pb) = &pb {
10009                        pb.inc(1);
10010                    }
10011
10012                    // Generate evidence for each workpaper
10013                    let evidence = evidence_gen.generate_evidence_for_workpaper(
10014                        wp,
10015                        &team_members,
10016                        wp.preparer_date,
10017                    );
10018
10019                    for _ in &evidence {
10020                        if let Some(pb) = &pb {
10021                            pb.inc(1);
10022                        }
10023                    }
10024
10025                    snapshot.evidence.extend(evidence);
10026                }
10027
10028                // Generate risk assessments for the engagement
10029                let risks =
10030                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10031
10032                for _ in &risks {
10033                    if let Some(pb) = &pb {
10034                        pb.inc(1);
10035                    }
10036                }
10037                snapshot.risk_assessments.extend(risks);
10038
10039                // Generate findings for the engagement
10040                let findings = finding_gen.generate_findings_for_engagement(
10041                    &engagement,
10042                    &workpapers,
10043                    &team_members,
10044                );
10045
10046                for _ in &findings {
10047                    if let Some(pb) = &pb {
10048                        pb.inc(1);
10049                    }
10050                }
10051                snapshot.findings.extend(findings);
10052
10053                // Generate professional judgments for the engagement
10054                let judgments =
10055                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10056
10057                for _ in &judgments {
10058                    if let Some(pb) = &pb {
10059                        pb.inc(1);
10060                    }
10061                }
10062                snapshot.judgments.extend(judgments);
10063
10064                // ISA 505: External confirmations and responses
10065                let (confs, resps) =
10066                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10067                snapshot.confirmations.extend(confs);
10068                snapshot.confirmation_responses.extend(resps);
10069
10070                // ISA 330: Procedure steps per workpaper
10071                let team_pairs: Vec<(String, String)> = team_members
10072                    .iter()
10073                    .map(|id| {
10074                        let name = self
10075                            .master_data
10076                            .employees
10077                            .iter()
10078                            .find(|e| e.employee_id == *id)
10079                            .map(|e| e.display_name.clone())
10080                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10081                        (id.clone(), name)
10082                    })
10083                    .collect();
10084                for wp in &workpapers {
10085                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10086                    snapshot.procedure_steps.extend(steps);
10087                }
10088
10089                // ISA 530: Samples per workpaper
10090                for wp in &workpapers {
10091                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10092                        snapshot.samples.push(sample);
10093                    }
10094                }
10095
10096                // ISA 520: Analytical procedures
10097                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10098                snapshot.analytical_results.extend(analytical);
10099
10100                // ISA 610: Internal audit function and reports
10101                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10102                snapshot.ia_functions.push(ia_func);
10103                snapshot.ia_reports.extend(ia_reports);
10104
10105                // ISA 550: Related parties and transactions
10106                let vendor_names: Vec<String> = self
10107                    .master_data
10108                    .vendors
10109                    .iter()
10110                    .map(|v| v.name.clone())
10111                    .collect();
10112                let customer_names: Vec<String> = self
10113                    .master_data
10114                    .customers
10115                    .iter()
10116                    .map(|c| c.name.clone())
10117                    .collect();
10118                let (parties, rp_txns) =
10119                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10120                snapshot.related_parties.extend(parties);
10121                snapshot.related_party_transactions.extend(rp_txns);
10122
10123                // Add workpapers after findings since findings need them
10124                snapshot.workpapers.extend(workpapers);
10125
10126                // Generate audit scope record for this engagement (one per engagement)
10127                {
10128                    let scope_id = format!(
10129                        "SCOPE-{}-{}",
10130                        engagement.engagement_id.simple(),
10131                        &engagement.client_entity_id
10132                    );
10133                    let scope = datasynth_core::models::audit::AuditScope::new(
10134                        scope_id.clone(),
10135                        engagement.engagement_id.to_string(),
10136                        engagement.client_entity_id.clone(),
10137                        engagement.materiality,
10138                    );
10139                    // Wire scope_id back to engagement
10140                    let mut eng = engagement;
10141                    eng.scope_id = Some(scope_id);
10142                    snapshot.audit_scopes.push(scope);
10143                    snapshot.engagements.push(eng);
10144                }
10145            }
10146        }
10147
10148        // ----------------------------------------------------------------
10149        // ISA 600: Group audit — component auditors, plan, instructions, reports
10150        // ----------------------------------------------------------------
10151        if self.config.companies.len() > 1 {
10152            // Use materiality from the first engagement if available, otherwise
10153            // derive a reasonable figure from total revenue.
10154            let group_materiality = snapshot
10155                .engagements
10156                .first()
10157                .map(|e| e.materiality)
10158                .unwrap_or_else(|| {
10159                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
10160                    total_revenue * pct
10161                });
10162
10163            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
10164            let group_engagement_id = snapshot
10165                .engagements
10166                .first()
10167                .map(|e| e.engagement_id.to_string())
10168                .unwrap_or_else(|| "GROUP-ENG".to_string());
10169
10170            let component_snapshot = component_gen.generate(
10171                &self.config.companies,
10172                group_materiality,
10173                &group_engagement_id,
10174                period_end,
10175            );
10176
10177            snapshot.component_auditors = component_snapshot.component_auditors;
10178            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
10179            snapshot.component_instructions = component_snapshot.component_instructions;
10180            snapshot.component_reports = component_snapshot.component_reports;
10181
10182            info!(
10183                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
10184                snapshot.component_auditors.len(),
10185                snapshot.component_instructions.len(),
10186                snapshot.component_reports.len(),
10187            );
10188        }
10189
10190        // ----------------------------------------------------------------
10191        // ISA 210: Engagement letters — one per engagement
10192        // ----------------------------------------------------------------
10193        {
10194            let applicable_framework = self
10195                .config
10196                .accounting_standards
10197                .framework
10198                .as_ref()
10199                .map(|f| format!("{f:?}"))
10200                .unwrap_or_else(|| "IFRS".to_string());
10201
10202            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
10203            let entity_count = self.config.companies.len();
10204
10205            for engagement in &snapshot.engagements {
10206                let company = self
10207                    .config
10208                    .companies
10209                    .iter()
10210                    .find(|c| c.code == engagement.client_entity_id);
10211                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
10212                let letter_date = engagement.planning_start;
10213                let letter = letter_gen.generate(
10214                    &engagement.engagement_id.to_string(),
10215                    &engagement.client_name,
10216                    entity_count,
10217                    engagement.period_end_date,
10218                    currency,
10219                    &applicable_framework,
10220                    letter_date,
10221                );
10222                snapshot.engagement_letters.push(letter);
10223            }
10224
10225            info!(
10226                "ISA 210 engagement letters: {} generated",
10227                snapshot.engagement_letters.len()
10228            );
10229        }
10230
10231        // ----------------------------------------------------------------
10232        // ISA 560 / IAS 10: Subsequent events
10233        // ----------------------------------------------------------------
10234        {
10235            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
10236            let entity_codes: Vec<String> = self
10237                .config
10238                .companies
10239                .iter()
10240                .map(|c| c.code.clone())
10241                .collect();
10242            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
10243            info!(
10244                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
10245                subsequent.len(),
10246                subsequent
10247                    .iter()
10248                    .filter(|e| matches!(
10249                        e.classification,
10250                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
10251                    ))
10252                    .count(),
10253                subsequent
10254                    .iter()
10255                    .filter(|e| matches!(
10256                        e.classification,
10257                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
10258                    ))
10259                    .count(),
10260            );
10261            snapshot.subsequent_events = subsequent;
10262        }
10263
10264        // ----------------------------------------------------------------
10265        // ISA 402: Service organization controls
10266        // ----------------------------------------------------------------
10267        {
10268            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
10269            let entity_codes: Vec<String> = self
10270                .config
10271                .companies
10272                .iter()
10273                .map(|c| c.code.clone())
10274                .collect();
10275            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
10276            info!(
10277                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
10278                soc_snapshot.service_organizations.len(),
10279                soc_snapshot.soc_reports.len(),
10280                soc_snapshot.user_entity_controls.len(),
10281            );
10282            snapshot.service_organizations = soc_snapshot.service_organizations;
10283            snapshot.soc_reports = soc_snapshot.soc_reports;
10284            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
10285        }
10286
10287        // ----------------------------------------------------------------
10288        // ISA 570: Going concern assessments
10289        // ----------------------------------------------------------------
10290        {
10291            use datasynth_generators::audit::going_concern_generator::{
10292                GoingConcernGenerator, GoingConcernInput,
10293            };
10294            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
10295            let entity_codes: Vec<String> = self
10296                .config
10297                .companies
10298                .iter()
10299                .map(|c| c.code.clone())
10300                .collect();
10301            // Assessment date = period end + 75 days (typical sign-off window).
10302            let assessment_date = period_end + chrono::Duration::days(75);
10303            let period_label = format!("FY{}", period_end.year());
10304
10305            // Build financial inputs from actual journal entries.
10306            //
10307            // We derive approximate P&L, working capital, and operating cash flow
10308            // by aggregating GL account balances from the journal entry population.
10309            // Account ranges used (standard chart):
10310            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
10311            //   Expenses:        6xxx (debit-normal)
10312            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
10313            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
10314            //   Operating CF:    net income adjusted for D&A (rough proxy)
10315            let gc_inputs: Vec<GoingConcernInput> = self
10316                .config
10317                .companies
10318                .iter()
10319                .map(|company| {
10320                    let code = &company.code;
10321                    let mut revenue = rust_decimal::Decimal::ZERO;
10322                    let mut expenses = rust_decimal::Decimal::ZERO;
10323                    let mut current_assets = rust_decimal::Decimal::ZERO;
10324                    let mut current_liabs = rust_decimal::Decimal::ZERO;
10325                    let mut total_debt = rust_decimal::Decimal::ZERO;
10326
10327                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
10328                        for line in &je.lines {
10329                            let acct = line.gl_account.as_str();
10330                            let net = line.debit_amount - line.credit_amount;
10331                            if acct.starts_with('4') {
10332                                // Revenue accounts: credit-normal, so negative net = revenue earned
10333                                revenue -= net;
10334                            } else if acct.starts_with('6') {
10335                                // Expense accounts: debit-normal
10336                                expenses += net;
10337                            }
10338                            // Balance sheet accounts for working capital
10339                            if acct.starts_with('1') {
10340                                // Current asset accounts (1000–1499)
10341                                if let Ok(n) = acct.parse::<u32>() {
10342                                    if (1000..=1499).contains(&n) {
10343                                        current_assets += net;
10344                                    }
10345                                }
10346                            } else if acct.starts_with('2') {
10347                                if let Ok(n) = acct.parse::<u32>() {
10348                                    if (2000..=2499).contains(&n) {
10349                                        // Current liabilities
10350                                        current_liabs -= net; // credit-normal
10351                                    } else if (2500..=2999).contains(&n) {
10352                                        // Long-term debt
10353                                        total_debt -= net;
10354                                    }
10355                                }
10356                            }
10357                        }
10358                    }
10359
10360                    let net_income = revenue - expenses;
10361                    let working_capital = current_assets - current_liabs;
10362                    // Rough operating CF proxy: net income (full accrual CF calculation
10363                    // is done separately in the cash flow statement generator)
10364                    let operating_cash_flow = net_income;
10365
10366                    GoingConcernInput {
10367                        entity_code: code.clone(),
10368                        net_income,
10369                        working_capital,
10370                        operating_cash_flow,
10371                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
10372                        assessment_date,
10373                    }
10374                })
10375                .collect();
10376
10377            let assessments = if gc_inputs.is_empty() {
10378                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
10379            } else {
10380                gc_gen.generate_for_entities_with_inputs(
10381                    &entity_codes,
10382                    &gc_inputs,
10383                    assessment_date,
10384                    &period_label,
10385                )
10386            };
10387            info!(
10388                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
10389                assessments.len(),
10390                assessments.iter().filter(|a| matches!(
10391                    a.auditor_conclusion,
10392                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
10393                )).count(),
10394                assessments.iter().filter(|a| matches!(
10395                    a.auditor_conclusion,
10396                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
10397                )).count(),
10398                assessments.iter().filter(|a| matches!(
10399                    a.auditor_conclusion,
10400                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
10401                )).count(),
10402            );
10403            snapshot.going_concern_assessments = assessments;
10404        }
10405
10406        // ----------------------------------------------------------------
10407        // ISA 540: Accounting estimates
10408        // ----------------------------------------------------------------
10409        {
10410            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
10411            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
10412            let entity_codes: Vec<String> = self
10413                .config
10414                .companies
10415                .iter()
10416                .map(|c| c.code.clone())
10417                .collect();
10418            let estimates = est_gen.generate_for_entities(&entity_codes);
10419            info!(
10420                "ISA 540 accounting estimates: {} estimates across {} entities \
10421                 ({} with retrospective reviews, {} with auditor point estimates)",
10422                estimates.len(),
10423                entity_codes.len(),
10424                estimates
10425                    .iter()
10426                    .filter(|e| e.retrospective_review.is_some())
10427                    .count(),
10428                estimates
10429                    .iter()
10430                    .filter(|e| e.auditor_point_estimate.is_some())
10431                    .count(),
10432            );
10433            snapshot.accounting_estimates = estimates;
10434        }
10435
10436        // ----------------------------------------------------------------
10437        // ISA 700/701/705/706: Audit opinions (one per engagement)
10438        // ----------------------------------------------------------------
10439        {
10440            use datasynth_generators::audit::audit_opinion_generator::{
10441                AuditOpinionGenerator, AuditOpinionInput,
10442            };
10443
10444            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
10445
10446            // Build inputs — one per engagement, linking findings and going concern.
10447            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
10448                .engagements
10449                .iter()
10450                .map(|eng| {
10451                    // Collect findings for this engagement.
10452                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
10453                        .findings
10454                        .iter()
10455                        .filter(|f| f.engagement_id == eng.engagement_id)
10456                        .cloned()
10457                        .collect();
10458
10459                    // Going concern for this entity.
10460                    let gc = snapshot
10461                        .going_concern_assessments
10462                        .iter()
10463                        .find(|g| g.entity_code == eng.client_entity_id)
10464                        .cloned();
10465
10466                    // Component reports relevant to this engagement.
10467                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
10468                        snapshot.component_reports.clone();
10469
10470                    let auditor = self
10471                        .master_data
10472                        .employees
10473                        .first()
10474                        .map(|e| e.display_name.clone())
10475                        .unwrap_or_else(|| "Global Audit LLP".into());
10476
10477                    let partner = self
10478                        .master_data
10479                        .employees
10480                        .get(1)
10481                        .map(|e| e.display_name.clone())
10482                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
10483
10484                    AuditOpinionInput {
10485                        entity_code: eng.client_entity_id.clone(),
10486                        entity_name: eng.client_name.clone(),
10487                        engagement_id: eng.engagement_id,
10488                        period_end: eng.period_end_date,
10489                        findings: eng_findings,
10490                        going_concern: gc,
10491                        component_reports: comp_reports,
10492                        // Mark as US-listed when audit standards include PCAOB.
10493                        is_us_listed: {
10494                            let fw = &self.config.audit_standards.isa_compliance.framework;
10495                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
10496                        },
10497                        auditor_name: auditor,
10498                        engagement_partner: partner,
10499                    }
10500                })
10501                .collect();
10502
10503            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
10504
10505            for go in &generated_opinions {
10506                snapshot
10507                    .key_audit_matters
10508                    .extend(go.key_audit_matters.clone());
10509            }
10510            snapshot.audit_opinions = generated_opinions
10511                .into_iter()
10512                .map(|go| go.opinion)
10513                .collect();
10514
10515            info!(
10516                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
10517                snapshot.audit_opinions.len(),
10518                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
10519                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
10520                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
10521                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
10522            );
10523        }
10524
10525        // ----------------------------------------------------------------
10526        // SOX 302 / 404 assessments
10527        // ----------------------------------------------------------------
10528        {
10529            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
10530
10531            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
10532
10533            for (i, company) in self.config.companies.iter().enumerate() {
10534                // Collect findings for this company's engagements.
10535                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
10536                    .engagements
10537                    .iter()
10538                    .filter(|e| e.client_entity_id == company.code)
10539                    .map(|e| e.engagement_id)
10540                    .collect();
10541
10542                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
10543                    .findings
10544                    .iter()
10545                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
10546                    .cloned()
10547                    .collect();
10548
10549                // Derive executive names from employee list.
10550                let emp_count = self.master_data.employees.len();
10551                let ceo_name = if emp_count > 0 {
10552                    self.master_data.employees[i % emp_count]
10553                        .display_name
10554                        .clone()
10555                } else {
10556                    format!("CEO of {}", company.name)
10557                };
10558                let cfo_name = if emp_count > 1 {
10559                    self.master_data.employees[(i + 1) % emp_count]
10560                        .display_name
10561                        .clone()
10562                } else {
10563                    format!("CFO of {}", company.name)
10564                };
10565
10566                // Use engagement materiality if available.
10567                let materiality = snapshot
10568                    .engagements
10569                    .iter()
10570                    .find(|e| e.client_entity_id == company.code)
10571                    .map(|e| e.materiality)
10572                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
10573
10574                let input = SoxGeneratorInput {
10575                    company_code: company.code.clone(),
10576                    company_name: company.name.clone(),
10577                    fiscal_year,
10578                    period_end,
10579                    findings: company_findings,
10580                    ceo_name,
10581                    cfo_name,
10582                    materiality_threshold: materiality,
10583                    revenue_percent: rust_decimal::Decimal::from(100),
10584                    assets_percent: rust_decimal::Decimal::from(100),
10585                    significant_accounts: vec![
10586                        "Revenue".into(),
10587                        "Accounts Receivable".into(),
10588                        "Inventory".into(),
10589                        "Fixed Assets".into(),
10590                        "Accounts Payable".into(),
10591                    ],
10592                };
10593
10594                let (certs, assessment) = sox_gen.generate(&input);
10595                snapshot.sox_302_certifications.extend(certs);
10596                snapshot.sox_404_assessments.push(assessment);
10597            }
10598
10599            info!(
10600                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
10601                snapshot.sox_302_certifications.len(),
10602                snapshot.sox_404_assessments.len(),
10603                snapshot
10604                    .sox_404_assessments
10605                    .iter()
10606                    .filter(|a| a.icfr_effective)
10607                    .count(),
10608                snapshot
10609                    .sox_404_assessments
10610                    .iter()
10611                    .filter(|a| !a.icfr_effective)
10612                    .count(),
10613            );
10614        }
10615
10616        // ----------------------------------------------------------------
10617        // ISA 320: Materiality calculations (one per entity)
10618        // ----------------------------------------------------------------
10619        {
10620            use datasynth_generators::audit::materiality_generator::{
10621                MaterialityGenerator, MaterialityInput,
10622            };
10623
10624            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
10625
10626            // Compute per-company financials from JEs.
10627            // Asset accounts start with '1', revenue with '4',
10628            // expense accounts with '5' or '6'.
10629            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
10630
10631            for company in &self.config.companies {
10632                let company_code = company.code.clone();
10633
10634                // Revenue: credit-side entries on 4xxx accounts
10635                let company_revenue: rust_decimal::Decimal = entries
10636                    .iter()
10637                    .filter(|e| e.company_code() == company_code)
10638                    .flat_map(|e| e.lines.iter())
10639                    .filter(|l| l.account_code.starts_with('4'))
10640                    .map(|l| l.credit_amount)
10641                    .sum();
10642
10643                // Total assets: debit balances on 1xxx accounts
10644                let total_assets: rust_decimal::Decimal = entries
10645                    .iter()
10646                    .filter(|e| e.company_code() == company_code)
10647                    .flat_map(|e| e.lines.iter())
10648                    .filter(|l| l.account_code.starts_with('1'))
10649                    .map(|l| l.debit_amount)
10650                    .sum();
10651
10652                // Expenses: debit-side entries on 5xxx/6xxx accounts
10653                let total_expenses: rust_decimal::Decimal = entries
10654                    .iter()
10655                    .filter(|e| e.company_code() == company_code)
10656                    .flat_map(|e| e.lines.iter())
10657                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
10658                    .map(|l| l.debit_amount)
10659                    .sum();
10660
10661                // Equity: credit balances on 3xxx accounts
10662                let equity: rust_decimal::Decimal = entries
10663                    .iter()
10664                    .filter(|e| e.company_code() == company_code)
10665                    .flat_map(|e| e.lines.iter())
10666                    .filter(|l| l.account_code.starts_with('3'))
10667                    .map(|l| l.credit_amount)
10668                    .sum();
10669
10670                let pretax_income = company_revenue - total_expenses;
10671
10672                // If no company-specific data, fall back to proportional share
10673                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
10674                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
10675                        .unwrap_or(rust_decimal::Decimal::ONE);
10676                    (
10677                        total_revenue * w,
10678                        total_revenue * w * rust_decimal::Decimal::from(3),
10679                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
10680                        total_revenue * w * rust_decimal::Decimal::from(2),
10681                    )
10682                } else {
10683                    (company_revenue, total_assets, pretax_income, equity)
10684                };
10685
10686                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
10687
10688                materiality_inputs.push(MaterialityInput {
10689                    entity_code: company_code,
10690                    period: format!("FY{}", fiscal_year),
10691                    revenue: rev,
10692                    pretax_income: pti,
10693                    total_assets: assets,
10694                    equity: eq,
10695                    gross_profit,
10696                });
10697            }
10698
10699            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
10700
10701            info!(
10702                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
10703                 {} total assets, {} equity benchmarks)",
10704                snapshot.materiality_calculations.len(),
10705                snapshot
10706                    .materiality_calculations
10707                    .iter()
10708                    .filter(|m| matches!(
10709                        m.benchmark,
10710                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
10711                    ))
10712                    .count(),
10713                snapshot
10714                    .materiality_calculations
10715                    .iter()
10716                    .filter(|m| matches!(
10717                        m.benchmark,
10718                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
10719                    ))
10720                    .count(),
10721                snapshot
10722                    .materiality_calculations
10723                    .iter()
10724                    .filter(|m| matches!(
10725                        m.benchmark,
10726                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
10727                    ))
10728                    .count(),
10729                snapshot
10730                    .materiality_calculations
10731                    .iter()
10732                    .filter(|m| matches!(
10733                        m.benchmark,
10734                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
10735                    ))
10736                    .count(),
10737            );
10738        }
10739
10740        // ----------------------------------------------------------------
10741        // ISA 315: Combined Risk Assessments (per entity, per account area)
10742        // ----------------------------------------------------------------
10743        {
10744            use datasynth_generators::audit::cra_generator::CraGenerator;
10745
10746            let mut cra_gen = CraGenerator::new(self.seed + 8315);
10747
10748            // Build entity → scope_id map from already-generated scopes
10749            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
10750                .audit_scopes
10751                .iter()
10752                .map(|s| (s.entity_code.clone(), s.id.clone()))
10753                .collect();
10754
10755            for company in &self.config.companies {
10756                let cras = cra_gen.generate_for_entity(&company.code, None);
10757                let scope_id = entity_scope_map.get(&company.code).cloned();
10758                let cras_with_scope: Vec<_> = cras
10759                    .into_iter()
10760                    .map(|mut cra| {
10761                        cra.scope_id = scope_id.clone();
10762                        cra
10763                    })
10764                    .collect();
10765                snapshot.combined_risk_assessments.extend(cras_with_scope);
10766            }
10767
10768            let significant_count = snapshot
10769                .combined_risk_assessments
10770                .iter()
10771                .filter(|c| c.significant_risk)
10772                .count();
10773            let high_cra_count = snapshot
10774                .combined_risk_assessments
10775                .iter()
10776                .filter(|c| {
10777                    matches!(
10778                        c.combined_risk,
10779                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
10780                    )
10781                })
10782                .count();
10783
10784            info!(
10785                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
10786                snapshot.combined_risk_assessments.len(),
10787                significant_count,
10788                high_cra_count,
10789            );
10790        }
10791
10792        // ----------------------------------------------------------------
10793        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
10794        // ----------------------------------------------------------------
10795        {
10796            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
10797
10798            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
10799
10800            // Group CRAs by entity and use per-entity tolerable error from materiality
10801            for company in &self.config.companies {
10802                let entity_code = company.code.clone();
10803
10804                // Find tolerable error for this entity (= performance materiality)
10805                let tolerable_error = snapshot
10806                    .materiality_calculations
10807                    .iter()
10808                    .find(|m| m.entity_code == entity_code)
10809                    .map(|m| m.tolerable_error);
10810
10811                // Collect CRAs for this entity
10812                let entity_cras: Vec<_> = snapshot
10813                    .combined_risk_assessments
10814                    .iter()
10815                    .filter(|c| c.entity_code == entity_code)
10816                    .cloned()
10817                    .collect();
10818
10819                if !entity_cras.is_empty() {
10820                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
10821                    snapshot.sampling_plans.extend(plans);
10822                    snapshot.sampled_items.extend(items);
10823                }
10824            }
10825
10826            let misstatement_count = snapshot
10827                .sampled_items
10828                .iter()
10829                .filter(|i| i.misstatement_found)
10830                .count();
10831
10832            info!(
10833                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
10834                snapshot.sampling_plans.len(),
10835                snapshot.sampled_items.len(),
10836                misstatement_count,
10837            );
10838        }
10839
10840        // ----------------------------------------------------------------
10841        // ISA 315: Significant Classes of Transactions (SCOTS)
10842        // ----------------------------------------------------------------
10843        {
10844            use datasynth_generators::audit::scots_generator::{
10845                ScotsGenerator, ScotsGeneratorConfig,
10846            };
10847
10848            let ic_enabled = self.config.intercompany.enabled;
10849
10850            let config = ScotsGeneratorConfig {
10851                intercompany_enabled: ic_enabled,
10852                ..ScotsGeneratorConfig::default()
10853            };
10854            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
10855
10856            for company in &self.config.companies {
10857                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
10858                snapshot
10859                    .significant_transaction_classes
10860                    .extend(entity_scots);
10861            }
10862
10863            let estimation_count = snapshot
10864                .significant_transaction_classes
10865                .iter()
10866                .filter(|s| {
10867                    matches!(
10868                        s.transaction_type,
10869                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
10870                    )
10871                })
10872                .count();
10873
10874            info!(
10875                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
10876                snapshot.significant_transaction_classes.len(),
10877                estimation_count,
10878            );
10879        }
10880
10881        // ----------------------------------------------------------------
10882        // ISA 520: Unusual Item Markers
10883        // ----------------------------------------------------------------
10884        {
10885            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
10886
10887            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
10888            let entity_codes: Vec<String> = self
10889                .config
10890                .companies
10891                .iter()
10892                .map(|c| c.code.clone())
10893                .collect();
10894            let unusual_flags =
10895                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
10896            info!(
10897                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
10898                unusual_flags.len(),
10899                unusual_flags
10900                    .iter()
10901                    .filter(|f| matches!(
10902                        f.severity,
10903                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
10904                    ))
10905                    .count(),
10906                unusual_flags
10907                    .iter()
10908                    .filter(|f| matches!(
10909                        f.severity,
10910                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
10911                    ))
10912                    .count(),
10913                unusual_flags
10914                    .iter()
10915                    .filter(|f| matches!(
10916                        f.severity,
10917                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
10918                    ))
10919                    .count(),
10920            );
10921            snapshot.unusual_items = unusual_flags;
10922        }
10923
10924        // ----------------------------------------------------------------
10925        // ISA 520: Analytical Relationships
10926        // ----------------------------------------------------------------
10927        {
10928            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
10929
10930            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
10931            let entity_codes: Vec<String> = self
10932                .config
10933                .companies
10934                .iter()
10935                .map(|c| c.code.clone())
10936                .collect();
10937            let current_period_label = format!("FY{fiscal_year}");
10938            let prior_period_label = format!("FY{}", fiscal_year - 1);
10939            let analytical_rels = ar_gen.generate_for_entities(
10940                &entity_codes,
10941                entries,
10942                &current_period_label,
10943                &prior_period_label,
10944            );
10945            let out_of_range = analytical_rels
10946                .iter()
10947                .filter(|r| !r.within_expected_range)
10948                .count();
10949            info!(
10950                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
10951                analytical_rels.len(),
10952                out_of_range,
10953            );
10954            snapshot.analytical_relationships = analytical_rels;
10955        }
10956
10957        if let Some(pb) = pb {
10958            pb.finish_with_message(format!(
10959                "Audit data: {} engagements, {} workpapers, {} evidence, \
10960                 {} confirmations, {} procedure steps, {} samples, \
10961                 {} analytical, {} IA funcs, {} related parties, \
10962                 {} component auditors, {} letters, {} subsequent events, \
10963                 {} service orgs, {} going concern, {} accounting estimates, \
10964                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
10965                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
10966                 {} unusual items, {} analytical relationships",
10967                snapshot.engagements.len(),
10968                snapshot.workpapers.len(),
10969                snapshot.evidence.len(),
10970                snapshot.confirmations.len(),
10971                snapshot.procedure_steps.len(),
10972                snapshot.samples.len(),
10973                snapshot.analytical_results.len(),
10974                snapshot.ia_functions.len(),
10975                snapshot.related_parties.len(),
10976                snapshot.component_auditors.len(),
10977                snapshot.engagement_letters.len(),
10978                snapshot.subsequent_events.len(),
10979                snapshot.service_organizations.len(),
10980                snapshot.going_concern_assessments.len(),
10981                snapshot.accounting_estimates.len(),
10982                snapshot.audit_opinions.len(),
10983                snapshot.key_audit_matters.len(),
10984                snapshot.sox_302_certifications.len(),
10985                snapshot.sox_404_assessments.len(),
10986                snapshot.materiality_calculations.len(),
10987                snapshot.combined_risk_assessments.len(),
10988                snapshot.sampling_plans.len(),
10989                snapshot.significant_transaction_classes.len(),
10990                snapshot.unusual_items.len(),
10991                snapshot.analytical_relationships.len(),
10992            ));
10993        }
10994
10995        // ----------------------------------------------------------------
10996        // PCAOB-ISA cross-reference mappings
10997        // ----------------------------------------------------------------
10998        // Always include the standard PCAOB-ISA mappings when audit generation is
10999        // enabled. These are static reference data (no randomness required) so we
11000        // call standard_mappings() directly.
11001        {
11002            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11003            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11004            debug!(
11005                "PCAOB-ISA mappings generated: {} mappings",
11006                snapshot.isa_pcaob_mappings.len()
11007            );
11008        }
11009
11010        // ----------------------------------------------------------------
11011        // ISA standard reference entries
11012        // ----------------------------------------------------------------
11013        // Emit flat ISA standard reference data (number, title, series) so
11014        // consumers get a machine-readable listing of all 34 ISA standards in
11015        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
11016        {
11017            use datasynth_standards::audit::isa_reference::IsaStandard;
11018            snapshot.isa_mappings = IsaStandard::standard_entries();
11019            debug!(
11020                "ISA standard entries generated: {} standards",
11021                snapshot.isa_mappings.len()
11022            );
11023        }
11024
11025        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
11026        // For each RPT, find the chronologically closest JE for the engagement's entity.
11027        {
11028            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11029                .engagements
11030                .iter()
11031                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11032                .collect();
11033
11034            for rpt in &mut snapshot.related_party_transactions {
11035                if rpt.journal_entry_id.is_some() {
11036                    continue; // already set
11037                }
11038                let entity = engagement_by_id
11039                    .get(&rpt.engagement_id.to_string())
11040                    .copied()
11041                    .unwrap_or("");
11042
11043                // Find closest JE by date in the entity's company
11044                let best_je = entries
11045                    .iter()
11046                    .filter(|je| je.header.company_code == entity)
11047                    .min_by_key(|je| {
11048                        (je.header.posting_date - rpt.transaction_date)
11049                            .num_days()
11050                            .abs()
11051                    });
11052
11053                if let Some(je) = best_je {
11054                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
11055                }
11056            }
11057
11058            let linked = snapshot
11059                .related_party_transactions
11060                .iter()
11061                .filter(|t| t.journal_entry_id.is_some())
11062                .count();
11063            debug!(
11064                "Linked {}/{} related party transactions to journal entries",
11065                linked,
11066                snapshot.related_party_transactions.len()
11067            );
11068        }
11069
11070        Ok(snapshot)
11071    }
11072
11073    /// Export journal entries as graph data for ML training and network reconstruction.
11074    ///
11075    /// Builds a transaction graph where:
11076    /// - Nodes are GL accounts
11077    /// - Edges are money flows from credit to debit accounts
11078    /// - Edge attributes include amount, date, business process, anomaly flags
11079    fn export_graphs(
11080        &mut self,
11081        entries: &[JournalEntry],
11082        _coa: &Arc<ChartOfAccounts>,
11083        stats: &mut EnhancedGenerationStatistics,
11084    ) -> SynthResult<GraphExportSnapshot> {
11085        let pb = self.create_progress_bar(100, "Exporting Graphs");
11086
11087        let mut snapshot = GraphExportSnapshot::default();
11088
11089        // Get output directory
11090        let output_dir = self
11091            .output_path
11092            .clone()
11093            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
11094        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
11095
11096        // Process each graph type configuration
11097        for graph_type in &self.config.graph_export.graph_types {
11098            if let Some(pb) = &pb {
11099                pb.inc(10);
11100            }
11101
11102            // Build transaction graph
11103            let graph_config = TransactionGraphConfig {
11104                include_vendors: false,
11105                include_customers: false,
11106                create_debit_credit_edges: true,
11107                include_document_nodes: graph_type.include_document_nodes,
11108                min_edge_weight: graph_type.min_edge_weight,
11109                aggregate_parallel_edges: graph_type.aggregate_edges,
11110                framework: None,
11111            };
11112
11113            let mut builder = TransactionGraphBuilder::new(graph_config);
11114            builder.add_journal_entries(entries);
11115            let graph = builder.build();
11116
11117            // Update stats
11118            stats.graph_node_count += graph.node_count();
11119            stats.graph_edge_count += graph.edge_count();
11120
11121            if let Some(pb) = &pb {
11122                pb.inc(40);
11123            }
11124
11125            // Export to each configured format
11126            for format in &self.config.graph_export.formats {
11127                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
11128
11129                // Create output directory
11130                if let Err(e) = std::fs::create_dir_all(&format_dir) {
11131                    warn!("Failed to create graph output directory: {}", e);
11132                    continue;
11133                }
11134
11135                match format {
11136                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
11137                        let pyg_config = PyGExportConfig {
11138                            common: datasynth_graph::CommonExportConfig {
11139                                export_node_features: true,
11140                                export_edge_features: true,
11141                                export_node_labels: true,
11142                                export_edge_labels: true,
11143                                export_masks: true,
11144                                train_ratio: self.config.graph_export.train_ratio,
11145                                val_ratio: self.config.graph_export.validation_ratio,
11146                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
11147                            },
11148                            one_hot_categoricals: false,
11149                        };
11150
11151                        let exporter = PyGExporter::new(pyg_config);
11152                        match exporter.export(&graph, &format_dir) {
11153                            Ok(metadata) => {
11154                                snapshot.exports.insert(
11155                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
11156                                    GraphExportInfo {
11157                                        name: graph_type.name.clone(),
11158                                        format: "pytorch_geometric".to_string(),
11159                                        output_path: format_dir.clone(),
11160                                        node_count: metadata.num_nodes,
11161                                        edge_count: metadata.num_edges,
11162                                    },
11163                                );
11164                                snapshot.graph_count += 1;
11165                            }
11166                            Err(e) => {
11167                                warn!("Failed to export PyTorch Geometric graph: {}", e);
11168                            }
11169                        }
11170                    }
11171                    datasynth_config::schema::GraphExportFormat::Neo4j => {
11172                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
11173
11174                        let neo4j_config = Neo4jExportConfig {
11175                            export_node_properties: true,
11176                            export_edge_properties: true,
11177                            export_features: true,
11178                            generate_cypher: true,
11179                            generate_admin_import: true,
11180                            database_name: "synth".to_string(),
11181                            cypher_batch_size: 1000,
11182                        };
11183
11184                        let exporter = Neo4jExporter::new(neo4j_config);
11185                        match exporter.export(&graph, &format_dir) {
11186                            Ok(metadata) => {
11187                                snapshot.exports.insert(
11188                                    format!("{}_{}", graph_type.name, "neo4j"),
11189                                    GraphExportInfo {
11190                                        name: graph_type.name.clone(),
11191                                        format: "neo4j".to_string(),
11192                                        output_path: format_dir.clone(),
11193                                        node_count: metadata.num_nodes,
11194                                        edge_count: metadata.num_edges,
11195                                    },
11196                                );
11197                                snapshot.graph_count += 1;
11198                            }
11199                            Err(e) => {
11200                                warn!("Failed to export Neo4j graph: {}", e);
11201                            }
11202                        }
11203                    }
11204                    datasynth_config::schema::GraphExportFormat::Dgl => {
11205                        use datasynth_graph::{DGLExportConfig, DGLExporter};
11206
11207                        let dgl_config = DGLExportConfig {
11208                            common: datasynth_graph::CommonExportConfig {
11209                                export_node_features: true,
11210                                export_edge_features: true,
11211                                export_node_labels: true,
11212                                export_edge_labels: true,
11213                                export_masks: true,
11214                                train_ratio: self.config.graph_export.train_ratio,
11215                                val_ratio: self.config.graph_export.validation_ratio,
11216                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
11217                            },
11218                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
11219                            include_pickle_script: true, // DGL ecosystem standard helper
11220                        };
11221
11222                        let exporter = DGLExporter::new(dgl_config);
11223                        match exporter.export(&graph, &format_dir) {
11224                            Ok(metadata) => {
11225                                snapshot.exports.insert(
11226                                    format!("{}_{}", graph_type.name, "dgl"),
11227                                    GraphExportInfo {
11228                                        name: graph_type.name.clone(),
11229                                        format: "dgl".to_string(),
11230                                        output_path: format_dir.clone(),
11231                                        node_count: metadata.common.num_nodes,
11232                                        edge_count: metadata.common.num_edges,
11233                                    },
11234                                );
11235                                snapshot.graph_count += 1;
11236                            }
11237                            Err(e) => {
11238                                warn!("Failed to export DGL graph: {}", e);
11239                            }
11240                        }
11241                    }
11242                    datasynth_config::schema::GraphExportFormat::RustGraph => {
11243                        use datasynth_graph::{
11244                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
11245                        };
11246
11247                        let rustgraph_config = RustGraphExportConfig {
11248                            include_features: true,
11249                            include_temporal: true,
11250                            include_labels: true,
11251                            source_name: "datasynth".to_string(),
11252                            batch_id: None,
11253                            output_format: RustGraphOutputFormat::JsonLines,
11254                            export_node_properties: true,
11255                            export_edge_properties: true,
11256                            pretty_print: false,
11257                        };
11258
11259                        let exporter = RustGraphExporter::new(rustgraph_config);
11260                        match exporter.export(&graph, &format_dir) {
11261                            Ok(metadata) => {
11262                                snapshot.exports.insert(
11263                                    format!("{}_{}", graph_type.name, "rustgraph"),
11264                                    GraphExportInfo {
11265                                        name: graph_type.name.clone(),
11266                                        format: "rustgraph".to_string(),
11267                                        output_path: format_dir.clone(),
11268                                        node_count: metadata.num_nodes,
11269                                        edge_count: metadata.num_edges,
11270                                    },
11271                                );
11272                                snapshot.graph_count += 1;
11273                            }
11274                            Err(e) => {
11275                                warn!("Failed to export RustGraph: {}", e);
11276                            }
11277                        }
11278                    }
11279                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
11280                        // Hypergraph export is handled separately in Phase 10b
11281                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
11282                    }
11283                }
11284            }
11285
11286            if let Some(pb) = &pb {
11287                pb.inc(40);
11288            }
11289        }
11290
11291        stats.graph_export_count = snapshot.graph_count;
11292        snapshot.exported = snapshot.graph_count > 0;
11293
11294        if let Some(pb) = pb {
11295            pb.finish_with_message(format!(
11296                "Graphs exported: {} graphs ({} nodes, {} edges)",
11297                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
11298            ));
11299        }
11300
11301        Ok(snapshot)
11302    }
11303
11304    /// Build additional graph types (banking, approval, entity) when relevant data
11305    /// is available. These run as a late phase because the data they need (banking
11306    /// snapshot, intercompany snapshot) is only generated after the main graph
11307    /// export phase.
11308    fn build_additional_graphs(
11309        &self,
11310        banking: &BankingSnapshot,
11311        intercompany: &IntercompanySnapshot,
11312        entries: &[JournalEntry],
11313        stats: &mut EnhancedGenerationStatistics,
11314    ) {
11315        let output_dir = self
11316            .output_path
11317            .clone()
11318            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
11319        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
11320
11321        // Banking graph: build when banking customers and transactions exist
11322        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
11323            info!("Phase 10c: Building banking network graph");
11324            let config = BankingGraphConfig::default();
11325            let mut builder = BankingGraphBuilder::new(config);
11326            builder.add_customers(&banking.customers);
11327            builder.add_accounts(&banking.accounts, &banking.customers);
11328            builder.add_transactions(&banking.transactions);
11329            let graph = builder.build();
11330
11331            let node_count = graph.node_count();
11332            let edge_count = graph.edge_count();
11333            stats.graph_node_count += node_count;
11334            stats.graph_edge_count += edge_count;
11335
11336            // Export as PyG if configured
11337            for format in &self.config.graph_export.formats {
11338                if matches!(
11339                    format,
11340                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
11341                ) {
11342                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
11343                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
11344                        warn!("Failed to create banking graph output dir: {}", e);
11345                        continue;
11346                    }
11347                    let pyg_config = PyGExportConfig::default();
11348                    let exporter = PyGExporter::new(pyg_config);
11349                    if let Err(e) = exporter.export(&graph, &format_dir) {
11350                        warn!("Failed to export banking graph as PyG: {}", e);
11351                    } else {
11352                        info!(
11353                            "Banking network graph exported: {} nodes, {} edges",
11354                            node_count, edge_count
11355                        );
11356                    }
11357                }
11358            }
11359        }
11360
11361        // Approval graph: build from journal entry approval workflows
11362        let approval_entries: Vec<_> = entries
11363            .iter()
11364            .filter(|je| je.header.approval_workflow.is_some())
11365            .collect();
11366
11367        if !approval_entries.is_empty() {
11368            info!(
11369                "Phase 10c: Building approval network graph ({} entries with approvals)",
11370                approval_entries.len()
11371            );
11372            let config = ApprovalGraphConfig::default();
11373            let mut builder = ApprovalGraphBuilder::new(config);
11374
11375            for je in &approval_entries {
11376                if let Some(ref wf) = je.header.approval_workflow {
11377                    for action in &wf.actions {
11378                        let record = datasynth_core::models::ApprovalRecord {
11379                            approval_id: format!(
11380                                "APR-{}-{}",
11381                                je.header.document_id, action.approval_level
11382                            ),
11383                            document_number: je.header.document_id.to_string(),
11384                            document_type: "JE".to_string(),
11385                            company_code: je.company_code().to_string(),
11386                            requester_id: wf.preparer_id.clone(),
11387                            requester_name: Some(wf.preparer_name.clone()),
11388                            approver_id: action.actor_id.clone(),
11389                            approver_name: action.actor_name.clone(),
11390                            approval_date: je.posting_date(),
11391                            action: format!("{:?}", action.action),
11392                            amount: wf.amount,
11393                            approval_limit: None,
11394                            comments: action.comments.clone(),
11395                            delegation_from: None,
11396                            is_auto_approved: false,
11397                        };
11398                        builder.add_approval(&record);
11399                    }
11400                }
11401            }
11402
11403            let graph = builder.build();
11404            let node_count = graph.node_count();
11405            let edge_count = graph.edge_count();
11406            stats.graph_node_count += node_count;
11407            stats.graph_edge_count += edge_count;
11408
11409            // Export as PyG if configured
11410            for format in &self.config.graph_export.formats {
11411                if matches!(
11412                    format,
11413                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
11414                ) {
11415                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
11416                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
11417                        warn!("Failed to create approval graph output dir: {}", e);
11418                        continue;
11419                    }
11420                    let pyg_config = PyGExportConfig::default();
11421                    let exporter = PyGExporter::new(pyg_config);
11422                    if let Err(e) = exporter.export(&graph, &format_dir) {
11423                        warn!("Failed to export approval graph as PyG: {}", e);
11424                    } else {
11425                        info!(
11426                            "Approval network graph exported: {} nodes, {} edges",
11427                            node_count, edge_count
11428                        );
11429                    }
11430                }
11431            }
11432        }
11433
11434        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
11435        if self.config.companies.len() >= 2 {
11436            info!(
11437                "Phase 10c: Building entity relationship graph ({} companies)",
11438                self.config.companies.len()
11439            );
11440
11441            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11442                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
11443
11444            // Map CompanyConfig → Company objects
11445            let parent_code = &self.config.companies[0].code;
11446            let mut companies: Vec<datasynth_core::models::Company> =
11447                Vec::with_capacity(self.config.companies.len());
11448
11449            // First company is the parent
11450            let first = &self.config.companies[0];
11451            companies.push(datasynth_core::models::Company::parent(
11452                &first.code,
11453                &first.name,
11454                &first.country,
11455                &first.currency,
11456            ));
11457
11458            // Remaining companies are subsidiaries (100% owned by parent)
11459            for cc in self.config.companies.iter().skip(1) {
11460                companies.push(datasynth_core::models::Company::subsidiary(
11461                    &cc.code,
11462                    &cc.name,
11463                    &cc.country,
11464                    &cc.currency,
11465                    parent_code,
11466                    rust_decimal::Decimal::from(100),
11467                ));
11468            }
11469
11470            // Build IntercompanyRelationship records (same logic as phase_intercompany)
11471            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
11472                self.config
11473                    .companies
11474                    .iter()
11475                    .skip(1)
11476                    .enumerate()
11477                    .map(|(i, cc)| {
11478                        let mut rel =
11479                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
11480                                format!("REL{:03}", i + 1),
11481                                parent_code.clone(),
11482                                cc.code.clone(),
11483                                rust_decimal::Decimal::from(100),
11484                                start_date,
11485                            );
11486                        rel.functional_currency = cc.currency.clone();
11487                        rel
11488                    })
11489                    .collect();
11490
11491            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
11492            builder.add_companies(&companies);
11493            builder.add_ownership_relationships(&relationships);
11494
11495            // Thread IC matched-pair transaction edges into the entity graph
11496            for pair in &intercompany.matched_pairs {
11497                builder.add_intercompany_edge(
11498                    &pair.seller_company,
11499                    &pair.buyer_company,
11500                    pair.amount,
11501                    &format!("{:?}", pair.transaction_type),
11502                );
11503            }
11504
11505            let graph = builder.build();
11506            let node_count = graph.node_count();
11507            let edge_count = graph.edge_count();
11508            stats.graph_node_count += node_count;
11509            stats.graph_edge_count += edge_count;
11510
11511            // Export as PyG if configured
11512            for format in &self.config.graph_export.formats {
11513                if matches!(
11514                    format,
11515                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
11516                ) {
11517                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
11518                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
11519                        warn!("Failed to create entity graph output dir: {}", e);
11520                        continue;
11521                    }
11522                    let pyg_config = PyGExportConfig::default();
11523                    let exporter = PyGExporter::new(pyg_config);
11524                    if let Err(e) = exporter.export(&graph, &format_dir) {
11525                        warn!("Failed to export entity graph as PyG: {}", e);
11526                    } else {
11527                        info!(
11528                            "Entity relationship graph exported: {} nodes, {} edges",
11529                            node_count, edge_count
11530                        );
11531                    }
11532                }
11533            }
11534        } else {
11535            debug!(
11536                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
11537                self.config.companies.len()
11538            );
11539        }
11540    }
11541
11542    /// Export a multi-layer hypergraph for RustGraph integration.
11543    ///
11544    /// Builds a 3-layer hypergraph:
11545    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
11546    /// - Layer 2: Process Events (all process family document flows + OCPM events)
11547    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
11548    #[allow(clippy::too_many_arguments)]
11549    fn export_hypergraph(
11550        &self,
11551        coa: &Arc<ChartOfAccounts>,
11552        entries: &[JournalEntry],
11553        document_flows: &DocumentFlowSnapshot,
11554        sourcing: &SourcingSnapshot,
11555        hr: &HrSnapshot,
11556        manufacturing: &ManufacturingSnapshot,
11557        banking: &BankingSnapshot,
11558        audit: &AuditSnapshot,
11559        financial_reporting: &FinancialReportingSnapshot,
11560        ocpm: &OcpmSnapshot,
11561        compliance: &ComplianceRegulationsSnapshot,
11562        stats: &mut EnhancedGenerationStatistics,
11563    ) -> SynthResult<HypergraphExportInfo> {
11564        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
11565        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
11566        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
11567        use datasynth_graph::models::hypergraph::AggregationStrategy;
11568
11569        let hg_settings = &self.config.graph_export.hypergraph;
11570
11571        // Parse aggregation strategy from config string
11572        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
11573            "truncate" => AggregationStrategy::Truncate,
11574            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
11575            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
11576            "importance_sample" => AggregationStrategy::ImportanceSample,
11577            _ => AggregationStrategy::PoolByCounterparty,
11578        };
11579
11580        let builder_config = HypergraphConfig {
11581            max_nodes: hg_settings.max_nodes,
11582            aggregation_strategy,
11583            include_coso: hg_settings.governance_layer.include_coso,
11584            include_controls: hg_settings.governance_layer.include_controls,
11585            include_sox: hg_settings.governance_layer.include_sox,
11586            include_vendors: hg_settings.governance_layer.include_vendors,
11587            include_customers: hg_settings.governance_layer.include_customers,
11588            include_employees: hg_settings.governance_layer.include_employees,
11589            include_p2p: hg_settings.process_layer.include_p2p,
11590            include_o2c: hg_settings.process_layer.include_o2c,
11591            include_s2c: hg_settings.process_layer.include_s2c,
11592            include_h2r: hg_settings.process_layer.include_h2r,
11593            include_mfg: hg_settings.process_layer.include_mfg,
11594            include_bank: hg_settings.process_layer.include_bank,
11595            include_audit: hg_settings.process_layer.include_audit,
11596            include_r2r: hg_settings.process_layer.include_r2r,
11597            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
11598            docs_per_counterparty_threshold: hg_settings
11599                .process_layer
11600                .docs_per_counterparty_threshold,
11601            include_accounts: hg_settings.accounting_layer.include_accounts,
11602            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
11603            include_cross_layer_edges: hg_settings.cross_layer.enabled,
11604            include_compliance: self.config.compliance_regulations.enabled,
11605            include_tax: true,
11606            include_treasury: true,
11607            include_esg: true,
11608            include_project: true,
11609            include_intercompany: true,
11610            include_temporal_events: true,
11611        };
11612
11613        let mut builder = HypergraphBuilder::new(builder_config);
11614
11615        // Layer 1: Governance & Controls
11616        builder.add_coso_framework();
11617
11618        // Add controls if available (generated during JE generation)
11619        // Controls are generated per-company; we use the standard set
11620        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
11621            let controls = InternalControl::standard_controls();
11622            builder.add_controls(&controls);
11623        }
11624
11625        // Add master data
11626        builder.add_vendors(&self.master_data.vendors);
11627        builder.add_customers(&self.master_data.customers);
11628        builder.add_employees(&self.master_data.employees);
11629
11630        // Layer 2: Process Events (all process families)
11631        builder.add_p2p_documents(
11632            &document_flows.purchase_orders,
11633            &document_flows.goods_receipts,
11634            &document_flows.vendor_invoices,
11635            &document_flows.payments,
11636        );
11637        builder.add_o2c_documents(
11638            &document_flows.sales_orders,
11639            &document_flows.deliveries,
11640            &document_flows.customer_invoices,
11641        );
11642        builder.add_s2c_documents(
11643            &sourcing.sourcing_projects,
11644            &sourcing.qualifications,
11645            &sourcing.rfx_events,
11646            &sourcing.bids,
11647            &sourcing.bid_evaluations,
11648            &sourcing.contracts,
11649        );
11650        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
11651        builder.add_mfg_documents(
11652            &manufacturing.production_orders,
11653            &manufacturing.quality_inspections,
11654            &manufacturing.cycle_counts,
11655        );
11656        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
11657        builder.add_audit_documents(
11658            &audit.engagements,
11659            &audit.workpapers,
11660            &audit.findings,
11661            &audit.evidence,
11662            &audit.risk_assessments,
11663            &audit.judgments,
11664        );
11665        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
11666
11667        // OCPM events as hyperedges
11668        if let Some(ref event_log) = ocpm.event_log {
11669            builder.add_ocpm_events(event_log);
11670        }
11671
11672        // Compliance regulations as cross-layer nodes
11673        if self.config.compliance_regulations.enabled
11674            && hg_settings.governance_layer.include_controls
11675        {
11676            // Reconstruct ComplianceStandard objects from the registry
11677            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
11678            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
11679                .standard_records
11680                .iter()
11681                .filter_map(|r| {
11682                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
11683                    registry.get(&sid).cloned()
11684                })
11685                .collect();
11686
11687            builder.add_compliance_regulations(
11688                &standards,
11689                &compliance.findings,
11690                &compliance.filings,
11691            );
11692        }
11693
11694        // Layer 3: Accounting Network
11695        builder.add_accounts(coa);
11696        builder.add_journal_entries_as_hyperedges(entries);
11697
11698        // Build the hypergraph
11699        let hypergraph = builder.build();
11700
11701        // Export
11702        let output_dir = self
11703            .output_path
11704            .clone()
11705            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
11706        let hg_dir = output_dir
11707            .join(&self.config.graph_export.output_subdirectory)
11708            .join(&hg_settings.output_subdirectory);
11709
11710        // Branch on output format
11711        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
11712            "unified" => {
11713                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
11714                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
11715                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
11716                })?;
11717                (
11718                    metadata.num_nodes,
11719                    metadata.num_edges,
11720                    metadata.num_hyperedges,
11721                )
11722            }
11723            _ => {
11724                // "native" or any unrecognized format → use existing exporter
11725                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
11726                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
11727                    SynthError::generation(format!("Hypergraph export failed: {e}"))
11728                })?;
11729                (
11730                    metadata.num_nodes,
11731                    metadata.num_edges,
11732                    metadata.num_hyperedges,
11733                )
11734            }
11735        };
11736
11737        // Stream to RustGraph ingest endpoint if configured
11738        #[cfg(feature = "streaming")]
11739        if let Some(ref target_url) = hg_settings.stream_target {
11740            use crate::stream_client::{StreamClient, StreamConfig};
11741            use std::io::Write as _;
11742
11743            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
11744            let stream_config = StreamConfig {
11745                target_url: target_url.clone(),
11746                batch_size: hg_settings.stream_batch_size,
11747                api_key,
11748                ..StreamConfig::default()
11749            };
11750
11751            match StreamClient::new(stream_config) {
11752                Ok(mut client) => {
11753                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
11754                    match exporter.export_to_writer(&hypergraph, &mut client) {
11755                        Ok(_) => {
11756                            if let Err(e) = client.flush() {
11757                                warn!("Failed to flush stream client: {}", e);
11758                            } else {
11759                                info!("Streamed {} records to {}", client.total_sent(), target_url);
11760                            }
11761                        }
11762                        Err(e) => {
11763                            warn!("Streaming export failed: {}", e);
11764                        }
11765                    }
11766                }
11767                Err(e) => {
11768                    warn!("Failed to create stream client: {}", e);
11769                }
11770            }
11771        }
11772
11773        // Update stats
11774        stats.graph_node_count += num_nodes;
11775        stats.graph_edge_count += num_edges;
11776        stats.graph_export_count += 1;
11777
11778        Ok(HypergraphExportInfo {
11779            node_count: num_nodes,
11780            edge_count: num_edges,
11781            hyperedge_count: num_hyperedges,
11782            output_path: hg_dir,
11783        })
11784    }
11785
11786    /// Generate banking KYC/AML data.
11787    ///
11788    /// Creates banking customers, accounts, and transactions with AML typology injection.
11789    /// Uses the BankingOrchestrator from synth-banking crate.
11790    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
11791        let pb = self.create_progress_bar(100, "Generating Banking Data");
11792
11793        // Build the banking orchestrator from config
11794        let orchestrator = BankingOrchestratorBuilder::new()
11795            .config(self.config.banking.clone())
11796            .seed(self.seed + 9000)
11797            .country_pack(self.primary_pack().clone())
11798            .build();
11799
11800        if let Some(pb) = &pb {
11801            pb.inc(10);
11802        }
11803
11804        // Generate the banking data
11805        let result = orchestrator.generate();
11806
11807        if let Some(pb) = &pb {
11808            pb.inc(90);
11809            pb.finish_with_message(format!(
11810                "Banking: {} customers, {} transactions",
11811                result.customers.len(),
11812                result.transactions.len()
11813            ));
11814        }
11815
11816        // Cross-reference banking customers with core master data so that
11817        // banking customer names align with the enterprise customer list.
11818        // We rotate through core customers, overlaying their name and country
11819        // onto the generated banking customers where possible.
11820        let mut banking_customers = result.customers;
11821        let core_customers = &self.master_data.customers;
11822        if !core_customers.is_empty() {
11823            for (i, bc) in banking_customers.iter_mut().enumerate() {
11824                let core = &core_customers[i % core_customers.len()];
11825                bc.name = CustomerName::business(&core.name);
11826                bc.residence_country = core.country.clone();
11827                bc.enterprise_customer_id = Some(core.customer_id.clone());
11828            }
11829            debug!(
11830                "Cross-referenced {} banking customers with {} core customers",
11831                banking_customers.len(),
11832                core_customers.len()
11833            );
11834        }
11835
11836        Ok(BankingSnapshot {
11837            customers: banking_customers,
11838            accounts: result.accounts,
11839            transactions: result.transactions,
11840            transaction_labels: result.transaction_labels,
11841            customer_labels: result.customer_labels,
11842            account_labels: result.account_labels,
11843            relationship_labels: result.relationship_labels,
11844            narratives: result.narratives,
11845            suspicious_count: result.stats.suspicious_count,
11846            scenario_count: result.scenarios.len(),
11847        })
11848    }
11849
11850    /// Calculate total transactions to generate.
11851    fn calculate_total_transactions(&self) -> u64 {
11852        let months = self.config.global.period_months as f64;
11853        self.config
11854            .companies
11855            .iter()
11856            .map(|c| {
11857                let annual = c.annual_transaction_volume.count() as f64;
11858                let weighted = annual * c.volume_weight;
11859                (weighted * months / 12.0) as u64
11860            })
11861            .sum()
11862    }
11863
11864    /// Create a progress bar if progress display is enabled.
11865    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
11866        if !self.phase_config.show_progress {
11867            return None;
11868        }
11869
11870        let pb = if let Some(mp) = &self.multi_progress {
11871            mp.add(ProgressBar::new(total))
11872        } else {
11873            ProgressBar::new(total)
11874        };
11875
11876        pb.set_style(
11877            ProgressStyle::default_bar()
11878                .template(&format!(
11879                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
11880                ))
11881                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
11882                .progress_chars("#>-"),
11883        );
11884
11885        Some(pb)
11886    }
11887
11888    /// Get the generated chart of accounts.
11889    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
11890        self.coa.clone()
11891    }
11892
11893    /// Get the generated master data.
11894    pub fn get_master_data(&self) -> &MasterDataSnapshot {
11895        &self.master_data
11896    }
11897
11898    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
11899    fn phase_compliance_regulations(
11900        &mut self,
11901        _stats: &mut EnhancedGenerationStatistics,
11902    ) -> SynthResult<ComplianceRegulationsSnapshot> {
11903        if !self.phase_config.generate_compliance_regulations {
11904            return Ok(ComplianceRegulationsSnapshot::default());
11905        }
11906
11907        info!("Phase: Generating Compliance Regulations Data");
11908
11909        let cr_config = &self.config.compliance_regulations;
11910
11911        // Determine jurisdictions: from config or inferred from companies
11912        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
11913            self.config
11914                .companies
11915                .iter()
11916                .map(|c| c.country.clone())
11917                .collect::<std::collections::HashSet<_>>()
11918                .into_iter()
11919                .collect()
11920        } else {
11921            cr_config.jurisdictions.clone()
11922        };
11923
11924        // Determine reference date
11925        let fallback_date =
11926            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
11927        let reference_date = cr_config
11928            .reference_date
11929            .as_ref()
11930            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
11931            .unwrap_or_else(|| {
11932                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11933                    .unwrap_or(fallback_date)
11934            });
11935
11936        // Generate standards registry data
11937        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
11938        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
11939        let cross_reference_records = reg_gen.generate_cross_reference_records();
11940        let jurisdiction_records =
11941            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
11942
11943        info!(
11944            "  Standards: {} records, {} cross-references, {} jurisdictions",
11945            standard_records.len(),
11946            cross_reference_records.len(),
11947            jurisdiction_records.len()
11948        );
11949
11950        // Generate audit procedures (if enabled)
11951        let audit_procedures = if cr_config.audit_procedures.enabled {
11952            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
11953                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
11954                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
11955                confidence_level: cr_config.audit_procedures.confidence_level,
11956                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
11957            };
11958            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
11959                self.seed + 9000,
11960                proc_config,
11961            );
11962            let registry = reg_gen.registry();
11963            let mut all_procs = Vec::new();
11964            for jurisdiction in &jurisdictions {
11965                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
11966                all_procs.extend(procs);
11967            }
11968            info!("  Audit procedures: {}", all_procs.len());
11969            all_procs
11970        } else {
11971            Vec::new()
11972        };
11973
11974        // Generate compliance findings (if enabled)
11975        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
11976            let finding_config =
11977                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
11978                    finding_rate: cr_config.findings.finding_rate,
11979                    material_weakness_rate: cr_config.findings.material_weakness_rate,
11980                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
11981                    generate_remediation: cr_config.findings.generate_remediation,
11982                };
11983            let mut finding_gen =
11984                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
11985                    self.seed + 9100,
11986                    finding_config,
11987                );
11988            let mut all_findings = Vec::new();
11989            for company in &self.config.companies {
11990                let company_findings =
11991                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
11992                all_findings.extend(company_findings);
11993            }
11994            info!("  Compliance findings: {}", all_findings.len());
11995            all_findings
11996        } else {
11997            Vec::new()
11998        };
11999
12000        // Generate regulatory filings (if enabled)
12001        let filings = if cr_config.filings.enabled {
12002            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
12003                filing_types: cr_config.filings.filing_types.clone(),
12004                generate_status_progression: cr_config.filings.generate_status_progression,
12005            };
12006            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
12007                self.seed + 9200,
12008                filing_config,
12009            );
12010            let company_codes: Vec<String> = self
12011                .config
12012                .companies
12013                .iter()
12014                .map(|c| c.code.clone())
12015                .collect();
12016            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12017                .unwrap_or(fallback_date);
12018            let filings = filing_gen.generate_filings(
12019                &company_codes,
12020                &jurisdictions,
12021                start_date,
12022                self.config.global.period_months,
12023            );
12024            info!("  Regulatory filings: {}", filings.len());
12025            filings
12026        } else {
12027            Vec::new()
12028        };
12029
12030        // Build compliance graph (if enabled)
12031        let compliance_graph = if cr_config.graph.enabled {
12032            let graph_config = datasynth_graph::ComplianceGraphConfig {
12033                include_standard_nodes: cr_config.graph.include_compliance_nodes,
12034                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
12035                include_cross_references: cr_config.graph.include_cross_references,
12036                include_supersession_edges: cr_config.graph.include_supersession_edges,
12037                include_account_links: cr_config.graph.include_account_links,
12038                include_control_links: cr_config.graph.include_control_links,
12039                include_company_links: cr_config.graph.include_company_links,
12040            };
12041            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
12042
12043            // Add standard nodes
12044            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
12045                .iter()
12046                .map(|r| datasynth_graph::StandardNodeInput {
12047                    standard_id: r.standard_id.clone(),
12048                    title: r.title.clone(),
12049                    category: r.category.clone(),
12050                    domain: r.domain.clone(),
12051                    is_active: r.is_active,
12052                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
12053                    applicable_account_types: r.applicable_account_types.clone(),
12054                    applicable_processes: r.applicable_processes.clone(),
12055                })
12056                .collect();
12057            builder.add_standards(&standard_inputs);
12058
12059            // Add jurisdiction nodes
12060            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
12061                jurisdiction_records
12062                    .iter()
12063                    .map(|r| datasynth_graph::JurisdictionNodeInput {
12064                        country_code: r.country_code.clone(),
12065                        country_name: r.country_name.clone(),
12066                        framework: r.accounting_framework.clone(),
12067                        standard_count: r.standard_count,
12068                        tax_rate: r.statutory_tax_rate,
12069                    })
12070                    .collect();
12071            builder.add_jurisdictions(&jurisdiction_inputs);
12072
12073            // Add cross-reference edges
12074            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
12075                cross_reference_records
12076                    .iter()
12077                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
12078                        from_standard: r.from_standard.clone(),
12079                        to_standard: r.to_standard.clone(),
12080                        relationship: r.relationship.clone(),
12081                        convergence_level: r.convergence_level,
12082                    })
12083                    .collect();
12084            builder.add_cross_references(&xref_inputs);
12085
12086            // Add jurisdiction→standard mappings
12087            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
12088                .iter()
12089                .map(|r| datasynth_graph::JurisdictionMappingInput {
12090                    country_code: r.jurisdiction.clone(),
12091                    standard_id: r.standard_id.clone(),
12092                })
12093                .collect();
12094            builder.add_jurisdiction_mappings(&mapping_inputs);
12095
12096            // Add procedure nodes
12097            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
12098                .iter()
12099                .map(|p| datasynth_graph::ProcedureNodeInput {
12100                    procedure_id: p.procedure_id.clone(),
12101                    standard_id: p.standard_id.clone(),
12102                    procedure_type: p.procedure_type.clone(),
12103                    sample_size: p.sample_size,
12104                    confidence_level: p.confidence_level,
12105                })
12106                .collect();
12107            builder.add_procedures(&proc_inputs);
12108
12109            // Add finding nodes
12110            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
12111                .iter()
12112                .map(|f| datasynth_graph::FindingNodeInput {
12113                    finding_id: f.finding_id.to_string(),
12114                    standard_id: f
12115                        .related_standards
12116                        .first()
12117                        .map(|s| s.as_str().to_string())
12118                        .unwrap_or_default(),
12119                    severity: f.severity.to_string(),
12120                    deficiency_level: f.deficiency_level.to_string(),
12121                    severity_score: f.deficiency_level.severity_score(),
12122                    control_id: f.control_id.clone(),
12123                    affected_accounts: f.affected_accounts.clone(),
12124                })
12125                .collect();
12126            builder.add_findings(&finding_inputs);
12127
12128            // Cross-domain: link standards to accounts from chart of accounts
12129            if cr_config.graph.include_account_links {
12130                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12131                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
12132                for std_record in &standard_records {
12133                    if let Some(std_obj) =
12134                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
12135                            &std_record.standard_id,
12136                        ))
12137                    {
12138                        for acct_type in &std_obj.applicable_account_types {
12139                            account_links.push(datasynth_graph::AccountLinkInput {
12140                                standard_id: std_record.standard_id.clone(),
12141                                account_code: acct_type.clone(),
12142                                account_name: acct_type.clone(),
12143                            });
12144                        }
12145                    }
12146                }
12147                builder.add_account_links(&account_links);
12148            }
12149
12150            // Cross-domain: link standards to internal controls
12151            if cr_config.graph.include_control_links {
12152                let mut control_links = Vec::new();
12153                // SOX/PCAOB standards link to all controls
12154                let sox_like_ids: Vec<String> = standard_records
12155                    .iter()
12156                    .filter(|r| {
12157                        r.standard_id.starts_with("SOX")
12158                            || r.standard_id.starts_with("PCAOB-AS-2201")
12159                    })
12160                    .map(|r| r.standard_id.clone())
12161                    .collect();
12162                // Get control IDs from config (C001-C060 standard controls)
12163                let control_ids = [
12164                    ("C001", "Cash Controls"),
12165                    ("C002", "Large Transaction Approval"),
12166                    ("C010", "PO Approval"),
12167                    ("C011", "Three-Way Match"),
12168                    ("C020", "Revenue Recognition"),
12169                    ("C021", "Credit Check"),
12170                    ("C030", "Manual JE Approval"),
12171                    ("C031", "Period Close Review"),
12172                    ("C032", "Account Reconciliation"),
12173                    ("C040", "Payroll Processing"),
12174                    ("C050", "Fixed Asset Capitalization"),
12175                    ("C060", "Intercompany Elimination"),
12176                ];
12177                for sox_id in &sox_like_ids {
12178                    for (ctrl_id, ctrl_name) in &control_ids {
12179                        control_links.push(datasynth_graph::ControlLinkInput {
12180                            standard_id: sox_id.clone(),
12181                            control_id: ctrl_id.to_string(),
12182                            control_name: ctrl_name.to_string(),
12183                        });
12184                    }
12185                }
12186                builder.add_control_links(&control_links);
12187            }
12188
12189            // Cross-domain: filing nodes with company links
12190            if cr_config.graph.include_company_links {
12191                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
12192                    .iter()
12193                    .enumerate()
12194                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
12195                        filing_id: format!("F{:04}", i + 1),
12196                        filing_type: f.filing_type.to_string(),
12197                        company_code: f.company_code.clone(),
12198                        jurisdiction: f.jurisdiction.clone(),
12199                        status: format!("{:?}", f.status),
12200                    })
12201                    .collect();
12202                builder.add_filings(&filing_inputs);
12203            }
12204
12205            let graph = builder.build();
12206            info!(
12207                "  Compliance graph: {} nodes, {} edges",
12208                graph.nodes.len(),
12209                graph.edges.len()
12210            );
12211            Some(graph)
12212        } else {
12213            None
12214        };
12215
12216        self.check_resources_with_log("post-compliance-regulations")?;
12217
12218        Ok(ComplianceRegulationsSnapshot {
12219            standard_records,
12220            cross_reference_records,
12221            jurisdiction_records,
12222            audit_procedures,
12223            findings,
12224            filings,
12225            compliance_graph,
12226        })
12227    }
12228
12229    /// Build a lineage graph describing config → phase → output relationships.
12230    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
12231        use super::lineage::LineageGraphBuilder;
12232
12233        let mut builder = LineageGraphBuilder::new();
12234
12235        // Config sections
12236        builder.add_config_section("config:global", "Global Config");
12237        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
12238        builder.add_config_section("config:transactions", "Transaction Config");
12239
12240        // Generator phases
12241        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
12242        builder.add_generator_phase("phase:je", "Journal Entry Generation");
12243
12244        // Config → phase edges
12245        builder.configured_by("phase:coa", "config:chart_of_accounts");
12246        builder.configured_by("phase:je", "config:transactions");
12247
12248        // Output files
12249        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
12250        builder.produced_by("output:je", "phase:je");
12251
12252        // Optional phases based on config
12253        if self.phase_config.generate_master_data {
12254            builder.add_config_section("config:master_data", "Master Data Config");
12255            builder.add_generator_phase("phase:master_data", "Master Data Generation");
12256            builder.configured_by("phase:master_data", "config:master_data");
12257            builder.input_to("phase:master_data", "phase:je");
12258        }
12259
12260        if self.phase_config.generate_document_flows {
12261            builder.add_config_section("config:document_flows", "Document Flow Config");
12262            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
12263            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
12264            builder.configured_by("phase:p2p", "config:document_flows");
12265            builder.configured_by("phase:o2c", "config:document_flows");
12266
12267            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
12268            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
12269            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
12270            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
12271            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
12272
12273            builder.produced_by("output:po", "phase:p2p");
12274            builder.produced_by("output:gr", "phase:p2p");
12275            builder.produced_by("output:vi", "phase:p2p");
12276            builder.produced_by("output:so", "phase:o2c");
12277            builder.produced_by("output:ci", "phase:o2c");
12278        }
12279
12280        if self.phase_config.inject_anomalies {
12281            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
12282            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
12283            builder.configured_by("phase:anomaly", "config:fraud");
12284            builder.add_output_file(
12285                "output:labels",
12286                "Anomaly Labels",
12287                "labels/anomaly_labels.csv",
12288            );
12289            builder.produced_by("output:labels", "phase:anomaly");
12290        }
12291
12292        if self.phase_config.generate_audit {
12293            builder.add_config_section("config:audit", "Audit Config");
12294            builder.add_generator_phase("phase:audit", "Audit Data Generation");
12295            builder.configured_by("phase:audit", "config:audit");
12296        }
12297
12298        if self.phase_config.generate_banking {
12299            builder.add_config_section("config:banking", "Banking Config");
12300            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
12301            builder.configured_by("phase:banking", "config:banking");
12302        }
12303
12304        if self.config.llm.enabled {
12305            builder.add_config_section("config:llm", "LLM Enrichment Config");
12306            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
12307            builder.configured_by("phase:llm_enrichment", "config:llm");
12308        }
12309
12310        if self.config.diffusion.enabled {
12311            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
12312            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
12313            builder.configured_by("phase:diffusion", "config:diffusion");
12314        }
12315
12316        if self.config.causal.enabled {
12317            builder.add_config_section("config:causal", "Causal Generation Config");
12318            builder.add_generator_phase("phase:causal", "Causal Overlay");
12319            builder.configured_by("phase:causal", "config:causal");
12320        }
12321
12322        builder.build()
12323    }
12324
12325    // -----------------------------------------------------------------------
12326    // Trial-balance helpers used to replace hardcoded proxy values
12327    // -----------------------------------------------------------------------
12328
12329    /// Compute total revenue for a company from its journal entries.
12330    ///
12331    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
12332    /// net credits on all revenue-account lines filtered to `company_code`.
12333    fn compute_company_revenue(
12334        entries: &[JournalEntry],
12335        company_code: &str,
12336    ) -> rust_decimal::Decimal {
12337        use rust_decimal::Decimal;
12338        let mut revenue = Decimal::ZERO;
12339        for je in entries {
12340            if je.header.company_code != company_code {
12341                continue;
12342            }
12343            for line in &je.lines {
12344                if line.gl_account.starts_with('4') {
12345                    // Revenue is credit-normal
12346                    revenue += line.credit_amount - line.debit_amount;
12347                }
12348            }
12349        }
12350        revenue.max(Decimal::ZERO)
12351    }
12352
12353    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
12354    ///
12355    /// Asset accounts start with "1"; liability accounts start with "2".
12356    fn compute_entity_net_assets(
12357        entries: &[JournalEntry],
12358        entity_code: &str,
12359    ) -> rust_decimal::Decimal {
12360        use rust_decimal::Decimal;
12361        let mut asset_net = Decimal::ZERO;
12362        let mut liability_net = Decimal::ZERO;
12363        for je in entries {
12364            if je.header.company_code != entity_code {
12365                continue;
12366            }
12367            for line in &je.lines {
12368                if line.gl_account.starts_with('1') {
12369                    asset_net += line.debit_amount - line.credit_amount;
12370                } else if line.gl_account.starts_with('2') {
12371                    liability_net += line.credit_amount - line.debit_amount;
12372                }
12373            }
12374        }
12375        asset_net - liability_net
12376    }
12377}
12378
12379/// Get the directory name for a graph export format.
12380fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
12381    match format {
12382        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
12383        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
12384        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
12385        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
12386        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
12387    }
12388}
12389
12390#[cfg(test)]
12391#[allow(clippy::unwrap_used)]
12392mod tests {
12393    use super::*;
12394    use datasynth_config::schema::*;
12395
12396    fn create_test_config() -> GeneratorConfig {
12397        GeneratorConfig {
12398            global: GlobalConfig {
12399                industry: IndustrySector::Manufacturing,
12400                start_date: "2024-01-01".to_string(),
12401                period_months: 1,
12402                seed: Some(42),
12403                parallel: false,
12404                group_currency: "USD".to_string(),
12405                presentation_currency: None,
12406                worker_threads: 0,
12407                memory_limit_mb: 0,
12408                fiscal_year_months: None,
12409            },
12410            companies: vec![CompanyConfig {
12411                code: "1000".to_string(),
12412                name: "Test Company".to_string(),
12413                currency: "USD".to_string(),
12414                functional_currency: None,
12415                country: "US".to_string(),
12416                annual_transaction_volume: TransactionVolume::TenK,
12417                volume_weight: 1.0,
12418                fiscal_year_variant: "K4".to_string(),
12419            }],
12420            chart_of_accounts: ChartOfAccountsConfig {
12421                complexity: CoAComplexity::Small,
12422                industry_specific: true,
12423                custom_accounts: None,
12424                min_hierarchy_depth: 2,
12425                max_hierarchy_depth: 4,
12426            },
12427            transactions: TransactionConfig::default(),
12428            output: OutputConfig::default(),
12429            fraud: FraudConfig::default(),
12430            internal_controls: InternalControlsConfig::default(),
12431            business_processes: BusinessProcessConfig::default(),
12432            user_personas: UserPersonaConfig::default(),
12433            templates: TemplateConfig::default(),
12434            approval: ApprovalConfig::default(),
12435            departments: DepartmentConfig::default(),
12436            master_data: MasterDataConfig::default(),
12437            document_flows: DocumentFlowConfig::default(),
12438            intercompany: IntercompanyConfig::default(),
12439            balance: BalanceConfig::default(),
12440            ocpm: OcpmConfig::default(),
12441            audit: AuditGenerationConfig::default(),
12442            banking: datasynth_banking::BankingConfig::default(),
12443            data_quality: DataQualitySchemaConfig::default(),
12444            scenario: ScenarioConfig::default(),
12445            temporal: TemporalDriftConfig::default(),
12446            graph_export: GraphExportConfig::default(),
12447            streaming: StreamingSchemaConfig::default(),
12448            rate_limit: RateLimitSchemaConfig::default(),
12449            temporal_attributes: TemporalAttributeSchemaConfig::default(),
12450            relationships: RelationshipSchemaConfig::default(),
12451            accounting_standards: AccountingStandardsConfig::default(),
12452            audit_standards: AuditStandardsConfig::default(),
12453            distributions: Default::default(),
12454            temporal_patterns: Default::default(),
12455            vendor_network: VendorNetworkSchemaConfig::default(),
12456            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
12457            relationship_strength: RelationshipStrengthSchemaConfig::default(),
12458            cross_process_links: CrossProcessLinksSchemaConfig::default(),
12459            organizational_events: OrganizationalEventsSchemaConfig::default(),
12460            behavioral_drift: BehavioralDriftSchemaConfig::default(),
12461            market_drift: MarketDriftSchemaConfig::default(),
12462            drift_labeling: DriftLabelingSchemaConfig::default(),
12463            anomaly_injection: Default::default(),
12464            industry_specific: Default::default(),
12465            fingerprint_privacy: Default::default(),
12466            quality_gates: Default::default(),
12467            compliance: Default::default(),
12468            webhooks: Default::default(),
12469            llm: Default::default(),
12470            diffusion: Default::default(),
12471            causal: Default::default(),
12472            source_to_pay: Default::default(),
12473            financial_reporting: Default::default(),
12474            hr: Default::default(),
12475            manufacturing: Default::default(),
12476            sales_quotes: Default::default(),
12477            tax: Default::default(),
12478            treasury: Default::default(),
12479            project_accounting: Default::default(),
12480            esg: Default::default(),
12481            country_packs: None,
12482            scenarios: Default::default(),
12483            session: Default::default(),
12484            compliance_regulations: Default::default(),
12485        }
12486    }
12487
12488    #[test]
12489    fn test_enhanced_orchestrator_creation() {
12490        let config = create_test_config();
12491        let orchestrator = EnhancedOrchestrator::with_defaults(config);
12492        assert!(orchestrator.is_ok());
12493    }
12494
12495    #[test]
12496    fn test_minimal_generation() {
12497        let config = create_test_config();
12498        let phase_config = PhaseConfig {
12499            generate_master_data: false,
12500            generate_document_flows: false,
12501            generate_journal_entries: true,
12502            inject_anomalies: false,
12503            show_progress: false,
12504            ..Default::default()
12505        };
12506
12507        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12508        let result = orchestrator.generate();
12509
12510        assert!(result.is_ok());
12511        let result = result.unwrap();
12512        assert!(!result.journal_entries.is_empty());
12513    }
12514
12515    #[test]
12516    fn test_master_data_generation() {
12517        let config = create_test_config();
12518        let phase_config = PhaseConfig {
12519            generate_master_data: true,
12520            generate_document_flows: false,
12521            generate_journal_entries: false,
12522            inject_anomalies: false,
12523            show_progress: false,
12524            vendors_per_company: 5,
12525            customers_per_company: 5,
12526            materials_per_company: 10,
12527            assets_per_company: 5,
12528            employees_per_company: 10,
12529            ..Default::default()
12530        };
12531
12532        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12533        let result = orchestrator.generate().unwrap();
12534
12535        assert!(!result.master_data.vendors.is_empty());
12536        assert!(!result.master_data.customers.is_empty());
12537        assert!(!result.master_data.materials.is_empty());
12538    }
12539
12540    #[test]
12541    fn test_document_flow_generation() {
12542        let config = create_test_config();
12543        let phase_config = PhaseConfig {
12544            generate_master_data: true,
12545            generate_document_flows: true,
12546            generate_journal_entries: false,
12547            inject_anomalies: false,
12548            inject_data_quality: false,
12549            validate_balances: false,
12550            generate_ocpm_events: false,
12551            show_progress: false,
12552            vendors_per_company: 5,
12553            customers_per_company: 5,
12554            materials_per_company: 10,
12555            assets_per_company: 5,
12556            employees_per_company: 10,
12557            p2p_chains: 5,
12558            o2c_chains: 5,
12559            ..Default::default()
12560        };
12561
12562        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12563        let result = orchestrator.generate().unwrap();
12564
12565        // Should have generated P2P and O2C chains
12566        assert!(!result.document_flows.p2p_chains.is_empty());
12567        assert!(!result.document_flows.o2c_chains.is_empty());
12568
12569        // Flattened documents should be populated
12570        assert!(!result.document_flows.purchase_orders.is_empty());
12571        assert!(!result.document_flows.sales_orders.is_empty());
12572    }
12573
12574    #[test]
12575    fn test_anomaly_injection() {
12576        let config = create_test_config();
12577        let phase_config = PhaseConfig {
12578            generate_master_data: false,
12579            generate_document_flows: false,
12580            generate_journal_entries: true,
12581            inject_anomalies: true,
12582            show_progress: false,
12583            ..Default::default()
12584        };
12585
12586        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12587        let result = orchestrator.generate().unwrap();
12588
12589        // Should have journal entries
12590        assert!(!result.journal_entries.is_empty());
12591
12592        // With ~833 entries and 2% rate, expect some anomalies
12593        // Note: This is probabilistic, so we just verify the structure exists
12594        assert!(result.anomaly_labels.summary.is_some());
12595    }
12596
12597    #[test]
12598    fn test_full_generation_pipeline() {
12599        let config = create_test_config();
12600        let phase_config = PhaseConfig {
12601            generate_master_data: true,
12602            generate_document_flows: true,
12603            generate_journal_entries: true,
12604            inject_anomalies: false,
12605            inject_data_quality: false,
12606            validate_balances: true,
12607            generate_ocpm_events: false,
12608            show_progress: false,
12609            vendors_per_company: 3,
12610            customers_per_company: 3,
12611            materials_per_company: 5,
12612            assets_per_company: 3,
12613            employees_per_company: 5,
12614            p2p_chains: 3,
12615            o2c_chains: 3,
12616            ..Default::default()
12617        };
12618
12619        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12620        let result = orchestrator.generate().unwrap();
12621
12622        // All phases should have results
12623        assert!(!result.master_data.vendors.is_empty());
12624        assert!(!result.master_data.customers.is_empty());
12625        assert!(!result.document_flows.p2p_chains.is_empty());
12626        assert!(!result.document_flows.o2c_chains.is_empty());
12627        assert!(!result.journal_entries.is_empty());
12628        assert!(result.statistics.accounts_count > 0);
12629
12630        // Subledger linking should have run
12631        assert!(!result.subledger.ap_invoices.is_empty());
12632        assert!(!result.subledger.ar_invoices.is_empty());
12633
12634        // Balance validation should have run
12635        assert!(result.balance_validation.validated);
12636        assert!(result.balance_validation.entries_processed > 0);
12637    }
12638
12639    #[test]
12640    fn test_subledger_linking() {
12641        let config = create_test_config();
12642        let phase_config = PhaseConfig {
12643            generate_master_data: true,
12644            generate_document_flows: true,
12645            generate_journal_entries: false,
12646            inject_anomalies: false,
12647            inject_data_quality: false,
12648            validate_balances: false,
12649            generate_ocpm_events: false,
12650            show_progress: false,
12651            vendors_per_company: 5,
12652            customers_per_company: 5,
12653            materials_per_company: 10,
12654            assets_per_company: 3,
12655            employees_per_company: 5,
12656            p2p_chains: 5,
12657            o2c_chains: 5,
12658            ..Default::default()
12659        };
12660
12661        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12662        let result = orchestrator.generate().unwrap();
12663
12664        // Should have document flows
12665        assert!(!result.document_flows.vendor_invoices.is_empty());
12666        assert!(!result.document_flows.customer_invoices.is_empty());
12667
12668        // Subledger should be linked from document flows
12669        assert!(!result.subledger.ap_invoices.is_empty());
12670        assert!(!result.subledger.ar_invoices.is_empty());
12671
12672        // AP invoices count should match vendor invoices count
12673        assert_eq!(
12674            result.subledger.ap_invoices.len(),
12675            result.document_flows.vendor_invoices.len()
12676        );
12677
12678        // AR invoices count should match customer invoices count
12679        assert_eq!(
12680            result.subledger.ar_invoices.len(),
12681            result.document_flows.customer_invoices.len()
12682        );
12683
12684        // Statistics should reflect subledger counts
12685        assert_eq!(
12686            result.statistics.ap_invoice_count,
12687            result.subledger.ap_invoices.len()
12688        );
12689        assert_eq!(
12690            result.statistics.ar_invoice_count,
12691            result.subledger.ar_invoices.len()
12692        );
12693    }
12694
12695    #[test]
12696    fn test_balance_validation() {
12697        let config = create_test_config();
12698        let phase_config = PhaseConfig {
12699            generate_master_data: false,
12700            generate_document_flows: false,
12701            generate_journal_entries: true,
12702            inject_anomalies: false,
12703            validate_balances: true,
12704            show_progress: false,
12705            ..Default::default()
12706        };
12707
12708        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12709        let result = orchestrator.generate().unwrap();
12710
12711        // Balance validation should run
12712        assert!(result.balance_validation.validated);
12713        assert!(result.balance_validation.entries_processed > 0);
12714
12715        // Generated JEs should be balanced (no unbalanced entries)
12716        assert!(!result.balance_validation.has_unbalanced_entries);
12717
12718        // Total debits should equal total credits
12719        assert_eq!(
12720            result.balance_validation.total_debits,
12721            result.balance_validation.total_credits
12722        );
12723    }
12724
12725    #[test]
12726    fn test_statistics_accuracy() {
12727        let config = create_test_config();
12728        let phase_config = PhaseConfig {
12729            generate_master_data: true,
12730            generate_document_flows: false,
12731            generate_journal_entries: true,
12732            inject_anomalies: false,
12733            show_progress: false,
12734            vendors_per_company: 10,
12735            customers_per_company: 20,
12736            materials_per_company: 15,
12737            assets_per_company: 5,
12738            employees_per_company: 8,
12739            ..Default::default()
12740        };
12741
12742        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12743        let result = orchestrator.generate().unwrap();
12744
12745        // Statistics should match actual data
12746        assert_eq!(
12747            result.statistics.vendor_count,
12748            result.master_data.vendors.len()
12749        );
12750        assert_eq!(
12751            result.statistics.customer_count,
12752            result.master_data.customers.len()
12753        );
12754        assert_eq!(
12755            result.statistics.material_count,
12756            result.master_data.materials.len()
12757        );
12758        assert_eq!(
12759            result.statistics.total_entries as usize,
12760            result.journal_entries.len()
12761        );
12762    }
12763
12764    #[test]
12765    fn test_phase_config_defaults() {
12766        let config = PhaseConfig::default();
12767        assert!(config.generate_master_data);
12768        assert!(config.generate_document_flows);
12769        assert!(config.generate_journal_entries);
12770        assert!(!config.inject_anomalies);
12771        assert!(config.validate_balances);
12772        assert!(config.show_progress);
12773        assert!(config.vendors_per_company > 0);
12774        assert!(config.customers_per_company > 0);
12775    }
12776
12777    #[test]
12778    fn test_get_coa_before_generation() {
12779        let config = create_test_config();
12780        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
12781
12782        // Before generation, CoA should be None
12783        assert!(orchestrator.get_coa().is_none());
12784    }
12785
12786    #[test]
12787    fn test_get_coa_after_generation() {
12788        let config = create_test_config();
12789        let phase_config = PhaseConfig {
12790            generate_master_data: false,
12791            generate_document_flows: false,
12792            generate_journal_entries: true,
12793            inject_anomalies: false,
12794            show_progress: false,
12795            ..Default::default()
12796        };
12797
12798        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12799        let _ = orchestrator.generate().unwrap();
12800
12801        // After generation, CoA should be available
12802        assert!(orchestrator.get_coa().is_some());
12803    }
12804
12805    #[test]
12806    fn test_get_master_data() {
12807        let config = create_test_config();
12808        let phase_config = PhaseConfig {
12809            generate_master_data: true,
12810            generate_document_flows: false,
12811            generate_journal_entries: false,
12812            inject_anomalies: false,
12813            show_progress: false,
12814            vendors_per_company: 5,
12815            customers_per_company: 5,
12816            materials_per_company: 5,
12817            assets_per_company: 5,
12818            employees_per_company: 5,
12819            ..Default::default()
12820        };
12821
12822        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12823        let result = orchestrator.generate().unwrap();
12824
12825        // After generate(), master_data is moved into the result
12826        assert!(!result.master_data.vendors.is_empty());
12827    }
12828
12829    #[test]
12830    fn test_with_progress_builder() {
12831        let config = create_test_config();
12832        let orchestrator = EnhancedOrchestrator::with_defaults(config)
12833            .unwrap()
12834            .with_progress(false);
12835
12836        // Should still work without progress
12837        assert!(!orchestrator.phase_config.show_progress);
12838    }
12839
12840    #[test]
12841    fn test_multi_company_generation() {
12842        let mut config = create_test_config();
12843        config.companies.push(CompanyConfig {
12844            code: "2000".to_string(),
12845            name: "Subsidiary".to_string(),
12846            currency: "EUR".to_string(),
12847            functional_currency: None,
12848            country: "DE".to_string(),
12849            annual_transaction_volume: TransactionVolume::TenK,
12850            volume_weight: 0.5,
12851            fiscal_year_variant: "K4".to_string(),
12852        });
12853
12854        let phase_config = PhaseConfig {
12855            generate_master_data: true,
12856            generate_document_flows: false,
12857            generate_journal_entries: true,
12858            inject_anomalies: false,
12859            show_progress: false,
12860            vendors_per_company: 5,
12861            customers_per_company: 5,
12862            materials_per_company: 5,
12863            assets_per_company: 5,
12864            employees_per_company: 5,
12865            ..Default::default()
12866        };
12867
12868        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12869        let result = orchestrator.generate().unwrap();
12870
12871        // Should have master data for both companies
12872        assert!(result.statistics.vendor_count >= 10); // 5 per company
12873        assert!(result.statistics.customer_count >= 10);
12874        assert!(result.statistics.companies_count == 2);
12875    }
12876
12877    #[test]
12878    fn test_empty_master_data_skips_document_flows() {
12879        let config = create_test_config();
12880        let phase_config = PhaseConfig {
12881            generate_master_data: false,   // Skip master data
12882            generate_document_flows: true, // Try to generate flows
12883            generate_journal_entries: false,
12884            inject_anomalies: false,
12885            show_progress: false,
12886            ..Default::default()
12887        };
12888
12889        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12890        let result = orchestrator.generate().unwrap();
12891
12892        // Without master data, document flows should be empty
12893        assert!(result.document_flows.p2p_chains.is_empty());
12894        assert!(result.document_flows.o2c_chains.is_empty());
12895    }
12896
12897    #[test]
12898    fn test_journal_entry_line_item_count() {
12899        let config = create_test_config();
12900        let phase_config = PhaseConfig {
12901            generate_master_data: false,
12902            generate_document_flows: false,
12903            generate_journal_entries: true,
12904            inject_anomalies: false,
12905            show_progress: false,
12906            ..Default::default()
12907        };
12908
12909        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12910        let result = orchestrator.generate().unwrap();
12911
12912        // Total line items should match sum of all entry line counts
12913        let calculated_line_items: u64 = result
12914            .journal_entries
12915            .iter()
12916            .map(|e| e.line_count() as u64)
12917            .sum();
12918        assert_eq!(result.statistics.total_line_items, calculated_line_items);
12919    }
12920
12921    #[test]
12922    fn test_audit_generation() {
12923        let config = create_test_config();
12924        let phase_config = PhaseConfig {
12925            generate_master_data: false,
12926            generate_document_flows: false,
12927            generate_journal_entries: true,
12928            inject_anomalies: false,
12929            show_progress: false,
12930            generate_audit: true,
12931            audit_engagements: 2,
12932            workpapers_per_engagement: 5,
12933            evidence_per_workpaper: 2,
12934            risks_per_engagement: 3,
12935            findings_per_engagement: 2,
12936            judgments_per_engagement: 2,
12937            ..Default::default()
12938        };
12939
12940        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12941        let result = orchestrator.generate().unwrap();
12942
12943        // Should have generated audit data
12944        assert_eq!(result.audit.engagements.len(), 2);
12945        assert!(!result.audit.workpapers.is_empty());
12946        assert!(!result.audit.evidence.is_empty());
12947        assert!(!result.audit.risk_assessments.is_empty());
12948        assert!(!result.audit.findings.is_empty());
12949        assert!(!result.audit.judgments.is_empty());
12950
12951        // New ISA entity collections should also be populated
12952        assert!(
12953            !result.audit.confirmations.is_empty(),
12954            "ISA 505 confirmations should be generated"
12955        );
12956        assert!(
12957            !result.audit.confirmation_responses.is_empty(),
12958            "ISA 505 confirmation responses should be generated"
12959        );
12960        assert!(
12961            !result.audit.procedure_steps.is_empty(),
12962            "ISA 330 procedure steps should be generated"
12963        );
12964        // Samples may or may not be generated depending on workpaper sampling methods
12965        assert!(
12966            !result.audit.analytical_results.is_empty(),
12967            "ISA 520 analytical procedures should be generated"
12968        );
12969        assert!(
12970            !result.audit.ia_functions.is_empty(),
12971            "ISA 610 IA functions should be generated (one per engagement)"
12972        );
12973        assert!(
12974            !result.audit.related_parties.is_empty(),
12975            "ISA 550 related parties should be generated"
12976        );
12977
12978        // Statistics should match
12979        assert_eq!(
12980            result.statistics.audit_engagement_count,
12981            result.audit.engagements.len()
12982        );
12983        assert_eq!(
12984            result.statistics.audit_workpaper_count,
12985            result.audit.workpapers.len()
12986        );
12987        assert_eq!(
12988            result.statistics.audit_evidence_count,
12989            result.audit.evidence.len()
12990        );
12991        assert_eq!(
12992            result.statistics.audit_risk_count,
12993            result.audit.risk_assessments.len()
12994        );
12995        assert_eq!(
12996            result.statistics.audit_finding_count,
12997            result.audit.findings.len()
12998        );
12999        assert_eq!(
13000            result.statistics.audit_judgment_count,
13001            result.audit.judgments.len()
13002        );
13003        assert_eq!(
13004            result.statistics.audit_confirmation_count,
13005            result.audit.confirmations.len()
13006        );
13007        assert_eq!(
13008            result.statistics.audit_confirmation_response_count,
13009            result.audit.confirmation_responses.len()
13010        );
13011        assert_eq!(
13012            result.statistics.audit_procedure_step_count,
13013            result.audit.procedure_steps.len()
13014        );
13015        assert_eq!(
13016            result.statistics.audit_sample_count,
13017            result.audit.samples.len()
13018        );
13019        assert_eq!(
13020            result.statistics.audit_analytical_result_count,
13021            result.audit.analytical_results.len()
13022        );
13023        assert_eq!(
13024            result.statistics.audit_ia_function_count,
13025            result.audit.ia_functions.len()
13026        );
13027        assert_eq!(
13028            result.statistics.audit_ia_report_count,
13029            result.audit.ia_reports.len()
13030        );
13031        assert_eq!(
13032            result.statistics.audit_related_party_count,
13033            result.audit.related_parties.len()
13034        );
13035        assert_eq!(
13036            result.statistics.audit_related_party_transaction_count,
13037            result.audit.related_party_transactions.len()
13038        );
13039    }
13040
13041    #[test]
13042    fn test_new_phases_disabled_by_default() {
13043        let config = create_test_config();
13044        // Verify new config fields default to disabled
13045        assert!(!config.llm.enabled);
13046        assert!(!config.diffusion.enabled);
13047        assert!(!config.causal.enabled);
13048
13049        let phase_config = PhaseConfig {
13050            generate_master_data: false,
13051            generate_document_flows: false,
13052            generate_journal_entries: true,
13053            inject_anomalies: false,
13054            show_progress: false,
13055            ..Default::default()
13056        };
13057
13058        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13059        let result = orchestrator.generate().unwrap();
13060
13061        // All new phase statistics should be zero when disabled
13062        assert_eq!(result.statistics.llm_enrichment_ms, 0);
13063        assert_eq!(result.statistics.llm_vendors_enriched, 0);
13064        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
13065        assert_eq!(result.statistics.diffusion_samples_generated, 0);
13066        assert_eq!(result.statistics.causal_generation_ms, 0);
13067        assert_eq!(result.statistics.causal_samples_generated, 0);
13068        assert!(result.statistics.causal_validation_passed.is_none());
13069        assert_eq!(result.statistics.counterfactual_pair_count, 0);
13070        assert!(result.counterfactual_pairs.is_empty());
13071    }
13072
13073    #[test]
13074    fn test_counterfactual_generation_enabled() {
13075        let config = create_test_config();
13076        let phase_config = PhaseConfig {
13077            generate_master_data: false,
13078            generate_document_flows: false,
13079            generate_journal_entries: true,
13080            inject_anomalies: false,
13081            show_progress: false,
13082            generate_counterfactuals: true,
13083            generate_period_close: false, // Disable so entry count matches counterfactual pairs
13084            ..Default::default()
13085        };
13086
13087        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13088        let result = orchestrator.generate().unwrap();
13089
13090        // With JE generation enabled, counterfactual pairs should be generated
13091        if !result.journal_entries.is_empty() {
13092            assert_eq!(
13093                result.counterfactual_pairs.len(),
13094                result.journal_entries.len()
13095            );
13096            assert_eq!(
13097                result.statistics.counterfactual_pair_count,
13098                result.journal_entries.len()
13099            );
13100            // Each pair should have a distinct pair_id
13101            let ids: std::collections::HashSet<_> = result
13102                .counterfactual_pairs
13103                .iter()
13104                .map(|p| p.pair_id.clone())
13105                .collect();
13106            assert_eq!(ids.len(), result.counterfactual_pairs.len());
13107        }
13108    }
13109
13110    #[test]
13111    fn test_llm_enrichment_enabled() {
13112        let mut config = create_test_config();
13113        config.llm.enabled = true;
13114        config.llm.max_vendor_enrichments = 3;
13115
13116        let phase_config = PhaseConfig {
13117            generate_master_data: true,
13118            generate_document_flows: false,
13119            generate_journal_entries: false,
13120            inject_anomalies: false,
13121            show_progress: false,
13122            vendors_per_company: 5,
13123            customers_per_company: 3,
13124            materials_per_company: 3,
13125            assets_per_company: 3,
13126            employees_per_company: 3,
13127            ..Default::default()
13128        };
13129
13130        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13131        let result = orchestrator.generate().unwrap();
13132
13133        // LLM enrichment should have run
13134        assert!(result.statistics.llm_vendors_enriched > 0);
13135        assert!(result.statistics.llm_vendors_enriched <= 3);
13136    }
13137
13138    #[test]
13139    fn test_diffusion_enhancement_enabled() {
13140        let mut config = create_test_config();
13141        config.diffusion.enabled = true;
13142        config.diffusion.n_steps = 50;
13143        config.diffusion.sample_size = 20;
13144
13145        let phase_config = PhaseConfig {
13146            generate_master_data: false,
13147            generate_document_flows: false,
13148            generate_journal_entries: true,
13149            inject_anomalies: false,
13150            show_progress: false,
13151            ..Default::default()
13152        };
13153
13154        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13155        let result = orchestrator.generate().unwrap();
13156
13157        // Diffusion phase should have generated samples
13158        assert_eq!(result.statistics.diffusion_samples_generated, 20);
13159    }
13160
13161    #[test]
13162    fn test_causal_overlay_enabled() {
13163        let mut config = create_test_config();
13164        config.causal.enabled = true;
13165        config.causal.template = "fraud_detection".to_string();
13166        config.causal.sample_size = 100;
13167        config.causal.validate = true;
13168
13169        let phase_config = PhaseConfig {
13170            generate_master_data: false,
13171            generate_document_flows: false,
13172            generate_journal_entries: true,
13173            inject_anomalies: false,
13174            show_progress: false,
13175            ..Default::default()
13176        };
13177
13178        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13179        let result = orchestrator.generate().unwrap();
13180
13181        // Causal phase should have generated samples
13182        assert_eq!(result.statistics.causal_samples_generated, 100);
13183        // Validation should have run
13184        assert!(result.statistics.causal_validation_passed.is_some());
13185    }
13186
13187    #[test]
13188    fn test_causal_overlay_revenue_cycle_template() {
13189        let mut config = create_test_config();
13190        config.causal.enabled = true;
13191        config.causal.template = "revenue_cycle".to_string();
13192        config.causal.sample_size = 50;
13193        config.causal.validate = false;
13194
13195        let phase_config = PhaseConfig {
13196            generate_master_data: false,
13197            generate_document_flows: false,
13198            generate_journal_entries: true,
13199            inject_anomalies: false,
13200            show_progress: false,
13201            ..Default::default()
13202        };
13203
13204        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13205        let result = orchestrator.generate().unwrap();
13206
13207        // Causal phase should have generated samples
13208        assert_eq!(result.statistics.causal_samples_generated, 50);
13209        // Validation was disabled
13210        assert!(result.statistics.causal_validation_passed.is_none());
13211    }
13212
13213    #[test]
13214    fn test_all_new_phases_enabled_together() {
13215        let mut config = create_test_config();
13216        config.llm.enabled = true;
13217        config.llm.max_vendor_enrichments = 2;
13218        config.diffusion.enabled = true;
13219        config.diffusion.n_steps = 20;
13220        config.diffusion.sample_size = 10;
13221        config.causal.enabled = true;
13222        config.causal.sample_size = 50;
13223        config.causal.validate = true;
13224
13225        let phase_config = PhaseConfig {
13226            generate_master_data: true,
13227            generate_document_flows: false,
13228            generate_journal_entries: true,
13229            inject_anomalies: false,
13230            show_progress: false,
13231            vendors_per_company: 5,
13232            customers_per_company: 3,
13233            materials_per_company: 3,
13234            assets_per_company: 3,
13235            employees_per_company: 3,
13236            ..Default::default()
13237        };
13238
13239        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13240        let result = orchestrator.generate().unwrap();
13241
13242        // All three phases should have run
13243        assert!(result.statistics.llm_vendors_enriched > 0);
13244        assert_eq!(result.statistics.diffusion_samples_generated, 10);
13245        assert_eq!(result.statistics.causal_samples_generated, 50);
13246        assert!(result.statistics.causal_validation_passed.is_some());
13247    }
13248
13249    #[test]
13250    fn test_statistics_serialization_with_new_fields() {
13251        let stats = EnhancedGenerationStatistics {
13252            total_entries: 100,
13253            total_line_items: 500,
13254            llm_enrichment_ms: 42,
13255            llm_vendors_enriched: 10,
13256            diffusion_enhancement_ms: 100,
13257            diffusion_samples_generated: 50,
13258            causal_generation_ms: 200,
13259            causal_samples_generated: 100,
13260            causal_validation_passed: Some(true),
13261            ..Default::default()
13262        };
13263
13264        let json = serde_json::to_string(&stats).unwrap();
13265        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
13266
13267        assert_eq!(deserialized.llm_enrichment_ms, 42);
13268        assert_eq!(deserialized.llm_vendors_enriched, 10);
13269        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
13270        assert_eq!(deserialized.diffusion_samples_generated, 50);
13271        assert_eq!(deserialized.causal_generation_ms, 200);
13272        assert_eq!(deserialized.causal_samples_generated, 100);
13273        assert_eq!(deserialized.causal_validation_passed, Some(true));
13274    }
13275
13276    #[test]
13277    fn test_statistics_backward_compat_deserialization() {
13278        // Old JSON without the new fields should still deserialize
13279        let old_json = r#"{
13280            "total_entries": 100,
13281            "total_line_items": 500,
13282            "accounts_count": 50,
13283            "companies_count": 1,
13284            "period_months": 12,
13285            "vendor_count": 10,
13286            "customer_count": 20,
13287            "material_count": 15,
13288            "asset_count": 5,
13289            "employee_count": 8,
13290            "p2p_chain_count": 5,
13291            "o2c_chain_count": 5,
13292            "ap_invoice_count": 5,
13293            "ar_invoice_count": 5,
13294            "ocpm_event_count": 0,
13295            "ocpm_object_count": 0,
13296            "ocpm_case_count": 0,
13297            "audit_engagement_count": 0,
13298            "audit_workpaper_count": 0,
13299            "audit_evidence_count": 0,
13300            "audit_risk_count": 0,
13301            "audit_finding_count": 0,
13302            "audit_judgment_count": 0,
13303            "anomalies_injected": 0,
13304            "data_quality_issues": 0,
13305            "banking_customer_count": 0,
13306            "banking_account_count": 0,
13307            "banking_transaction_count": 0,
13308            "banking_suspicious_count": 0,
13309            "graph_export_count": 0,
13310            "graph_node_count": 0,
13311            "graph_edge_count": 0
13312        }"#;
13313
13314        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
13315
13316        // New fields should default to 0 / None
13317        assert_eq!(stats.llm_enrichment_ms, 0);
13318        assert_eq!(stats.llm_vendors_enriched, 0);
13319        assert_eq!(stats.diffusion_enhancement_ms, 0);
13320        assert_eq!(stats.diffusion_samples_generated, 0);
13321        assert_eq!(stats.causal_generation_ms, 0);
13322        assert_eq!(stats.causal_samples_generated, 0);
13323        assert!(stats.causal_validation_passed.is_none());
13324    }
13325}