Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    MaterialGenerator,
117    O2CDocumentChain,
118    O2CGenerator,
119    O2CGeneratorConfig,
120    O2CPaymentBehavior,
121    P2PDocumentChain,
122    // Document flow generators
123    P2PGenerator,
124    P2PGeneratorConfig,
125    P2PPaymentBehavior,
126    PaymentReference,
127    // Provisions and contingencies generator (IAS 37 / ASC 450)
128    ProvisionGenerator,
129    QualificationGenerator,
130    RfxGenerator,
131    RiskAssessmentGenerator,
132    // Balance validation
133    RunningBalanceTracker,
134    ScorecardGenerator,
135    // Segment reporting generator (IFRS 8 / ASC 280)
136    SegmentGenerator,
137    SegmentSeed,
138    SourcingProjectGenerator,
139    SpendAnalysisGenerator,
140    ValidationError,
141    // Master data generators
142    VendorGenerator,
143    WorkpaperGenerator,
144};
145use datasynth_graph::{
146    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
147    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
148    TransactionGraphConfig,
149};
150use datasynth_ocpm::{
151    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
152    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
153    OcpmUuidFactory, P2pDocuments, S2cDocuments,
154};
155
156use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
157use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
158use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
159use datasynth_core::llm::MockLlmProvider;
160use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
161use datasynth_core::models::documents::PaymentMethod;
162use datasynth_core::models::IndustrySector;
163use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
164use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
165use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
166use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
167use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
168use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
169use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
170use datasynth_generators::audit::sample_generator::SampleGenerator;
171use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
172use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
173use datasynth_generators::coa_generator::CoAFramework;
174use datasynth_generators::llm_enrichment::VendorLlmEnricher;
175use rayon::prelude::*;
176
177// ============================================================================
178// Configuration Conversion Functions
179// ============================================================================
180
181/// Convert P2P flow config from schema to generator config.
182fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
183    let payment_behavior = &schema_config.payment_behavior;
184    let late_dist = &payment_behavior.late_payment_days_distribution;
185
186    P2PGeneratorConfig {
187        three_way_match_rate: schema_config.three_way_match_rate,
188        partial_delivery_rate: schema_config.partial_delivery_rate,
189        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
190        price_variance_rate: schema_config.price_variance_rate,
191        max_price_variance_percent: schema_config.max_price_variance_percent,
192        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
193        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
194        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
195        payment_method_distribution: vec![
196            (PaymentMethod::BankTransfer, 0.60),
197            (PaymentMethod::Check, 0.25),
198            (PaymentMethod::Wire, 0.10),
199            (PaymentMethod::CreditCard, 0.05),
200        ],
201        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
202        payment_behavior: P2PPaymentBehavior {
203            late_payment_rate: payment_behavior.late_payment_rate,
204            late_payment_distribution: LatePaymentDistribution {
205                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
206                late_8_to_14: late_dist.late_8_to_14,
207                very_late_15_to_30: late_dist.very_late_15_to_30,
208                severely_late_31_to_60: late_dist.severely_late_31_to_60,
209                extremely_late_over_60: late_dist.extremely_late_over_60,
210            },
211            partial_payment_rate: payment_behavior.partial_payment_rate,
212            payment_correction_rate: payment_behavior.payment_correction_rate,
213            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
214        },
215    }
216}
217
218/// Convert O2C flow config from schema to generator config.
219fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
220    let payment_behavior = &schema_config.payment_behavior;
221
222    O2CGeneratorConfig {
223        credit_check_failure_rate: schema_config.credit_check_failure_rate,
224        partial_shipment_rate: schema_config.partial_shipment_rate,
225        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
226        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
227        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
228        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
229        bad_debt_rate: schema_config.bad_debt_rate,
230        returns_rate: schema_config.return_rate,
231        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
232        payment_method_distribution: vec![
233            (PaymentMethod::BankTransfer, 0.50),
234            (PaymentMethod::Check, 0.30),
235            (PaymentMethod::Wire, 0.15),
236            (PaymentMethod::CreditCard, 0.05),
237        ],
238        payment_behavior: O2CPaymentBehavior {
239            partial_payment_rate: payment_behavior.partial_payments.rate,
240            short_payment_rate: payment_behavior.short_payments.rate,
241            max_short_percent: payment_behavior.short_payments.max_short_percent,
242            on_account_rate: payment_behavior.on_account_payments.rate,
243            payment_correction_rate: payment_behavior.payment_corrections.rate,
244            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
245        },
246    }
247}
248
/// Configuration for which generation phases to run.
///
/// Boolean fields switch individual phases on or off; `usize` fields set the
/// requested record volumes for the corresponding generators. Baseline values
/// are provided by the `Default` implementation, and [`PhaseConfig::from_config`]
/// derives the feature flags from a loaded generator configuration.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Generate master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Generate document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Generate OCPM events from document flows.
    pub generate_ocpm_events: bool,
    /// Generate journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Validate balance sheet equation after generation.
    pub validate_balances: bool,
    /// Show progress bars.
    pub show_progress: bool,
    /// Number of vendors to generate per company.
    pub vendors_per_company: usize,
    /// Number of customers to generate per company.
    pub customers_per_company: usize,
    /// Number of materials to generate per company.
    pub materials_per_company: usize,
    /// Number of assets to generate per company.
    pub assets_per_company: usize,
    /// Number of employees to generate per company.
    pub employees_per_company: usize,
    /// Number of P2P chains to generate.
    pub p2p_chains: usize,
    /// Number of O2C chains to generate.
    pub o2c_chains: usize,
    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// Number of audit engagements to generate.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Generate banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Generate graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Generate S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Generate bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Generate accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Generate sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Generate tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Generate ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Generate intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Generate process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Generate counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Generate compliance regulations data (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Generate period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Generate HR data (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Generate treasury data (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Generate project accounting data (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
}
333
334impl Default for PhaseConfig {
335    fn default() -> Self {
336        Self {
337            generate_master_data: true,
338            generate_document_flows: true,
339            generate_ocpm_events: false, // Off by default
340            generate_journal_entries: true,
341            inject_anomalies: false,
342            inject_data_quality: false, // Off by default (to preserve clean test data)
343            validate_balances: true,
344            show_progress: true,
345            vendors_per_company: 50,
346            customers_per_company: 100,
347            materials_per_company: 200,
348            assets_per_company: 50,
349            employees_per_company: 100,
350            p2p_chains: 100,
351            o2c_chains: 100,
352            generate_audit: false, // Off by default
353            audit_engagements: 5,
354            workpapers_per_engagement: 20,
355            evidence_per_workpaper: 5,
356            risks_per_engagement: 15,
357            findings_per_engagement: 8,
358            judgments_per_engagement: 10,
359            generate_banking: false,                // Off by default
360            generate_graph_export: false,           // Off by default
361            generate_sourcing: false,               // Off by default
362            generate_bank_reconciliation: false,    // Off by default
363            generate_financial_statements: false,   // Off by default
364            generate_accounting_standards: false,   // Off by default
365            generate_manufacturing: false,          // Off by default
366            generate_sales_kpi_budgets: false,      // Off by default
367            generate_tax: false,                    // Off by default
368            generate_esg: false,                    // Off by default
369            generate_intercompany: false,           // Off by default
370            generate_evolution_events: true,        // On by default
371            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
372            generate_compliance_regulations: false, // Off by default
373            generate_period_close: true,            // On by default
374            generate_hr: false,                     // Off by default
375            generate_treasury: false,               // Off by default
376            generate_project_accounting: false,     // Off by default
377        }
378    }
379}
380
381impl PhaseConfig {
382    /// Derive phase flags from [`GeneratorConfig`].
383    ///
384    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
385    /// CLI flags can override individual fields after calling this method.
386    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
387        Self {
388            // Always-on phases
389            generate_master_data: true,
390            generate_document_flows: true,
391            generate_journal_entries: true,
392            validate_balances: true,
393            generate_period_close: true,
394            generate_evolution_events: true,
395            show_progress: true,
396
397            // Feature-gated phases — derived from config sections
398            generate_audit: cfg.audit.enabled,
399            generate_banking: cfg.banking.enabled,
400            generate_graph_export: cfg.graph_export.enabled,
401            generate_sourcing: cfg.source_to_pay.enabled,
402            generate_intercompany: cfg.intercompany.enabled,
403            generate_financial_statements: cfg.financial_reporting.enabled,
404            generate_bank_reconciliation: cfg.financial_reporting.enabled,
405            generate_accounting_standards: cfg.accounting_standards.enabled,
406            generate_manufacturing: cfg.manufacturing.enabled,
407            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
408            generate_tax: cfg.tax.enabled,
409            generate_esg: cfg.esg.enabled,
410            generate_ocpm_events: cfg.ocpm.enabled,
411            generate_compliance_regulations: cfg.compliance_regulations.enabled,
412            generate_hr: cfg.hr.enabled,
413            generate_treasury: cfg.treasury.enabled,
414            generate_project_accounting: cfg.project_accounting.enabled,
415
416            // Explicit opt-in for ML workloads
417            generate_counterfactuals: false,
418
419            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
420            inject_data_quality: cfg.data_quality.enabled,
421
422            // Count defaults (CLI can override after calling this method)
423            vendors_per_company: 50,
424            customers_per_company: 100,
425            materials_per_company: 200,
426            assets_per_company: 50,
427            employees_per_company: 100,
428            p2p_chains: 100,
429            o2c_chains: 100,
430            audit_engagements: 5,
431            workpapers_per_engagement: 20,
432            evidence_per_workpaper: 5,
433            risks_per_engagement: 15,
434            findings_per_engagement: 8,
435            judgments_per_engagement: 10,
436        }
437    }
438}
439
/// Master data snapshot containing all generated entities.
///
/// One vector per entity kind; the derived `Default` yields a snapshot in
/// which every vector is empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Generated vendors.
    pub vendors: Vec<Vendor>,
    /// Generated customers.
    pub customers: Vec<Customer>,
    /// Generated materials.
    pub materials: Vec<Material>,
    /// Generated fixed assets.
    pub assets: Vec<FixedAsset>,
    /// Generated employees.
    pub employees: Vec<Employee>,
}
454
/// Info about a completed hypergraph export.
///
/// Holds the element counts of the export together with the directory it was
/// written to. Unlike the snapshot types in this file it does not derive
/// `Default`, so every field has to be supplied on construction.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes exported.
    pub node_count: usize,
    /// Number of pairwise edges exported.
    pub edge_count: usize,
    /// Number of hyperedges exported.
    pub hyperedge_count: usize,
    /// Output directory path.
    pub output_path: PathBuf,
}
467
/// Document flow snapshot containing all generated document chains.
///
/// Documents are exposed twice: grouped into their P2P / O2C chains, and as
/// flattened per-document-type vectors. The derived `Default` yields a
/// snapshot in which every vector is empty.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// All purchase orders (flattened).
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// All goods receipts (flattened).
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// All vendor invoices (flattened).
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// All sales orders (flattened).
    pub sales_orders: Vec<documents::SalesOrder>,
    /// All deliveries (flattened).
    pub deliveries: Vec<documents::Delivery>,
    /// All customer invoices (flattened).
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// All payments (flattened).
    pub payments: Vec<documents::Payment>,
}
490
/// Subledger snapshot containing generated subledger records.
///
/// Groups the AP, AR, fixed-asset and inventory subledger outputs in one
/// place. The derived `Default` yields a snapshot in which every vector is
/// empty.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices linked from document flow vendor invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices linked from document flow customer invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// FA subledger records (asset acquisitions from FA generator).
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions from inventory generator.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements from inventory generator.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports, one per company, computed after payment settlement.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports, one per company, computed after payment settlement.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
}
513
/// OCPM snapshot containing generated OCPM event log data.
///
/// The derived `Default` represents "nothing generated": no event log and all
/// counters at zero.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// OCPM event log (`None` if it was not generated).
    pub event_log: Option<OcpmEventLog>,
    /// Number of events generated.
    pub event_count: usize,
    /// Number of objects generated.
    pub object_count: usize,
    /// Number of cases generated.
    pub case_count: usize,
}
526
/// Audit data snapshot containing all generated audit-related entities.
///
/// Fields are grouped by the auditing standard (ISA / SOX) they relate to.
/// The derived `Default` yields a snapshot in which every vector is empty and
/// `group_audit_plan` is `None`.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements per ISA 210/220.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers per ISA 230.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence per ISA 500.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments per ISA 315/330.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings per ISA 265.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments per ISA 200.
    pub judgments: Vec<ProfessionalJudgment>,
    /// External confirmations per ISA 505.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Confirmation responses per ISA 505.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps per ISA 330/530.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples per ISA 530.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results per ISA 520.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    /// Internal audit functions per ISA 610.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports per ISA 610.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties per ISA 550.
    pub related_parties: Vec<RelatedParty>,
    /// Related party transactions per ISA 550.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    // ---- ISA 600: Group Audits ----
    /// Component auditors assigned by jurisdiction (ISA 600).
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan with materiality allocations (ISA 600); `None` when absent.
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Component instructions issued to component auditors (ISA 600).
    pub component_instructions: Vec<ComponentInstruction>,
    /// Reports received from component auditors (ISA 600).
    pub component_reports: Vec<ComponentAuditorReport>,
    // ---- ISA 210: Engagement Letters ----
    /// Engagement letters per ISA 210.
    pub engagement_letters: Vec<EngagementLetter>,
    // ---- ISA 560 / IAS 10: Subsequent Events ----
    /// Subsequent events per ISA 560 / IAS 10.
    pub subsequent_events: Vec<SubsequentEvent>,
    // ---- ISA 402: Service Organization Controls ----
    /// Service organizations identified per ISA 402.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports obtained per ISA 402.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls documented per ISA 402.
    pub user_entity_controls: Vec<UserEntityControl>,
    // ---- ISA 570: Going Concern ----
    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    // ---- ISA 540: Accounting Estimates ----
    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    // ---- ISA 700/701/705/706: Audit Opinions ----
    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key Audit Matters per ISA 701 (flattened across all opinions).
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    // ---- SOX 302 / 404 ----
    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// SOX Section 404 ICFR assessments (one per entity per year).
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    // ---- ISA 320: Materiality ----
    /// Materiality calculations per entity per period (ISA 320).
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    // ---- ISA 315: Combined Risk Assessments ----
    /// Combined Risk Assessments per account area / assertion (ISA 315).
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    // ---- ISA 530: Sampling Plans ----
    /// Sampling plans per CRA at Moderate or higher (ISA 530).
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Individual sampled items (key items + representative items) per ISA 530.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
    /// Significant classes of transactions per ISA 315 (one set per entity).
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    // ---- ISA 520: Unusual Item Markers ----
    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    // ---- ISA 520: Analytical Relationships ----
    /// Analytical relationships (ratios, trends, correlations) per entity.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
}
625
/// Banking KYC/AML data snapshot containing all generated banking entities.
///
/// Carries the generated entities, the AML labels at each level
/// (transaction, customer, account, relationship), the case narratives and
/// two summary counters. The derived `Default` yields empty vectors and
/// zeroed counters.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers (retail, business, trust).
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions with AML labels.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level AML labels with features.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level AML labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level AML labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level AML labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Case narratives for AML scenarios.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Number of suspicious transactions.
    pub suspicious_count: usize,
    /// Number of AML scenarios generated.
    pub scenario_count: usize,
}
650
/// Graph export snapshot containing exported graph metadata.
///
/// Serializable summary of the graph export phase. The derived `Default`
/// represents "no export performed": `exported` is `false`, the count is zero
/// and the map is empty.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether graph export was performed.
    pub exported: bool,
    /// Number of graphs exported.
    pub graph_count: usize,
    /// Exported graph metadata (by format name).
    pub exports: HashMap<String, GraphExportInfo>,
}
661
662/// Information about an exported graph.
663#[derive(Debug, Clone, Serialize)]
664pub struct GraphExportInfo {
665    /// Graph name.
666    pub name: String,
667    /// Export format (pytorch_geometric, neo4j, dgl).
668    pub format: String,
669    /// Output directory path.
670    pub output_path: PathBuf,
671    /// Number of nodes.
672    pub node_count: usize,
673    /// Number of edges.
674    pub edge_count: usize,
675}
676
677/// S2C sourcing data snapshot.
678#[derive(Debug, Clone, Default)]
679pub struct SourcingSnapshot {
680    /// Spend analyses.
681    pub spend_analyses: Vec<SpendAnalysis>,
682    /// Sourcing projects.
683    pub sourcing_projects: Vec<SourcingProject>,
684    /// Supplier qualifications.
685    pub qualifications: Vec<SupplierQualification>,
686    /// RFx events (RFI, RFP, RFQ).
687    pub rfx_events: Vec<RfxEvent>,
688    /// Supplier bids.
689    pub bids: Vec<SupplierBid>,
690    /// Bid evaluations.
691    pub bid_evaluations: Vec<BidEvaluation>,
692    /// Procurement contracts.
693    pub contracts: Vec<ProcurementContract>,
694    /// Catalog items.
695    pub catalog_items: Vec<CatalogItem>,
696    /// Supplier scorecards.
697    pub scorecards: Vec<SupplierScorecard>,
698}
699
700/// A single period's trial balance with metadata.
701#[derive(Debug, Clone, Serialize, Deserialize)]
702pub struct PeriodTrialBalance {
703    /// Fiscal year.
704    pub fiscal_year: u16,
705    /// Fiscal period (1-12).
706    pub fiscal_period: u8,
707    /// Period start date.
708    pub period_start: NaiveDate,
709    /// Period end date.
710    pub period_end: NaiveDate,
711    /// Trial balance entries for this period.
712    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
713}
714
715/// Financial reporting snapshot (financial statements + bank reconciliations).
716#[derive(Debug, Clone, Default)]
717pub struct FinancialReportingSnapshot {
718    /// Financial statements (balance sheet, income statement, cash flow).
719    /// For multi-entity configs this includes all standalone statements.
720    pub financial_statements: Vec<FinancialStatement>,
721    /// Standalone financial statements keyed by entity code.
722    /// Each entity has its own slice of statements.
723    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
724    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
725    pub consolidated_statements: Vec<FinancialStatement>,
726    /// Consolidation schedules (one per period) showing pre/post elimination detail.
727    pub consolidation_schedules: Vec<ConsolidationSchedule>,
728    /// Bank reconciliations.
729    pub bank_reconciliations: Vec<BankReconciliation>,
730    /// Period-close trial balances (one per period).
731    pub trial_balances: Vec<PeriodTrialBalance>,
732    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
733    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
734    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
735    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
736    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
737    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
738}
739
740/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
741#[derive(Debug, Clone, Default)]
742pub struct HrSnapshot {
743    /// Payroll runs (actual data).
744    pub payroll_runs: Vec<PayrollRun>,
745    /// Payroll line items (actual data).
746    pub payroll_line_items: Vec<PayrollLineItem>,
747    /// Time entries (actual data).
748    pub time_entries: Vec<TimeEntry>,
749    /// Expense reports (actual data).
750    pub expense_reports: Vec<ExpenseReport>,
751    /// Benefit enrollments (actual data).
752    pub benefit_enrollments: Vec<BenefitEnrollment>,
753    /// Defined benefit pension plans (IAS 19 / ASC 715).
754    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
755    /// Pension obligation (DBO) roll-forwards.
756    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
757    /// Plan asset roll-forwards.
758    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
759    /// Pension disclosures.
760    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
761    /// Journal entries generated from pension expense and OCI remeasurements.
762    pub pension_journal_entries: Vec<JournalEntry>,
763    /// Stock grants (ASC 718 / IFRS 2).
764    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
765    /// Stock-based compensation period expense records.
766    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
767    /// Journal entries generated from stock-based compensation expense.
768    pub stock_comp_journal_entries: Vec<JournalEntry>,
769    /// Payroll runs.
770    pub payroll_run_count: usize,
771    /// Payroll line item count.
772    pub payroll_line_item_count: usize,
773    /// Time entry count.
774    pub time_entry_count: usize,
775    /// Expense report count.
776    pub expense_report_count: usize,
777    /// Benefit enrollment count.
778    pub benefit_enrollment_count: usize,
779    /// Pension plan count.
780    pub pension_plan_count: usize,
781    /// Stock grant count.
782    pub stock_grant_count: usize,
783}
784
785/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
786#[derive(Debug, Clone, Default)]
787pub struct AccountingStandardsSnapshot {
788    /// Revenue recognition contracts (actual data).
789    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
790    /// Impairment tests (actual data).
791    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
792    /// Business combinations (IFRS 3 / ASC 805).
793    pub business_combinations:
794        Vec<datasynth_core::models::business_combination::BusinessCombination>,
795    /// Journal entries generated from business combinations (Day 1 + amortization).
796    pub business_combination_journal_entries: Vec<JournalEntry>,
797    /// ECL models (IFRS 9 / ASC 326).
798    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
799    /// ECL provision movements.
800    pub ecl_provision_movements:
801        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
802    /// Journal entries from ECL provision.
803    pub ecl_journal_entries: Vec<JournalEntry>,
804    /// Provisions (IAS 37 / ASC 450).
805    pub provisions: Vec<datasynth_core::models::provision::Provision>,
806    /// Provision movement roll-forwards (IAS 37 / ASC 450).
807    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
808    /// Contingent liabilities (IAS 37 / ASC 450).
809    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
810    /// Journal entries from provisions.
811    pub provision_journal_entries: Vec<JournalEntry>,
812    /// IAS 21 functional currency translation results (one per entity per period).
813    pub currency_translation_results:
814        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
815    /// Revenue recognition contract count.
816    pub revenue_contract_count: usize,
817    /// Impairment test count.
818    pub impairment_test_count: usize,
819    /// Business combination count.
820    pub business_combination_count: usize,
821    /// ECL model count.
822    pub ecl_model_count: usize,
823    /// Provision count.
824    pub provision_count: usize,
825    /// Currency translation result count (IAS 21).
826    pub currency_translation_count: usize,
827}
828
829/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
830#[derive(Debug, Clone, Default)]
831pub struct ComplianceRegulationsSnapshot {
832    /// Flattened standard records for output.
833    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
834    /// Cross-reference records.
835    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
836    /// Jurisdiction profile records.
837    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
838    /// Generated audit procedures.
839    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
840    /// Generated compliance findings.
841    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
842    /// Generated regulatory filings.
843    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
844    /// Compliance graph (if graph integration enabled).
845    pub compliance_graph: Option<datasynth_graph::Graph>,
846}
847
848/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
849#[derive(Debug, Clone, Default)]
850pub struct ManufacturingSnapshot {
851    /// Production orders (actual data).
852    pub production_orders: Vec<ProductionOrder>,
853    /// Quality inspections (actual data).
854    pub quality_inspections: Vec<QualityInspection>,
855    /// Cycle counts (actual data).
856    pub cycle_counts: Vec<CycleCount>,
857    /// BOM components (actual data).
858    pub bom_components: Vec<BomComponent>,
859    /// Inventory movements (actual data).
860    pub inventory_movements: Vec<InventoryMovement>,
861    /// Production order count.
862    pub production_order_count: usize,
863    /// Quality inspection count.
864    pub quality_inspection_count: usize,
865    /// Cycle count count.
866    pub cycle_count_count: usize,
867    /// BOM component count.
868    pub bom_component_count: usize,
869    /// Inventory movement count.
870    pub inventory_movement_count: usize,
871}
872
873/// Sales, KPI, and budget data snapshot.
874#[derive(Debug, Clone, Default)]
875pub struct SalesKpiBudgetsSnapshot {
876    /// Sales quotes (actual data).
877    pub sales_quotes: Vec<SalesQuote>,
878    /// Management KPIs (actual data).
879    pub kpis: Vec<ManagementKpi>,
880    /// Budgets (actual data).
881    pub budgets: Vec<Budget>,
882    /// Sales quote count.
883    pub sales_quote_count: usize,
884    /// Management KPI count.
885    pub kpi_count: usize,
886    /// Budget line count.
887    pub budget_line_count: usize,
888}
889
890/// Anomaly labels generated during injection.
891#[derive(Debug, Clone, Default)]
892pub struct AnomalyLabels {
893    /// All anomaly labels.
894    pub labels: Vec<LabeledAnomaly>,
895    /// Summary statistics.
896    pub summary: Option<AnomalySummary>,
897    /// Count by anomaly type.
898    pub by_type: HashMap<String, usize>,
899}
900
901/// Balance validation results from running balance tracker.
902#[derive(Debug, Clone, Default)]
903pub struct BalanceValidationResult {
904    /// Whether validation was performed.
905    pub validated: bool,
906    /// Whether balance sheet equation is satisfied.
907    pub is_balanced: bool,
908    /// Number of entries processed.
909    pub entries_processed: u64,
910    /// Total debits across all entries.
911    pub total_debits: rust_decimal::Decimal,
912    /// Total credits across all entries.
913    pub total_credits: rust_decimal::Decimal,
914    /// Number of accounts tracked.
915    pub accounts_tracked: usize,
916    /// Number of companies tracked.
917    pub companies_tracked: usize,
918    /// Validation errors encountered.
919    pub validation_errors: Vec<ValidationError>,
920    /// Whether any unbalanced entries were found.
921    pub has_unbalanced_entries: bool,
922}
923
924/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
925#[derive(Debug, Clone, Default)]
926pub struct TaxSnapshot {
927    /// Tax jurisdictions.
928    pub jurisdictions: Vec<TaxJurisdiction>,
929    /// Tax codes.
930    pub codes: Vec<TaxCode>,
931    /// Tax lines computed on documents.
932    pub tax_lines: Vec<TaxLine>,
933    /// Tax returns filed per period.
934    pub tax_returns: Vec<TaxReturn>,
935    /// Tax provisions.
936    pub tax_provisions: Vec<TaxProvision>,
937    /// Withholding tax records.
938    pub withholding_records: Vec<WithholdingTaxRecord>,
939    /// Tax anomaly labels.
940    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
941    /// Jurisdiction count.
942    pub jurisdiction_count: usize,
943    /// Code count.
944    pub code_count: usize,
945    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
946    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
947}
948
949/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
950#[derive(Debug, Clone, Default, Serialize, Deserialize)]
951pub struct IntercompanySnapshot {
952    /// Group ownership structure (parent/subsidiary/associate relationships).
953    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
954    /// IC matched pairs (transaction pairs between related entities).
955    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
956    /// IC journal entries generated from matched pairs (seller side).
957    pub seller_journal_entries: Vec<JournalEntry>,
958    /// IC journal entries generated from matched pairs (buyer side).
959    pub buyer_journal_entries: Vec<JournalEntry>,
960    /// Elimination entries for consolidation.
961    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
962    /// NCI measurements derived from group structure ownership percentages.
963    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
964    /// IC matched pair count.
965    pub matched_pair_count: usize,
966    /// IC elimination entry count.
967    pub elimination_entry_count: usize,
968    /// IC matching rate (0.0 to 1.0).
969    pub match_rate: f64,
970}
971
972/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
973#[derive(Debug, Clone, Default)]
974pub struct EsgSnapshot {
975    /// Emission records (scope 1, 2, 3).
976    pub emissions: Vec<EmissionRecord>,
977    /// Energy consumption records.
978    pub energy: Vec<EnergyConsumption>,
979    /// Water usage records.
980    pub water: Vec<WaterUsage>,
981    /// Waste records.
982    pub waste: Vec<WasteRecord>,
983    /// Workforce diversity metrics.
984    pub diversity: Vec<WorkforceDiversityMetric>,
985    /// Pay equity metrics.
986    pub pay_equity: Vec<PayEquityMetric>,
987    /// Safety incidents.
988    pub safety_incidents: Vec<SafetyIncident>,
989    /// Safety metrics.
990    pub safety_metrics: Vec<SafetyMetric>,
991    /// Governance metrics.
992    pub governance: Vec<GovernanceMetric>,
993    /// Supplier ESG assessments.
994    pub supplier_assessments: Vec<SupplierEsgAssessment>,
995    /// Materiality assessments.
996    pub materiality: Vec<MaterialityAssessment>,
997    /// ESG disclosures.
998    pub disclosures: Vec<EsgDisclosure>,
999    /// Climate scenarios.
1000    pub climate_scenarios: Vec<ClimateScenario>,
1001    /// ESG anomaly labels.
1002    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1003    /// Total emission record count.
1004    pub emission_count: usize,
1005    /// Total disclosure count.
1006    pub disclosure_count: usize,
1007}
1008
1009/// Treasury data snapshot (cash management, hedging, debt, pooling).
1010#[derive(Debug, Clone, Default)]
1011pub struct TreasurySnapshot {
1012    /// Cash positions (daily balances per account).
1013    pub cash_positions: Vec<CashPosition>,
1014    /// Cash forecasts.
1015    pub cash_forecasts: Vec<CashForecast>,
1016    /// Cash pools.
1017    pub cash_pools: Vec<CashPool>,
1018    /// Cash pool sweep transactions.
1019    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1020    /// Hedging instruments.
1021    pub hedging_instruments: Vec<HedgingInstrument>,
1022    /// Hedge relationships (ASC 815/IFRS 9 designations).
1023    pub hedge_relationships: Vec<HedgeRelationship>,
1024    /// Debt instruments.
1025    pub debt_instruments: Vec<DebtInstrument>,
1026    /// Bank guarantees and letters of credit.
1027    pub bank_guarantees: Vec<BankGuarantee>,
1028    /// Intercompany netting runs.
1029    pub netting_runs: Vec<NettingRun>,
1030    /// Treasury anomaly labels.
1031    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1032}
1033
1034/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
1035#[derive(Debug, Clone, Default)]
1036pub struct ProjectAccountingSnapshot {
1037    /// Projects with WBS hierarchies.
1038    pub projects: Vec<Project>,
1039    /// Project cost lines (linked from source documents).
1040    pub cost_lines: Vec<ProjectCostLine>,
1041    /// Revenue recognition records.
1042    pub revenue_records: Vec<ProjectRevenue>,
1043    /// Earned value metrics.
1044    pub earned_value_metrics: Vec<EarnedValueMetric>,
1045    /// Change orders.
1046    pub change_orders: Vec<ChangeOrder>,
1047    /// Project milestones.
1048    pub milestones: Vec<ProjectMilestone>,
1049}
1050
1051/// Complete result of enhanced generation run.
1052#[derive(Debug, Default)]
1053pub struct EnhancedGenerationResult {
1054    /// Generated chart of accounts.
1055    pub chart_of_accounts: ChartOfAccounts,
1056    /// Master data snapshot.
1057    pub master_data: MasterDataSnapshot,
1058    /// Document flow snapshot.
1059    pub document_flows: DocumentFlowSnapshot,
1060    /// Subledger snapshot (linked from document flows).
1061    pub subledger: SubledgerSnapshot,
1062    /// OCPM event log snapshot (if OCPM generation enabled).
1063    pub ocpm: OcpmSnapshot,
1064    /// Audit data snapshot (if audit generation enabled).
1065    pub audit: AuditSnapshot,
1066    /// Banking KYC/AML data snapshot (if banking generation enabled).
1067    pub banking: BankingSnapshot,
1068    /// Graph export snapshot (if graph export enabled).
1069    pub graph_export: GraphExportSnapshot,
1070    /// S2C sourcing data snapshot (if sourcing generation enabled).
1071    pub sourcing: SourcingSnapshot,
1072    /// Financial reporting snapshot (financial statements + bank reconciliations).
1073    pub financial_reporting: FinancialReportingSnapshot,
1074    /// HR data snapshot (payroll, time entries, expenses).
1075    pub hr: HrSnapshot,
1076    /// Accounting standards snapshot (revenue recognition, impairment).
1077    pub accounting_standards: AccountingStandardsSnapshot,
1078    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1079    pub manufacturing: ManufacturingSnapshot,
1080    /// Sales, KPI, and budget snapshot.
1081    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1082    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1083    pub tax: TaxSnapshot,
1084    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1085    pub esg: EsgSnapshot,
1086    /// Treasury data snapshot (cash management, hedging, debt).
1087    pub treasury: TreasurySnapshot,
1088    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1089    pub project_accounting: ProjectAccountingSnapshot,
1090    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1091    pub process_evolution: Vec<ProcessEvolutionEvent>,
1092    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1093    pub organizational_events: Vec<OrganizationalEvent>,
1094    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1095    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1096    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1097    pub intercompany: IntercompanySnapshot,
1098    /// Generated journal entries.
1099    pub journal_entries: Vec<JournalEntry>,
1100    /// Anomaly labels (if injection enabled).
1101    pub anomaly_labels: AnomalyLabels,
1102    /// Balance validation results (if validation enabled).
1103    pub balance_validation: BalanceValidationResult,
1104    /// Data quality statistics (if injection enabled).
1105    pub data_quality_stats: DataQualityStats,
1106    /// Generation statistics.
1107    pub statistics: EnhancedGenerationStatistics,
1108    /// Data lineage graph (if tracking enabled).
1109    pub lineage: Option<super::lineage::LineageGraph>,
1110    /// Quality gate evaluation result.
1111    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1112    /// Internal controls (if controls generation enabled).
1113    pub internal_controls: Vec<InternalControl>,
1114    /// Opening balances (if opening balance generation enabled).
1115    pub opening_balances: Vec<GeneratedOpeningBalance>,
1116    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1117    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1118    /// Counterfactual (original, mutated) JE pairs for ML training.
1119    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1120    /// Fraud red-flag indicators on P2P/O2C documents.
1121    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1122    /// Collusion rings (coordinated fraud networks).
1123    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1124    /// Bi-temporal version chains for vendor entities.
1125    pub temporal_vendor_chains:
1126        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1127    /// Entity relationship graph (nodes + edges with strength scores).
1128    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1129    /// Cross-process links (P2P ↔ O2C via inventory movements).
1130    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1131    /// Industry-specific GL accounts and metadata.
1132    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1133    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1134    pub compliance_regulations: ComplianceRegulationsSnapshot,
1135}
1136
1137/// Enhanced statistics about a generation run.
1138#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1139pub struct EnhancedGenerationStatistics {
1140    /// Total journal entries generated.
1141    pub total_entries: u64,
1142    /// Total line items generated.
1143    pub total_line_items: u64,
1144    /// Number of accounts in CoA.
1145    pub accounts_count: usize,
1146    /// Number of companies.
1147    pub companies_count: usize,
1148    /// Period in months.
1149    pub period_months: u32,
1150    /// Master data counts.
1151    pub vendor_count: usize,
1152    pub customer_count: usize,
1153    pub material_count: usize,
1154    pub asset_count: usize,
1155    pub employee_count: usize,
1156    /// Document flow counts.
1157    pub p2p_chain_count: usize,
1158    pub o2c_chain_count: usize,
1159    /// Subledger counts.
1160    pub ap_invoice_count: usize,
1161    pub ar_invoice_count: usize,
1162    /// OCPM counts.
1163    pub ocpm_event_count: usize,
1164    pub ocpm_object_count: usize,
1165    pub ocpm_case_count: usize,
1166    /// Audit counts.
1167    pub audit_engagement_count: usize,
1168    pub audit_workpaper_count: usize,
1169    pub audit_evidence_count: usize,
1170    pub audit_risk_count: usize,
1171    pub audit_finding_count: usize,
1172    pub audit_judgment_count: usize,
1173    /// ISA 505 confirmation counts.
1174    #[serde(default)]
1175    pub audit_confirmation_count: usize,
1176    #[serde(default)]
1177    pub audit_confirmation_response_count: usize,
1178    /// ISA 330/530 procedure step and sample counts.
1179    #[serde(default)]
1180    pub audit_procedure_step_count: usize,
1181    #[serde(default)]
1182    pub audit_sample_count: usize,
1183    /// ISA 520 analytical procedure counts.
1184    #[serde(default)]
1185    pub audit_analytical_result_count: usize,
1186    /// ISA 610 internal audit counts.
1187    #[serde(default)]
1188    pub audit_ia_function_count: usize,
1189    #[serde(default)]
1190    pub audit_ia_report_count: usize,
1191    /// ISA 550 related party counts.
1192    #[serde(default)]
1193    pub audit_related_party_count: usize,
1194    #[serde(default)]
1195    pub audit_related_party_transaction_count: usize,
1196    /// Anomaly counts.
1197    pub anomalies_injected: usize,
1198    /// Data quality issue counts.
1199    pub data_quality_issues: usize,
1200    /// Banking counts.
1201    pub banking_customer_count: usize,
1202    pub banking_account_count: usize,
1203    pub banking_transaction_count: usize,
1204    pub banking_suspicious_count: usize,
1205    /// Graph export counts.
1206    pub graph_export_count: usize,
1207    pub graph_node_count: usize,
1208    pub graph_edge_count: usize,
1209    /// LLM enrichment timing (milliseconds).
1210    #[serde(default)]
1211    pub llm_enrichment_ms: u64,
1212    /// Number of vendor names enriched by LLM.
1213    #[serde(default)]
1214    pub llm_vendors_enriched: usize,
1215    /// Diffusion enhancement timing (milliseconds).
1216    #[serde(default)]
1217    pub diffusion_enhancement_ms: u64,
1218    /// Number of diffusion samples generated.
1219    #[serde(default)]
1220    pub diffusion_samples_generated: usize,
1221    /// Causal generation timing (milliseconds).
1222    #[serde(default)]
1223    pub causal_generation_ms: u64,
1224    /// Number of causal samples generated.
1225    #[serde(default)]
1226    pub causal_samples_generated: usize,
1227    /// Whether causal validation passed.
1228    #[serde(default)]
1229    pub causal_validation_passed: Option<bool>,
1230    /// S2C sourcing counts.
1231    #[serde(default)]
1232    pub sourcing_project_count: usize,
1233    #[serde(default)]
1234    pub rfx_event_count: usize,
1235    #[serde(default)]
1236    pub bid_count: usize,
1237    #[serde(default)]
1238    pub contract_count: usize,
1239    #[serde(default)]
1240    pub catalog_item_count: usize,
1241    #[serde(default)]
1242    pub scorecard_count: usize,
1243    /// Financial reporting counts.
1244    #[serde(default)]
1245    pub financial_statement_count: usize,
1246    #[serde(default)]
1247    pub bank_reconciliation_count: usize,
1248    /// HR counts.
1249    #[serde(default)]
1250    pub payroll_run_count: usize,
1251    #[serde(default)]
1252    pub time_entry_count: usize,
1253    #[serde(default)]
1254    pub expense_report_count: usize,
1255    #[serde(default)]
1256    pub benefit_enrollment_count: usize,
1257    #[serde(default)]
1258    pub pension_plan_count: usize,
1259    #[serde(default)]
1260    pub stock_grant_count: usize,
1261    /// Accounting standards counts.
1262    #[serde(default)]
1263    pub revenue_contract_count: usize,
1264    #[serde(default)]
1265    pub impairment_test_count: usize,
1266    #[serde(default)]
1267    pub business_combination_count: usize,
1268    #[serde(default)]
1269    pub ecl_model_count: usize,
1270    #[serde(default)]
1271    pub provision_count: usize,
1272    /// Manufacturing counts.
1273    #[serde(default)]
1274    pub production_order_count: usize,
1275    #[serde(default)]
1276    pub quality_inspection_count: usize,
1277    #[serde(default)]
1278    pub cycle_count_count: usize,
1279    #[serde(default)]
1280    pub bom_component_count: usize,
1281    #[serde(default)]
1282    pub inventory_movement_count: usize,
1283    /// Sales & reporting counts.
1284    #[serde(default)]
1285    pub sales_quote_count: usize,
1286    #[serde(default)]
1287    pub kpi_count: usize,
1288    #[serde(default)]
1289    pub budget_line_count: usize,
1290    /// Tax counts.
1291    #[serde(default)]
1292    pub tax_jurisdiction_count: usize,
1293    #[serde(default)]
1294    pub tax_code_count: usize,
1295    /// ESG counts.
1296    #[serde(default)]
1297    pub esg_emission_count: usize,
1298    #[serde(default)]
1299    pub esg_disclosure_count: usize,
1300    /// Intercompany counts.
1301    #[serde(default)]
1302    pub ic_matched_pair_count: usize,
1303    #[serde(default)]
1304    pub ic_elimination_count: usize,
1305    /// Number of intercompany journal entries (seller + buyer side).
1306    #[serde(default)]
1307    pub ic_transaction_count: usize,
1308    /// Number of fixed asset subledger records.
1309    #[serde(default)]
1310    pub fa_subledger_count: usize,
1311    /// Number of inventory subledger records.
1312    #[serde(default)]
1313    pub inventory_subledger_count: usize,
1314    /// Treasury debt instrument count.
1315    #[serde(default)]
1316    pub treasury_debt_instrument_count: usize,
1317    /// Treasury hedging instrument count.
1318    #[serde(default)]
1319    pub treasury_hedging_instrument_count: usize,
1320    /// Project accounting project count.
1321    #[serde(default)]
1322    pub project_count: usize,
1323    /// Project accounting change order count.
1324    #[serde(default)]
1325    pub project_change_order_count: usize,
1326    /// Tax provision count.
1327    #[serde(default)]
1328    pub tax_provision_count: usize,
1329    /// Opening balance count.
1330    #[serde(default)]
1331    pub opening_balance_count: usize,
1332    /// Subledger reconciliation count.
1333    #[serde(default)]
1334    pub subledger_reconciliation_count: usize,
1335    /// Tax line count.
1336    #[serde(default)]
1337    pub tax_line_count: usize,
1338    /// Project cost line count.
1339    #[serde(default)]
1340    pub project_cost_line_count: usize,
1341    /// Cash position count.
1342    #[serde(default)]
1343    pub cash_position_count: usize,
1344    /// Cash forecast count.
1345    #[serde(default)]
1346    pub cash_forecast_count: usize,
1347    /// Cash pool count.
1348    #[serde(default)]
1349    pub cash_pool_count: usize,
1350    /// Process evolution event count.
1351    #[serde(default)]
1352    pub process_evolution_event_count: usize,
1353    /// Organizational event count.
1354    #[serde(default)]
1355    pub organizational_event_count: usize,
1356    /// Counterfactual pair count.
1357    #[serde(default)]
1358    pub counterfactual_pair_count: usize,
1359    /// Number of fraud red-flag indicators generated.
1360    #[serde(default)]
1361    pub red_flag_count: usize,
1362    /// Number of collusion rings generated.
1363    #[serde(default)]
1364    pub collusion_ring_count: usize,
1365    /// Number of bi-temporal vendor version chains generated.
1366    #[serde(default)]
1367    pub temporal_version_chain_count: usize,
1368    /// Number of nodes in the entity relationship graph.
1369    #[serde(default)]
1370    pub entity_relationship_node_count: usize,
1371    /// Number of edges in the entity relationship graph.
1372    #[serde(default)]
1373    pub entity_relationship_edge_count: usize,
1374    /// Number of cross-process links generated.
1375    #[serde(default)]
1376    pub cross_process_link_count: usize,
1377    /// Number of disruption events generated.
1378    #[serde(default)]
1379    pub disruption_event_count: usize,
1380    /// Number of industry-specific GL accounts generated.
1381    #[serde(default)]
1382    pub industry_gl_account_count: usize,
1383    /// Number of period-close journal entries generated (tax provision + closing entries).
1384    #[serde(default)]
1385    pub period_close_je_count: usize,
1386}
1387
/// Enhanced orchestrator with full feature integration.
///
/// Bundles the validated generator configuration with the state the
/// orchestrator's methods operate on: phase settings, seed, resource guard,
/// country packs, copula generators, and an optional streaming sink.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated by `new` before it is stored.
    config: GeneratorConfig,
    /// Phase configuration; `with_progress` updates its `show_progress` flag.
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `None` when the orchestrator is constructed.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts out as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or drawn at random when unset.
    seed: u64,
    /// Progress-bar container; `None` until `with_progress(true)` creates one.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring.
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring; set by `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint).
    /// Empty unless the orchestrator was built from a fingerprint.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry for localized data generation.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink for phase-by-phase output; set by `with_phase_sink`.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1407
1408impl EnhancedOrchestrator {
1409    /// Create a new enhanced orchestrator.
1410    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1411        datasynth_config::validate_config(&config)?;
1412
1413        let seed = config.global.seed.unwrap_or_else(rand::random);
1414
1415        // Build resource guard from config
1416        let resource_guard = Self::build_resource_guard(&config, None);
1417
1418        // Build country pack registry from config
1419        let country_pack_registry = match &config.country_packs {
1420            Some(cp) => {
1421                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1422                    .map_err(|e| SynthError::config(e.to_string()))?
1423            }
1424            None => datasynth_core::CountryPackRegistry::builtin_only()
1425                .map_err(|e| SynthError::config(e.to_string()))?,
1426        };
1427
1428        Ok(Self {
1429            config,
1430            phase_config,
1431            coa: None,
1432            master_data: MasterDataSnapshot::default(),
1433            seed,
1434            multi_progress: None,
1435            resource_guard,
1436            output_path: None,
1437            copula_generators: Vec::new(),
1438            country_pack_registry,
1439            phase_sink: None,
1440        })
1441    }
1442
1443    /// Create with default phase config.
1444    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1445        Self::new(config, PhaseConfig::default())
1446    }
1447
1448    /// Set a streaming phase sink for real-time output.
1449    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1450        self.phase_sink = Some(sink);
1451        self
1452    }
1453
1454    /// Emit a batch of items to the phase sink (if configured).
1455    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1456        if let Some(ref sink) = self.phase_sink {
1457            for item in items {
1458                if let Ok(value) = serde_json::to_value(item) {
1459                    if let Err(e) = sink.emit(phase, type_name, &value) {
1460                        warn!(
1461                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1462                        );
1463                    }
1464                }
1465            }
1466            if let Err(e) = sink.phase_complete(phase) {
1467                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1468            }
1469        }
1470    }
1471
1472    /// Enable/disable progress bars.
1473    pub fn with_progress(mut self, show: bool) -> Self {
1474        self.phase_config.show_progress = show;
1475        if show {
1476            self.multi_progress = Some(MultiProgress::new());
1477        }
1478        self
1479    }
1480
1481    /// Set the output path for disk space monitoring.
1482    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1483        let path = path.into();
1484        self.output_path = Some(path.clone());
1485        // Rebuild resource guard with the output path
1486        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1487        self
1488    }
1489
1490    /// Access the country pack registry.
1491    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1492        &self.country_pack_registry
1493    }
1494
1495    /// Look up a country pack by country code string.
1496    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1497        self.country_pack_registry.get_by_str(country)
1498    }
1499
1500    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1501    /// company, defaulting to `"US"` if no companies are configured.
1502    fn primary_country_code(&self) -> &str {
1503        self.config
1504            .companies
1505            .first()
1506            .map(|c| c.country.as_str())
1507            .unwrap_or("US")
1508    }
1509
1510    /// Resolve the country pack for the primary (first) company.
1511    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1512        self.country_pack_for(self.primary_country_code())
1513    }
1514
1515    /// Resolve the CoA framework from config/country-pack.
1516    fn resolve_coa_framework(&self) -> CoAFramework {
1517        if self.config.accounting_standards.enabled {
1518            match self.config.accounting_standards.framework {
1519                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1520                    return CoAFramework::FrenchPcg;
1521                }
1522                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1523                    return CoAFramework::GermanSkr04;
1524                }
1525                _ => {}
1526            }
1527        }
1528        // Fallback: derive from country pack
1529        let pack = self.primary_pack();
1530        match pack.accounting.framework.as_str() {
1531            "french_gaap" => CoAFramework::FrenchPcg,
1532            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1533            _ => CoAFramework::UsGaap,
1534        }
1535    }
1536
1537    /// Check if copula generators are available.
1538    ///
1539    /// Returns true if the orchestrator has copula generators for preserving
1540    /// correlations (typically from fingerprint-based generation).
1541    pub fn has_copulas(&self) -> bool {
1542        !self.copula_generators.is_empty()
1543    }
1544
1545    /// Get the copula generators.
1546    ///
1547    /// Returns a reference to the copula generators for use during generation.
1548    /// These can be used to generate correlated samples that preserve the
1549    /// statistical relationships from the source data.
1550    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1551        &self.copula_generators
1552    }
1553
1554    /// Get a mutable reference to the copula generators.
1555    ///
1556    /// Allows generators to sample from copulas during data generation.
1557    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1558        &mut self.copula_generators
1559    }
1560
1561    /// Sample correlated values from a named copula.
1562    ///
1563    /// Returns None if the copula doesn't exist.
1564    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1565        self.copula_generators
1566            .iter_mut()
1567            .find(|c| c.name == copula_name)
1568            .map(|c| c.generator.sample())
1569    }
1570
1571    /// Create an orchestrator from a fingerprint file.
1572    ///
1573    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1574    /// and creates an orchestrator configured to generate data matching
1575    /// the statistical properties of the original data.
1576    ///
1577    /// # Arguments
1578    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1579    /// * `phase_config` - Phase configuration for generation
1580    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1581    ///
1582    /// # Example
1583    /// ```no_run
1584    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1585    /// use std::path::Path;
1586    ///
1587    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1588    ///     Path::new("fingerprint.dsf"),
1589    ///     PhaseConfig::default(),
1590    ///     1.0,
1591    /// ).unwrap();
1592    /// ```
1593    pub fn from_fingerprint(
1594        fingerprint_path: &std::path::Path,
1595        phase_config: PhaseConfig,
1596        scale: f64,
1597    ) -> SynthResult<Self> {
1598        info!("Loading fingerprint from: {}", fingerprint_path.display());
1599
1600        // Read the fingerprint
1601        let reader = FingerprintReader::new();
1602        let fingerprint = reader
1603            .read_from_file(fingerprint_path)
1604            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1605
1606        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1607    }
1608
1609    /// Create an orchestrator from a loaded fingerprint.
1610    ///
1611    /// # Arguments
1612    /// * `fingerprint` - The loaded fingerprint
1613    /// * `phase_config` - Phase configuration for generation
1614    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1615    pub fn from_fingerprint_data(
1616        fingerprint: Fingerprint,
1617        phase_config: PhaseConfig,
1618        scale: f64,
1619    ) -> SynthResult<Self> {
1620        info!(
1621            "Synthesizing config from fingerprint (version: {}, tables: {})",
1622            fingerprint.manifest.version,
1623            fingerprint.schema.tables.len()
1624        );
1625
1626        // Generate a seed for the synthesis
1627        let seed: u64 = rand::random();
1628
1629        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1630        let options = SynthesisOptions {
1631            scale,
1632            seed: Some(seed),
1633            preserve_correlations: true,
1634            inject_anomalies: true,
1635        };
1636        let synthesizer = ConfigSynthesizer::with_options(options);
1637
1638        // Synthesize full result including copula generators
1639        let synthesis_result = synthesizer
1640            .synthesize_full(&fingerprint, seed)
1641            .map_err(|e| {
1642                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1643            })?;
1644
1645        // Start with a base config from the fingerprint's industry if available
1646        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1647            Self::base_config_for_industry(industry)
1648        } else {
1649            Self::base_config_for_industry("manufacturing")
1650        };
1651
1652        // Apply the synthesized patches
1653        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1654
1655        // Log synthesis results
1656        info!(
1657            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1658            fingerprint.schema.tables.len(),
1659            scale,
1660            synthesis_result.copula_generators.len()
1661        );
1662
1663        if !synthesis_result.copula_generators.is_empty() {
1664            for spec in &synthesis_result.copula_generators {
1665                info!(
1666                    "  Copula '{}' for table '{}': {} columns",
1667                    spec.name,
1668                    spec.table,
1669                    spec.columns.len()
1670                );
1671            }
1672        }
1673
1674        // Create the orchestrator with the synthesized config
1675        let mut orchestrator = Self::new(config, phase_config)?;
1676
1677        // Store copula generators for use during generation
1678        orchestrator.copula_generators = synthesis_result.copula_generators;
1679
1680        Ok(orchestrator)
1681    }
1682
1683    /// Create a base config for a given industry.
1684    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1685        use datasynth_config::presets::create_preset;
1686        use datasynth_config::TransactionVolume;
1687        use datasynth_core::models::{CoAComplexity, IndustrySector};
1688
1689        let sector = match industry.to_lowercase().as_str() {
1690            "manufacturing" => IndustrySector::Manufacturing,
1691            "retail" => IndustrySector::Retail,
1692            "financial" | "financial_services" => IndustrySector::FinancialServices,
1693            "healthcare" => IndustrySector::Healthcare,
1694            "technology" | "tech" => IndustrySector::Technology,
1695            _ => IndustrySector::Manufacturing,
1696        };
1697
1698        // Create a preset with reasonable defaults
1699        create_preset(
1700            sector,
1701            1,  // company count
1702            12, // period months
1703            CoAComplexity::Medium,
1704            TransactionVolume::TenK,
1705        )
1706    }
1707
1708    /// Apply a config patch to a GeneratorConfig.
1709    fn apply_config_patch(
1710        mut config: GeneratorConfig,
1711        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1712    ) -> GeneratorConfig {
1713        use datasynth_fingerprint::synthesis::ConfigValue;
1714
1715        for (key, value) in patch.values() {
1716            match (key.as_str(), value) {
1717                // Transaction count is handled via TransactionVolume enum on companies
1718                // Log it but cannot directly set it (would need to modify company volumes)
1719                ("transactions.count", ConfigValue::Integer(n)) => {
1720                    info!(
1721                        "Fingerprint suggests {} transactions (apply via company volumes)",
1722                        n
1723                    );
1724                }
1725                ("global.period_months", ConfigValue::Integer(n)) => {
1726                    config.global.period_months = (*n).clamp(1, 120) as u32;
1727                }
1728                ("global.start_date", ConfigValue::String(s)) => {
1729                    config.global.start_date = s.clone();
1730                }
1731                ("global.seed", ConfigValue::Integer(n)) => {
1732                    config.global.seed = Some(*n as u64);
1733                }
1734                ("fraud.enabled", ConfigValue::Bool(b)) => {
1735                    config.fraud.enabled = *b;
1736                }
1737                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1738                    config.fraud.fraud_rate = *f;
1739                }
1740                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1741                    config.data_quality.enabled = *b;
1742                }
1743                // Handle anomaly injection paths (mapped to fraud config)
1744                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1745                    config.fraud.enabled = *b;
1746                }
1747                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1748                    config.fraud.fraud_rate = *f;
1749                }
1750                _ => {
1751                    debug!("Ignoring unknown config patch key: {}", key);
1752                }
1753            }
1754        }
1755
1756        config
1757    }
1758
1759    /// Build a resource guard from the configuration.
1760    fn build_resource_guard(
1761        config: &GeneratorConfig,
1762        output_path: Option<PathBuf>,
1763    ) -> ResourceGuard {
1764        let mut builder = ResourceGuardBuilder::new();
1765
1766        // Configure memory limit if set
1767        if config.global.memory_limit_mb > 0 {
1768            builder = builder.memory_limit(config.global.memory_limit_mb);
1769        }
1770
1771        // Configure disk monitoring for output path
1772        if let Some(path) = output_path {
1773            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1774        }
1775
1776        // Use conservative degradation settings for production safety
1777        builder = builder.conservative();
1778
1779        builder.build()
1780    }
1781
1782    /// Check resources (memory, disk, CPU) and return degradation level.
1783    ///
1784    /// Returns an error if hard limits are exceeded.
1785    /// Returns Ok(DegradationLevel) indicating current resource state.
1786    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1787        self.resource_guard.check()
1788    }
1789
1790    /// Check resources with logging.
1791    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1792        let level = self.resource_guard.check()?;
1793
1794        if level != DegradationLevel::Normal {
1795            warn!(
1796                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1797                phase,
1798                level,
1799                self.resource_guard.current_memory_mb(),
1800                self.resource_guard.available_disk_mb()
1801            );
1802        }
1803
1804        Ok(level)
1805    }
1806
1807    /// Get current degradation actions based on resource state.
1808    fn get_degradation_actions(&self) -> DegradationActions {
1809        self.resource_guard.get_actions()
1810    }
1811
1812    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1813    fn check_memory_limit(&self) -> SynthResult<()> {
1814        self.check_resources()?;
1815        Ok(())
1816    }
1817
1818    /// Run the complete generation workflow.
1819    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1820        info!("Starting enhanced generation workflow");
1821        info!(
1822            "Config: industry={:?}, period_months={}, companies={}",
1823            self.config.global.industry,
1824            self.config.global.period_months,
1825            self.config.companies.len()
1826        );
1827
1828        // Initial resource check before starting
1829        let initial_level = self.check_resources_with_log("initial")?;
1830        if initial_level == DegradationLevel::Emergency {
1831            return Err(SynthError::resource(
1832                "Insufficient resources to start generation",
1833            ));
1834        }
1835
1836        let mut stats = EnhancedGenerationStatistics {
1837            companies_count: self.config.companies.len(),
1838            period_months: self.config.global.period_months,
1839            ..Default::default()
1840        };
1841
1842        // Phase 1: Chart of Accounts
1843        let coa = self.phase_chart_of_accounts(&mut stats)?;
1844
1845        // Phase 2: Master Data
1846        self.phase_master_data(&mut stats)?;
1847
1848        // Emit master data to stream sink
1849        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1850        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1851        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1852
1853        // Phase 3: Document Flows + Subledger Linking
1854        let (mut document_flows, subledger, fa_journal_entries) =
1855            self.phase_document_flows(&mut stats)?;
1856
1857        // Emit document flows to stream sink
1858        self.emit_phase_items(
1859            "document_flows",
1860            "PurchaseOrder",
1861            &document_flows.purchase_orders,
1862        );
1863        self.emit_phase_items(
1864            "document_flows",
1865            "GoodsReceipt",
1866            &document_flows.goods_receipts,
1867        );
1868        self.emit_phase_items(
1869            "document_flows",
1870            "VendorInvoice",
1871            &document_flows.vendor_invoices,
1872        );
1873        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1874        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1875
1876        // Phase 3b: Opening Balances (before JE generation)
1877        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1878
1879        // Phase 3c: Convert opening balances to journal entries and prepend them.
1880        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
1881        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
1882        // balance map type.
1883        let opening_balance_jes: Vec<JournalEntry> = opening_balances
1884            .iter()
1885            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1886            .collect();
1887        if !opening_balance_jes.is_empty() {
1888            debug!(
1889                "Prepending {} opening balance JEs to entries",
1890                opening_balance_jes.len()
1891            );
1892        }
1893
1894        // Phase 4: Journal Entries
1895        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1896
1897        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
1898        // starts from the correct initial state.
1899        if !opening_balance_jes.is_empty() {
1900            let mut combined = opening_balance_jes;
1901            combined.extend(entries);
1902            entries = combined;
1903        }
1904
1905        // Phase 4c: Append FA acquisition journal entries to main entries
1906        if !fa_journal_entries.is_empty() {
1907            debug!(
1908                "Appending {} FA acquisition JEs to main entries",
1909                fa_journal_entries.len()
1910            );
1911            entries.extend(fa_journal_entries);
1912        }
1913
1914        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
1915        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1916
1917        // Get current degradation actions for optional phases
1918        let actions = self.get_degradation_actions();
1919
1920        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1921        let sourcing = self.phase_sourcing_data(&mut stats)?;
1922
1923        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs
1924        if !sourcing.contracts.is_empty() {
1925            let mut linked_count = 0usize;
1926            for chain in &mut document_flows.p2p_chains {
1927                if chain.purchase_order.contract_id.is_none() {
1928                    if let Some(contract) = sourcing
1929                        .contracts
1930                        .iter()
1931                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1932                    {
1933                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1934                        linked_count += 1;
1935                    }
1936                }
1937            }
1938            if linked_count > 0 {
1939                debug!(
1940                    "Linked {} purchase orders to S2C contracts by vendor match",
1941                    linked_count
1942                );
1943            }
1944        }
1945
1946        // Phase 5b: Intercompany Transactions + Matching + Eliminations
1947        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
1948
1949        // Phase 5c: Append IC journal entries to main entries
1950        if !intercompany.seller_journal_entries.is_empty()
1951            || !intercompany.buyer_journal_entries.is_empty()
1952        {
1953            let ic_je_count = intercompany.seller_journal_entries.len()
1954                + intercompany.buyer_journal_entries.len();
1955            entries.extend(intercompany.seller_journal_entries.iter().cloned());
1956            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
1957            debug!(
1958                "Appended {} IC journal entries to main entries",
1959                ic_je_count
1960            );
1961        }
1962
1963        // Phase 5d: Convert IC elimination entries to GL journal entries and append
1964        if !intercompany.elimination_entries.is_empty() {
1965            let elim_jes = datasynth_generators::elimination_to_journal_entries(
1966                &intercompany.elimination_entries,
1967            );
1968            if !elim_jes.is_empty() {
1969                debug!(
1970                    "Appended {} elimination journal entries to main entries",
1971                    elim_jes.len()
1972                );
1973                entries.extend(elim_jes);
1974            }
1975        }
1976
1977        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
1978        let hr = self.phase_hr_data(&mut stats)?;
1979
1980        // Phase 6b: Generate JEs from payroll runs
1981        if !hr.payroll_runs.is_empty() {
1982            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
1983            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
1984            entries.extend(payroll_jes);
1985        }
1986
1987        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
1988        if !hr.pension_journal_entries.is_empty() {
1989            debug!(
1990                "Generated {} JEs from pension plans",
1991                hr.pension_journal_entries.len()
1992            );
1993            entries.extend(hr.pension_journal_entries.iter().cloned());
1994        }
1995
1996        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
1997        if !hr.stock_comp_journal_entries.is_empty() {
1998            debug!(
1999                "Generated {} JEs from stock-based compensation",
2000                hr.stock_comp_journal_entries.len()
2001            );
2002            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2003        }
2004
2005        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2006        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2007
2008        // Phase 7a: Generate JEs from production orders
2009        if !manufacturing_snap.production_orders.is_empty() {
2010            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
2011            debug!("Generated {} JEs from production orders", mfg_jes.len());
2012            entries.extend(mfg_jes);
2013        }
2014
2015        // Update final entry/line-item stats after all JE-generating phases
2016        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2017        if !entries.is_empty() {
2018            stats.total_entries = entries.len() as u64;
2019            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2020            debug!(
2021                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2022                stats.total_entries, stats.total_line_items
2023            );
2024        }
2025
2026        // Phase 7b: Apply internal controls to journal entries
2027        if self.config.internal_controls.enabled && !entries.is_empty() {
2028            info!("Phase 7b: Applying internal controls to journal entries");
2029            let control_config = ControlGeneratorConfig {
2030                exception_rate: self.config.internal_controls.exception_rate,
2031                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2032                enable_sox_marking: true,
2033                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2034                    self.config.internal_controls.sox_materiality_threshold,
2035                )
2036                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2037                ..Default::default()
2038            };
2039            let mut control_gen = ControlGenerator::with_config(self.seed + 99, control_config);
2040            for entry in &mut entries {
2041                control_gen.apply_controls(entry, &coa);
2042            }
2043            let with_controls = entries
2044                .iter()
2045                .filter(|e| !e.header.control_ids.is_empty())
2046                .count();
2047            info!(
2048                "Applied controls to {} entries ({} with control IDs assigned)",
2049                entries.len(),
2050                with_controls
2051            );
2052        }
2053
2054        // Emit journal entries to stream sink (after all JE-generating phases)
2055        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2056
2057        // Phase 8: Anomaly Injection (after all JE-generating phases)
2058        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2059
2060        // Emit anomaly labels to stream sink
2061        self.emit_phase_items(
2062            "anomaly_injection",
2063            "LabeledAnomaly",
2064            &anomaly_labels.labels,
2065        );
2066
2067        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2068        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2069
2070        // Emit red flags to stream sink
2071        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2072
2073        // Phase 26b: Collusion Ring Generation (after red flags)
2074        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2075
2076        // Emit collusion rings to stream sink
2077        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2078
2079        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2080        let balance_validation = self.phase_balance_validation(&entries)?;
2081
2082        // Phase 9b: GL-to-Subledger Reconciliation
2083        let subledger_reconciliation =
2084            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2085
2086        // Phase 10: Data Quality Injection
2087        let data_quality_stats =
2088            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2089
2090        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2091        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2092
2093        // Phase 11: Audit Data
2094        let audit = self.phase_audit_data(&entries, &mut stats)?;
2095
2096        // Phase 12: Banking KYC/AML Data
2097        let banking = self.phase_banking_data(&mut stats)?;
2098
2099        // Phase 13: Graph Export
2100        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2101
2102        // Phase 14: LLM Enrichment
2103        self.phase_llm_enrichment(&mut stats);
2104
2105        // Phase 15: Diffusion Enhancement
2106        self.phase_diffusion_enhancement(&mut stats);
2107
2108        // Phase 16: Causal Overlay
2109        self.phase_causal_overlay(&mut stats);
2110
2111        // Phase 17: Bank Reconciliation + Financial Statements
2112        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
2113        // provision data (from accounting_standards / tax snapshots) can be wired in.
2114        let mut financial_reporting = self.phase_financial_reporting(
2115            &document_flows,
2116            &entries,
2117            &coa,
2118            &hr,
2119            &audit,
2120            &mut stats,
2121        )?;
2122
2123        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
2124        let accounting_standards =
2125            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2126
2127        // Phase 18a: Merge ECL journal entries into main GL
2128        if !accounting_standards.ecl_journal_entries.is_empty() {
2129            debug!(
2130                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2131                accounting_standards.ecl_journal_entries.len()
2132            );
2133            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2134        }
2135
2136        // Phase 18a: Merge provision journal entries into main GL
2137        if !accounting_standards.provision_journal_entries.is_empty() {
2138            debug!(
2139                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2140                accounting_standards.provision_journal_entries.len()
2141            );
2142            entries.extend(
2143                accounting_standards
2144                    .provision_journal_entries
2145                    .iter()
2146                    .cloned(),
2147            );
2148        }
2149
2150        // Phase 18b: OCPM Events (after all process data is available)
2151        let ocpm = self.phase_ocpm_events(
2152            &document_flows,
2153            &sourcing,
2154            &hr,
2155            &manufacturing_snap,
2156            &banking,
2157            &audit,
2158            &financial_reporting,
2159            &mut stats,
2160        )?;
2161
2162        // Emit OCPM events to stream sink
2163        if let Some(ref event_log) = ocpm.event_log {
2164            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2165        }
2166
2167        // Phase 19: Sales Quotes, Management KPIs, Budgets
2168        let sales_kpi_budgets =
2169            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2170
2171        // Phase 20: Tax Generation
2172        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2173
2174        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
2175        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
2176        self.generate_notes_to_financial_statements(
2177            &mut financial_reporting,
2178            &accounting_standards,
2179            &tax,
2180            &hr,
2181            &audit,
2182        );
2183
2184        // Phase 21: ESG Data Generation
2185        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
2186
2187        // Phase 22: Treasury Data Generation
2188        let treasury =
2189            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2190
2191        // Phase 23: Project Accounting Data Generation
2192        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2193
2194        // Phase 24: Process Evolution + Organizational Events
2195        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2196
2197        // Phase 24b: Disruption Events
2198        let disruption_events = self.phase_disruption_events(&mut stats)?;
2199
2200        // Phase 27: Bi-Temporal Vendor Version Chains
2201        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2202
2203        // Phase 28: Entity Relationship Graph + Cross-Process Links
2204        let (entity_relationship_graph, cross_process_links) =
2205            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2206
2207        // Phase 29: Industry-specific GL accounts
2208        let industry_output = self.phase_industry_data(&mut stats);
2209
2210        // Phase: Compliance regulations (must run before hypergraph so it can be included)
2211        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2212
2213        // Phase 19b: Hypergraph Export (after all data is available)
2214        self.phase_hypergraph_export(
2215            &coa,
2216            &entries,
2217            &document_flows,
2218            &sourcing,
2219            &hr,
2220            &manufacturing_snap,
2221            &banking,
2222            &audit,
2223            &financial_reporting,
2224            &ocpm,
2225            &compliance_regulations,
2226            &mut stats,
2227        )?;
2228
2229        // Phase 10c: Additional graph builders (approval, entity, banking)
2230        // These run after all data is available since they need banking/IC data.
2231        if self.phase_config.generate_graph_export {
2232            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2233        }
2234
2235        // Log informational messages for config sections not yet fully wired
2236        if self.config.streaming.enabled {
2237            info!("Note: streaming config is enabled but batch mode does not use it");
2238        }
2239        if self.config.vendor_network.enabled {
2240            debug!("Vendor network config available; relationship graph generation is partial");
2241        }
2242        if self.config.customer_segmentation.enabled {
2243            debug!("Customer segmentation config available; segment-aware generation is partial");
2244        }
2245
2246        // Log final resource statistics
2247        let resource_stats = self.resource_guard.stats();
2248        info!(
2249            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2250            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2251            resource_stats.disk.estimated_bytes_written,
2252            resource_stats.degradation_level
2253        );
2254
2255        // Flush any remaining stream sink data
2256        if let Some(ref sink) = self.phase_sink {
2257            if let Err(e) = sink.flush() {
2258                warn!("Stream sink flush failed: {e}");
2259            }
2260        }
2261
2262        // Build data lineage graph
2263        let lineage = self.build_lineage_graph();
2264
2265        // Evaluate quality gates if enabled in config
2266        let gate_result = if self.config.quality_gates.enabled {
2267            let profile_name = &self.config.quality_gates.profile;
2268            match datasynth_eval::gates::get_profile(profile_name) {
2269                Some(profile) => {
2270                    // Build an evaluation populated with actual generation metrics.
2271                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
2272
2273                    // Populate balance sheet evaluation from balance validation results
2274                    if balance_validation.validated {
2275                        eval.coherence.balance =
2276                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
2277                                equation_balanced: balance_validation.is_balanced,
2278                                max_imbalance: (balance_validation.total_debits
2279                                    - balance_validation.total_credits)
2280                                    .abs(),
2281                                periods_evaluated: 1,
2282                                periods_imbalanced: if balance_validation.is_balanced {
2283                                    0
2284                                } else {
2285                                    1
2286                                },
2287                                period_results: Vec::new(),
2288                                companies_evaluated: self.config.companies.len(),
2289                            });
2290                    }
2291
2292                    // Set coherence passes based on balance validation
2293                    eval.coherence.passes = balance_validation.is_balanced;
2294                    if !balance_validation.is_balanced {
2295                        eval.coherence
2296                            .failures
2297                            .push("Balance sheet equation not satisfied".to_string());
2298                    }
2299
2300                    // Set statistical score based on entry count (basic sanity)
2301                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
2302                    eval.statistical.passes = !entries.is_empty();
2303
2304                    // Set quality score from data quality stats
2305                    eval.quality.overall_score = 0.9; // Default high for generated data
2306                    eval.quality.passes = true;
2307
2308                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
2309                    info!(
2310                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
2311                        profile_name, result.gates_passed, result.gates_total, result.summary
2312                    );
2313                    Some(result)
2314                }
2315                None => {
2316                    warn!(
2317                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
2318                        profile_name
2319                    );
2320                    None
2321                }
2322            }
2323        } else {
2324            None
2325        };
2326
2327        // Generate internal controls if enabled
2328        let internal_controls = if self.config.internal_controls.enabled {
2329            InternalControl::standard_controls()
2330        } else {
2331            Vec::new()
2332        };
2333
2334        Ok(EnhancedGenerationResult {
2335            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
2336            master_data: std::mem::take(&mut self.master_data),
2337            document_flows,
2338            subledger,
2339            ocpm,
2340            audit,
2341            banking,
2342            graph_export,
2343            sourcing,
2344            financial_reporting,
2345            hr,
2346            accounting_standards,
2347            manufacturing: manufacturing_snap,
2348            sales_kpi_budgets,
2349            tax,
2350            esg: esg_snap,
2351            treasury,
2352            project_accounting,
2353            process_evolution,
2354            organizational_events,
2355            disruption_events,
2356            intercompany,
2357            journal_entries: entries,
2358            anomaly_labels,
2359            balance_validation,
2360            data_quality_stats,
2361            statistics: stats,
2362            lineage: Some(lineage),
2363            gate_result,
2364            internal_controls,
2365            opening_balances,
2366            subledger_reconciliation,
2367            counterfactual_pairs,
2368            red_flags,
2369            collusion_rings,
2370            temporal_vendor_chains,
2371            entity_relationship_graph,
2372            cross_process_links,
2373            industry_output,
2374            compliance_regulations,
2375        })
2376    }
2377
2378    // ========================================================================
2379    // Generation Phase Methods
2380    // ========================================================================
2381
2382    /// Phase 1: Generate Chart of Accounts and update statistics.
2383    fn phase_chart_of_accounts(
2384        &mut self,
2385        stats: &mut EnhancedGenerationStatistics,
2386    ) -> SynthResult<Arc<ChartOfAccounts>> {
2387        info!("Phase 1: Generating Chart of Accounts");
2388        let coa = self.generate_coa()?;
2389        stats.accounts_count = coa.account_count();
2390        info!(
2391            "Chart of Accounts generated: {} accounts",
2392            stats.accounts_count
2393        );
2394        self.check_resources_with_log("post-coa")?;
2395        Ok(coa)
2396    }
2397
2398    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
2399    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2400        if self.phase_config.generate_master_data {
2401            info!("Phase 2: Generating Master Data");
2402            self.generate_master_data()?;
2403            stats.vendor_count = self.master_data.vendors.len();
2404            stats.customer_count = self.master_data.customers.len();
2405            stats.material_count = self.master_data.materials.len();
2406            stats.asset_count = self.master_data.assets.len();
2407            stats.employee_count = self.master_data.employees.len();
2408            info!(
2409                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
2410                stats.vendor_count, stats.customer_count, stats.material_count,
2411                stats.asset_count, stats.employee_count
2412            );
2413            self.check_resources_with_log("post-master-data")?;
2414        } else {
2415            debug!("Phase 2: Skipped (master data generation disabled)");
2416        }
2417        Ok(())
2418    }
2419
2420    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
2421    fn phase_document_flows(
2422        &mut self,
2423        stats: &mut EnhancedGenerationStatistics,
2424    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
2425        let mut document_flows = DocumentFlowSnapshot::default();
2426        let mut subledger = SubledgerSnapshot::default();
2427
2428        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
2429            info!("Phase 3: Generating Document Flows");
2430            self.generate_document_flows(&mut document_flows)?;
2431            stats.p2p_chain_count = document_flows.p2p_chains.len();
2432            stats.o2c_chain_count = document_flows.o2c_chains.len();
2433            info!(
2434                "Document flows generated: {} P2P chains, {} O2C chains",
2435                stats.p2p_chain_count, stats.o2c_chain_count
2436            );
2437
2438            // Phase 3b: Link document flows to subledgers (for data coherence)
2439            debug!("Phase 3b: Linking document flows to subledgers");
2440            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
2441            stats.ap_invoice_count = subledger.ap_invoices.len();
2442            stats.ar_invoice_count = subledger.ar_invoices.len();
2443            debug!(
2444                "Subledgers linked: {} AP invoices, {} AR invoices",
2445                stats.ap_invoice_count, stats.ar_invoice_count
2446            );
2447
2448            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
2449            // Without this step the subledger is systematically overstated because
2450            // amount_remaining is set at invoice creation and never reduced by
2451            // the payments that were generated in the document-flow phase.
2452            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
2453            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
2454            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
2455            debug!("Payment settlements applied to AP and AR subledgers");
2456
2457            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
2458            // The as-of date is the last day of the configured period.
2459            if let Ok(start_date) =
2460                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2461            {
2462                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
2463                    - chrono::Days::new(1);
2464                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
2465                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
2466                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
2467                // derived from JE-level aggregation and will typically differ. This is a known
2468                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
2469                // generated independently. A future reconciliation phase should align them by
2470                // using subledger totals as the authoritative source for BS Receivables.
2471                for company in &self.config.companies {
2472                    let ar_report = ARAgingReport::from_invoices(
2473                        company.code.clone(),
2474                        &subledger.ar_invoices,
2475                        as_of_date,
2476                    );
2477                    subledger.ar_aging_reports.push(ar_report);
2478
2479                    let ap_report = APAgingReport::from_invoices(
2480                        company.code.clone(),
2481                        &subledger.ap_invoices,
2482                        as_of_date,
2483                    );
2484                    subledger.ap_aging_reports.push(ap_report);
2485                }
2486                debug!(
2487                    "AR/AP aging reports built: {} AR, {} AP",
2488                    subledger.ar_aging_reports.len(),
2489                    subledger.ap_aging_reports.len()
2490                );
2491            }
2492
2493            self.check_resources_with_log("post-document-flows")?;
2494        } else {
2495            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
2496        }
2497
2498        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
2499        let mut fa_journal_entries = Vec::new();
2500        if !self.master_data.assets.is_empty() {
2501            debug!("Generating FA subledger records");
2502            let company_code = self
2503                .config
2504                .companies
2505                .first()
2506                .map(|c| c.code.as_str())
2507                .unwrap_or("1000");
2508            let currency = self
2509                .config
2510                .companies
2511                .first()
2512                .map(|c| c.currency.as_str())
2513                .unwrap_or("USD");
2514
2515            let mut fa_gen = datasynth_generators::FAGenerator::new(
2516                datasynth_generators::FAGeneratorConfig::default(),
2517                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
2518            );
2519
2520            for asset in &self.master_data.assets {
2521                let (record, je) = fa_gen.generate_asset_acquisition(
2522                    company_code,
2523                    &format!("{:?}", asset.asset_class),
2524                    &asset.description,
2525                    asset.acquisition_date,
2526                    currency,
2527                    asset.cost_center.as_deref(),
2528                );
2529                subledger.fa_records.push(record);
2530                fa_journal_entries.push(je);
2531            }
2532
2533            stats.fa_subledger_count = subledger.fa_records.len();
2534            debug!(
2535                "FA subledger records generated: {} (with {} acquisition JEs)",
2536                stats.fa_subledger_count,
2537                fa_journal_entries.len()
2538            );
2539        }
2540
2541        // Generate Inventory subledger records from master data materials
2542        if !self.master_data.materials.is_empty() {
2543            debug!("Generating Inventory subledger records");
2544            let first_company = self.config.companies.first();
2545            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
2546            let inv_currency = first_company
2547                .map(|c| c.currency.clone())
2548                .unwrap_or_else(|| "USD".to_string());
2549
2550            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
2551                datasynth_generators::InventoryGeneratorConfig::default(),
2552                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
2553                inv_currency.clone(),
2554            );
2555
2556            for (i, material) in self.master_data.materials.iter().enumerate() {
2557                let plant = format!("PLANT{:02}", (i % 3) + 1);
2558                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
2559                let initial_qty = rust_decimal::Decimal::from(
2560                    material
2561                        .safety_stock
2562                        .to_string()
2563                        .parse::<i64>()
2564                        .unwrap_or(100),
2565                );
2566
2567                let position = inv_gen.generate_position(
2568                    company_code,
2569                    &plant,
2570                    &storage_loc,
2571                    &material.material_id,
2572                    &material.description,
2573                    initial_qty,
2574                    Some(material.standard_cost),
2575                    &inv_currency,
2576                );
2577                subledger.inventory_positions.push(position);
2578            }
2579
2580            stats.inventory_subledger_count = subledger.inventory_positions.len();
2581            debug!(
2582                "Inventory subledger records generated: {}",
2583                stats.inventory_subledger_count
2584            );
2585        }
2586
2587        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
2588        if !subledger.fa_records.is_empty() {
2589            if let Ok(start_date) =
2590                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2591            {
2592                let company_code = self
2593                    .config
2594                    .companies
2595                    .first()
2596                    .map(|c| c.code.as_str())
2597                    .unwrap_or("1000");
2598                let fiscal_year = start_date.year();
2599                let start_period = start_date.month();
2600                let end_period =
2601                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
2602
2603                let depr_cfg = FaDepreciationScheduleConfig {
2604                    fiscal_year,
2605                    start_period,
2606                    end_period,
2607                    seed_offset: 800,
2608                };
2609                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
2610                let runs = depr_gen.generate(company_code, &subledger.fa_records);
2611                let run_count = runs.len();
2612                subledger.depreciation_runs = runs;
2613                debug!(
2614                    "Depreciation runs generated: {} runs for {} periods",
2615                    run_count, self.config.global.period_months
2616                );
2617            }
2618        }
2619
2620        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
2621        if !subledger.inventory_positions.is_empty() {
2622            if let Ok(start_date) =
2623                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2624            {
2625                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
2626                    - chrono::Days::new(1);
2627
2628                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
2629                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
2630
2631                for company in &self.config.companies {
2632                    let result = inv_val_gen.generate(
2633                        &company.code,
2634                        &subledger.inventory_positions,
2635                        as_of_date,
2636                    );
2637                    subledger.inventory_valuations.push(result);
2638                }
2639                debug!(
2640                    "Inventory valuations generated: {} company reports",
2641                    subledger.inventory_valuations.len()
2642                );
2643            }
2644        }
2645
2646        Ok((document_flows, subledger, fa_journal_entries))
2647    }
2648
2649    /// Phase 3c: Generate OCPM events from document flows.
2650    #[allow(clippy::too_many_arguments)]
2651    fn phase_ocpm_events(
2652        &mut self,
2653        document_flows: &DocumentFlowSnapshot,
2654        sourcing: &SourcingSnapshot,
2655        hr: &HrSnapshot,
2656        manufacturing: &ManufacturingSnapshot,
2657        banking: &BankingSnapshot,
2658        audit: &AuditSnapshot,
2659        financial_reporting: &FinancialReportingSnapshot,
2660        stats: &mut EnhancedGenerationStatistics,
2661    ) -> SynthResult<OcpmSnapshot> {
2662        if self.phase_config.generate_ocpm_events {
2663            info!("Phase 3c: Generating OCPM Events");
2664            let ocpm_snapshot = self.generate_ocpm_events(
2665                document_flows,
2666                sourcing,
2667                hr,
2668                manufacturing,
2669                banking,
2670                audit,
2671                financial_reporting,
2672            )?;
2673            stats.ocpm_event_count = ocpm_snapshot.event_count;
2674            stats.ocpm_object_count = ocpm_snapshot.object_count;
2675            stats.ocpm_case_count = ocpm_snapshot.case_count;
2676            info!(
2677                "OCPM events generated: {} events, {} objects, {} cases",
2678                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
2679            );
2680            self.check_resources_with_log("post-ocpm")?;
2681            Ok(ocpm_snapshot)
2682        } else {
2683            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
2684            Ok(OcpmSnapshot::default())
2685        }
2686    }
2687
2688    /// Phase 4: Generate journal entries from document flows and standalone generation.
2689    fn phase_journal_entries(
2690        &mut self,
2691        coa: &Arc<ChartOfAccounts>,
2692        document_flows: &DocumentFlowSnapshot,
2693        _stats: &mut EnhancedGenerationStatistics,
2694    ) -> SynthResult<Vec<JournalEntry>> {
2695        let mut entries = Vec::new();
2696
2697        // Phase 4a: Generate JEs from document flows (for data coherence)
2698        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
2699            debug!("Phase 4a: Generating JEs from document flows");
2700            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
2701            debug!("Generated {} JEs from document flows", flow_entries.len());
2702            entries.extend(flow_entries);
2703        }
2704
2705        // Phase 4b: Generate standalone journal entries
2706        if self.phase_config.generate_journal_entries {
2707            info!("Phase 4: Generating Journal Entries");
2708            let je_entries = self.generate_journal_entries(coa)?;
2709            info!("Generated {} standalone journal entries", je_entries.len());
2710            entries.extend(je_entries);
2711        } else {
2712            debug!("Phase 4: Skipped (journal entry generation disabled)");
2713        }
2714
2715        if !entries.is_empty() {
2716            // Note: stats.total_entries/total_line_items are set in generate()
2717            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
2718            self.check_resources_with_log("post-journal-entries")?;
2719        }
2720
2721        Ok(entries)
2722    }
2723
2724    /// Phase 5: Inject anomalies into journal entries.
2725    fn phase_anomaly_injection(
2726        &mut self,
2727        entries: &mut [JournalEntry],
2728        actions: &DegradationActions,
2729        stats: &mut EnhancedGenerationStatistics,
2730    ) -> SynthResult<AnomalyLabels> {
2731        if self.phase_config.inject_anomalies
2732            && !entries.is_empty()
2733            && !actions.skip_anomaly_injection
2734        {
2735            info!("Phase 5: Injecting Anomalies");
2736            let result = self.inject_anomalies(entries)?;
2737            stats.anomalies_injected = result.labels.len();
2738            info!("Injected {} anomalies", stats.anomalies_injected);
2739            self.check_resources_with_log("post-anomaly-injection")?;
2740            Ok(result)
2741        } else if actions.skip_anomaly_injection {
2742            warn!("Phase 5: Skipped due to resource degradation");
2743            Ok(AnomalyLabels::default())
2744        } else {
2745            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
2746            Ok(AnomalyLabels::default())
2747        }
2748    }
2749
2750    /// Phase 6: Validate balance sheet equation on journal entries.
2751    fn phase_balance_validation(
2752        &mut self,
2753        entries: &[JournalEntry],
2754    ) -> SynthResult<BalanceValidationResult> {
2755        if self.phase_config.validate_balances && !entries.is_empty() {
2756            debug!("Phase 6: Validating Balances");
2757            let balance_validation = self.validate_journal_entries(entries)?;
2758            if balance_validation.is_balanced {
2759                debug!("Balance validation passed");
2760            } else {
2761                warn!(
2762                    "Balance validation found {} errors",
2763                    balance_validation.validation_errors.len()
2764                );
2765            }
2766            Ok(balance_validation)
2767        } else {
2768            Ok(BalanceValidationResult::default())
2769        }
2770    }
2771
2772    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
2773    fn phase_data_quality_injection(
2774        &mut self,
2775        entries: &mut [JournalEntry],
2776        actions: &DegradationActions,
2777        stats: &mut EnhancedGenerationStatistics,
2778    ) -> SynthResult<DataQualityStats> {
2779        if self.phase_config.inject_data_quality
2780            && !entries.is_empty()
2781            && !actions.skip_data_quality
2782        {
2783            info!("Phase 7: Injecting Data Quality Variations");
2784            let dq_stats = self.inject_data_quality(entries)?;
2785            stats.data_quality_issues = dq_stats.records_with_issues;
2786            info!("Injected {} data quality issues", stats.data_quality_issues);
2787            self.check_resources_with_log("post-data-quality")?;
2788            Ok(dq_stats)
2789        } else if actions.skip_data_quality {
2790            warn!("Phase 7: Skipped due to resource degradation");
2791            Ok(DataQualityStats::default())
2792        } else {
2793            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2794            Ok(DataQualityStats::default())
2795        }
2796    }
2797
2798    /// Phase 10b: Generate period-close journal entries.
2799    ///
2800    /// Generates:
2801    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
2802    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
2803    ///    for the configured period.
2804    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
2805    /// 3. Income statement closing JE per company: transfer net income after tax to retained
2806    ///    earnings via the Income Summary (3600) clearing account.
2807    fn phase_period_close(
2808        &mut self,
2809        entries: &mut Vec<JournalEntry>,
2810        subledger: &SubledgerSnapshot,
2811        stats: &mut EnhancedGenerationStatistics,
2812    ) -> SynthResult<()> {
2813        if !self.phase_config.generate_period_close || entries.is_empty() {
2814            debug!("Phase 10b: Skipped (period close disabled or no entries)");
2815            return Ok(());
2816        }
2817
2818        info!("Phase 10b: Generating period-close journal entries");
2819
2820        use datasynth_core::accounts::{
2821            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
2822        };
2823        use rust_decimal::Decimal;
2824
2825        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2826            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
2827        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2828        // Posting date for close entries is the last day of the period
2829        let close_date = end_date - chrono::Days::new(1);
2830
2831        // Statutory tax rate (21% — configurable rates come in later tiers)
2832        let tax_rate = Decimal::new(21, 2); // 0.21
2833
2834        // Collect company codes from config
2835        let company_codes: Vec<String> = self
2836            .config
2837            .companies
2838            .iter()
2839            .map(|c| c.code.clone())
2840            .collect();
2841
2842        let mut close_jes: Vec<JournalEntry> = Vec::new();
2843
2844        // --- Depreciation JEs (per asset) ---
2845        // Compute period depreciation for each active fixed asset using straight-line method.
2846        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
2847        let period_months = self.config.global.period_months;
2848        for asset in &subledger.fa_records {
2849            // Skip assets that are inactive / fully depreciated / non-depreciable
2850            use datasynth_core::models::subledger::fa::AssetStatus;
2851            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
2852                continue;
2853            }
2854            let useful_life_months = asset.useful_life_months();
2855            if useful_life_months == 0 {
2856                // Land or CIP — not depreciated
2857                continue;
2858            }
2859            let salvage_value = asset.salvage_value();
2860            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
2861            if depreciable_base == Decimal::ZERO {
2862                continue;
2863            }
2864            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
2865                * Decimal::from(period_months))
2866            .round_dp(2);
2867            if period_depr <= Decimal::ZERO {
2868                continue;
2869            }
2870
2871            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
2872            depr_header.document_type = "CL".to_string();
2873            depr_header.header_text = Some(format!(
2874                "Depreciation - {} {}",
2875                asset.asset_number, asset.description
2876            ));
2877            depr_header.created_by = "CLOSE_ENGINE".to_string();
2878            depr_header.source = TransactionSource::Automated;
2879            depr_header.business_process = Some(BusinessProcess::R2R);
2880
2881            let doc_id = depr_header.document_id;
2882            let mut depr_je = JournalEntry::new(depr_header);
2883
2884            // DR Depreciation Expense (6000)
2885            depr_je.add_line(JournalEntryLine::debit(
2886                doc_id,
2887                1,
2888                expense_accounts::DEPRECIATION.to_string(),
2889                period_depr,
2890            ));
2891            // CR Accumulated Depreciation (1510)
2892            depr_je.add_line(JournalEntryLine::credit(
2893                doc_id,
2894                2,
2895                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
2896                period_depr,
2897            ));
2898
2899            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
2900            close_jes.push(depr_je);
2901        }
2902
2903        if !subledger.fa_records.is_empty() {
2904            debug!(
2905                "Generated {} depreciation JEs from {} FA records",
2906                close_jes.len(),
2907                subledger.fa_records.len()
2908            );
2909        }
2910
2911        for company_code in &company_codes {
2912            // Calculate net income for this company from existing JEs:
2913            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
2914            // Revenue (4xxx): credit-normal, so net = credits - debits
2915            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
2916            let mut total_revenue = Decimal::ZERO;
2917            let mut total_expenses = Decimal::ZERO;
2918
2919            for entry in entries.iter() {
2920                if entry.header.company_code != *company_code {
2921                    continue;
2922                }
2923                for line in &entry.lines {
2924                    let category = AccountCategory::from_account(&line.gl_account);
2925                    match category {
2926                        AccountCategory::Revenue => {
2927                            // Revenue is credit-normal: net revenue = credits - debits
2928                            total_revenue += line.credit_amount - line.debit_amount;
2929                        }
2930                        AccountCategory::Cogs
2931                        | AccountCategory::OperatingExpense
2932                        | AccountCategory::OtherIncomeExpense
2933                        | AccountCategory::Tax => {
2934                            // Expenses are debit-normal: net expense = debits - credits
2935                            total_expenses += line.debit_amount - line.credit_amount;
2936                        }
2937                        _ => {}
2938                    }
2939                }
2940            }
2941
2942            let pre_tax_income = total_revenue - total_expenses;
2943
2944            // Skip if no income statement activity
2945            if pre_tax_income == Decimal::ZERO {
2946                debug!(
2947                    "Company {}: no pre-tax income, skipping period close",
2948                    company_code
2949                );
2950                continue;
2951            }
2952
2953            // --- Tax provision JE ---
2954            // Only generate if pre_tax_income > 0 (no tax on losses in this stub)
2955            let tax_amount = if pre_tax_income > Decimal::ZERO {
2956                (pre_tax_income * tax_rate).round_dp(2)
2957            } else {
2958                Decimal::ZERO
2959            };
2960
2961            if tax_amount > Decimal::ZERO {
2962                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
2963                tax_header.document_type = "CL".to_string();
2964                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
2965                tax_header.created_by = "CLOSE_ENGINE".to_string();
2966                tax_header.source = TransactionSource::Automated;
2967                tax_header.business_process = Some(BusinessProcess::R2R);
2968
2969                let doc_id = tax_header.document_id;
2970                let mut tax_je = JournalEntry::new(tax_header);
2971
2972                // DR Tax Expense (8000)
2973                tax_je.add_line(JournalEntryLine::debit(
2974                    doc_id,
2975                    1,
2976                    tax_accounts::TAX_EXPENSE.to_string(),
2977                    tax_amount,
2978                ));
2979                // CR Sales Tax Payable (2100) — used as income tax payable in this stub
2980                tax_je.add_line(JournalEntryLine::credit(
2981                    doc_id,
2982                    2,
2983                    tax_accounts::SALES_TAX_PAYABLE.to_string(),
2984                    tax_amount,
2985                ));
2986
2987                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
2988                close_jes.push(tax_je);
2989            }
2990
2991            // --- Income statement closing JE ---
2992            // Net income after tax
2993            let net_income = pre_tax_income - tax_amount;
2994
2995            if net_income != Decimal::ZERO {
2996                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
2997                close_header.document_type = "CL".to_string();
2998                close_header.header_text =
2999                    Some(format!("Income statement close - {}", company_code));
3000                close_header.created_by = "CLOSE_ENGINE".to_string();
3001                close_header.source = TransactionSource::Automated;
3002                close_header.business_process = Some(BusinessProcess::R2R);
3003
3004                let doc_id = close_header.document_id;
3005                let mut close_je = JournalEntry::new(close_header);
3006
3007                let abs_net_income = net_income.abs();
3008
3009                if net_income > Decimal::ZERO {
3010                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
3011                    close_je.add_line(JournalEntryLine::debit(
3012                        doc_id,
3013                        1,
3014                        equity_accounts::INCOME_SUMMARY.to_string(),
3015                        abs_net_income,
3016                    ));
3017                    close_je.add_line(JournalEntryLine::credit(
3018                        doc_id,
3019                        2,
3020                        equity_accounts::RETAINED_EARNINGS.to_string(),
3021                        abs_net_income,
3022                    ));
3023                } else {
3024                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
3025                    close_je.add_line(JournalEntryLine::debit(
3026                        doc_id,
3027                        1,
3028                        equity_accounts::RETAINED_EARNINGS.to_string(),
3029                        abs_net_income,
3030                    ));
3031                    close_je.add_line(JournalEntryLine::credit(
3032                        doc_id,
3033                        2,
3034                        equity_accounts::INCOME_SUMMARY.to_string(),
3035                        abs_net_income,
3036                    ));
3037                }
3038
3039                debug_assert!(
3040                    close_je.is_balanced(),
3041                    "Income statement closing JE must be balanced"
3042                );
3043                close_jes.push(close_je);
3044            }
3045        }
3046
3047        let close_count = close_jes.len();
3048        if close_count > 0 {
3049            info!("Generated {} period-close journal entries", close_count);
3050            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
3051            entries.extend(close_jes);
3052            stats.period_close_je_count = close_count;
3053
3054            // Update total entry/line-item stats
3055            stats.total_entries = entries.len() as u64;
3056            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
3057        } else {
3058            debug!("No period-close entries generated (no income statement activity)");
3059        }
3060
3061        Ok(())
3062    }
3063
3064    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
3065    fn phase_audit_data(
3066        &mut self,
3067        entries: &[JournalEntry],
3068        stats: &mut EnhancedGenerationStatistics,
3069    ) -> SynthResult<AuditSnapshot> {
3070        if self.phase_config.generate_audit {
3071            info!("Phase 8: Generating Audit Data");
3072            let audit_snapshot = self.generate_audit_data(entries)?;
3073            stats.audit_engagement_count = audit_snapshot.engagements.len();
3074            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
3075            stats.audit_evidence_count = audit_snapshot.evidence.len();
3076            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
3077            stats.audit_finding_count = audit_snapshot.findings.len();
3078            stats.audit_judgment_count = audit_snapshot.judgments.len();
3079            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
3080            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
3081            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
3082            stats.audit_sample_count = audit_snapshot.samples.len();
3083            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
3084            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
3085            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
3086            stats.audit_related_party_count = audit_snapshot.related_parties.len();
3087            stats.audit_related_party_transaction_count =
3088                audit_snapshot.related_party_transactions.len();
3089            info!(
3090                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
3091                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
3092                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
3093                 {} RP transactions",
3094                stats.audit_engagement_count,
3095                stats.audit_workpaper_count,
3096                stats.audit_evidence_count,
3097                stats.audit_risk_count,
3098                stats.audit_finding_count,
3099                stats.audit_judgment_count,
3100                stats.audit_confirmation_count,
3101                stats.audit_procedure_step_count,
3102                stats.audit_sample_count,
3103                stats.audit_analytical_result_count,
3104                stats.audit_ia_function_count,
3105                stats.audit_ia_report_count,
3106                stats.audit_related_party_count,
3107                stats.audit_related_party_transaction_count,
3108            );
3109            self.check_resources_with_log("post-audit")?;
3110            Ok(audit_snapshot)
3111        } else {
3112            debug!("Phase 8: Skipped (audit generation disabled)");
3113            Ok(AuditSnapshot::default())
3114        }
3115    }
3116
3117    /// Phase 9: Generate banking KYC/AML data.
3118    fn phase_banking_data(
3119        &mut self,
3120        stats: &mut EnhancedGenerationStatistics,
3121    ) -> SynthResult<BankingSnapshot> {
3122        if self.phase_config.generate_banking {
3123            info!("Phase 9: Generating Banking KYC/AML Data");
3124            let banking_snapshot = self.generate_banking_data()?;
3125            stats.banking_customer_count = banking_snapshot.customers.len();
3126            stats.banking_account_count = banking_snapshot.accounts.len();
3127            stats.banking_transaction_count = banking_snapshot.transactions.len();
3128            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
3129            info!(
3130                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
3131                stats.banking_customer_count, stats.banking_account_count,
3132                stats.banking_transaction_count, stats.banking_suspicious_count
3133            );
3134            self.check_resources_with_log("post-banking")?;
3135            Ok(banking_snapshot)
3136        } else {
3137            debug!("Phase 9: Skipped (banking generation disabled)");
3138            Ok(BankingSnapshot::default())
3139        }
3140    }
3141
3142    /// Phase 10: Export accounting network graphs for ML training.
3143    fn phase_graph_export(
3144        &mut self,
3145        entries: &[JournalEntry],
3146        coa: &Arc<ChartOfAccounts>,
3147        stats: &mut EnhancedGenerationStatistics,
3148    ) -> SynthResult<GraphExportSnapshot> {
3149        if self.phase_config.generate_graph_export && !entries.is_empty() {
3150            info!("Phase 10: Exporting Accounting Network Graphs");
3151            match self.export_graphs(entries, coa, stats) {
3152                Ok(snapshot) => {
3153                    info!(
3154                        "Graph export complete: {} graphs ({} nodes, {} edges)",
3155                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
3156                    );
3157                    Ok(snapshot)
3158                }
3159                Err(e) => {
3160                    warn!("Phase 10: Graph export failed: {}", e);
3161                    Ok(GraphExportSnapshot::default())
3162                }
3163            }
3164        } else {
3165            debug!("Phase 10: Skipped (graph export disabled or no entries)");
3166            Ok(GraphExportSnapshot::default())
3167        }
3168    }
3169
3170    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
3171    #[allow(clippy::too_many_arguments)]
3172    fn phase_hypergraph_export(
3173        &self,
3174        coa: &Arc<ChartOfAccounts>,
3175        entries: &[JournalEntry],
3176        document_flows: &DocumentFlowSnapshot,
3177        sourcing: &SourcingSnapshot,
3178        hr: &HrSnapshot,
3179        manufacturing: &ManufacturingSnapshot,
3180        banking: &BankingSnapshot,
3181        audit: &AuditSnapshot,
3182        financial_reporting: &FinancialReportingSnapshot,
3183        ocpm: &OcpmSnapshot,
3184        compliance: &ComplianceRegulationsSnapshot,
3185        stats: &mut EnhancedGenerationStatistics,
3186    ) -> SynthResult<()> {
3187        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
3188            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
3189            match self.export_hypergraph(
3190                coa,
3191                entries,
3192                document_flows,
3193                sourcing,
3194                hr,
3195                manufacturing,
3196                banking,
3197                audit,
3198                financial_reporting,
3199                ocpm,
3200                compliance,
3201                stats,
3202            ) {
3203                Ok(info) => {
3204                    info!(
3205                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
3206                        info.node_count, info.edge_count, info.hyperedge_count
3207                    );
3208                }
3209                Err(e) => {
3210                    warn!("Phase 10b: Hypergraph export failed: {}", e);
3211                }
3212            }
3213        } else {
3214            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
3215        }
3216        Ok(())
3217    }
3218
3219    /// Phase 11: LLM Enrichment.
3220    ///
3221    /// Uses an LLM provider (mock by default) to enrich vendor names with
3222    /// realistic, context-aware names. This phase is non-blocking: failures
3223    /// log a warning but do not stop the generation pipeline.
3224    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
3225        if !self.config.llm.enabled {
3226            debug!("Phase 11: Skipped (LLM enrichment disabled)");
3227            return;
3228        }
3229
3230        info!("Phase 11: Starting LLM Enrichment");
3231        let start = std::time::Instant::now();
3232
3233        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3234            let provider = Arc::new(MockLlmProvider::new(self.seed));
3235            let enricher = VendorLlmEnricher::new(provider);
3236
3237            let industry = format!("{:?}", self.config.global.industry);
3238            let max_enrichments = self
3239                .config
3240                .llm
3241                .max_vendor_enrichments
3242                .min(self.master_data.vendors.len());
3243
3244            let mut enriched_count = 0usize;
3245            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
3246                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
3247                    Ok(name) => {
3248                        vendor.name = name;
3249                        enriched_count += 1;
3250                    }
3251                    Err(e) => {
3252                        warn!(
3253                            "LLM vendor enrichment failed for {}: {}",
3254                            vendor.vendor_id, e
3255                        );
3256                    }
3257                }
3258            }
3259
3260            enriched_count
3261        }));
3262
3263        match result {
3264            Ok(enriched_count) => {
3265                stats.llm_vendors_enriched = enriched_count;
3266                let elapsed = start.elapsed();
3267                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3268                info!(
3269                    "Phase 11 complete: {} vendors enriched in {}ms",
3270                    enriched_count, stats.llm_enrichment_ms
3271                );
3272            }
3273            Err(_) => {
3274                let elapsed = start.elapsed();
3275                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3276                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
3277            }
3278        }
3279    }
3280
3281    /// Phase 12: Diffusion Enhancement.
3282    ///
3283    /// Generates a sample set using the statistical diffusion backend to
3284    /// demonstrate distribution-matching data generation. This phase is
3285    /// non-blocking: failures log a warning but do not stop the pipeline.
3286    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
3287        if !self.config.diffusion.enabled {
3288            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
3289            return;
3290        }
3291
3292        info!("Phase 12: Starting Diffusion Enhancement");
3293        let start = std::time::Instant::now();
3294
3295        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3296            // Target distribution: transaction amounts (log-normal-like)
3297            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
3298            let stds = vec![2000.0, 1.5, 1.0];
3299
3300            let diffusion_config = DiffusionConfig {
3301                n_steps: self.config.diffusion.n_steps,
3302                seed: self.seed,
3303                ..Default::default()
3304            };
3305
3306            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
3307
3308            let n_samples = self.config.diffusion.sample_size;
3309            let n_features = 3; // amount, line_items, approval_level
3310            let samples = backend.generate(n_samples, n_features, self.seed);
3311
3312            samples.len()
3313        }));
3314
3315        match result {
3316            Ok(sample_count) => {
3317                stats.diffusion_samples_generated = sample_count;
3318                let elapsed = start.elapsed();
3319                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
3320                info!(
3321                    "Phase 12 complete: {} diffusion samples generated in {}ms",
3322                    sample_count, stats.diffusion_enhancement_ms
3323                );
3324            }
3325            Err(_) => {
3326                let elapsed = start.elapsed();
3327                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
3328                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
3329            }
3330        }
3331    }
3332
3333    /// Phase 13: Causal Overlay.
3334    ///
3335    /// Builds a structural causal model from a built-in template (e.g.,
3336    /// fraud_detection) and generates causal samples. Optionally validates
3337    /// that the output respects the causal structure. This phase is
3338    /// non-blocking: failures log a warning but do not stop the pipeline.
3339    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
3340        if !self.config.causal.enabled {
3341            debug!("Phase 13: Skipped (causal generation disabled)");
3342            return;
3343        }
3344
3345        info!("Phase 13: Starting Causal Overlay");
3346        let start = std::time::Instant::now();
3347
3348        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3349            // Select template based on config
3350            let graph = match self.config.causal.template.as_str() {
3351                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
3352                _ => CausalGraph::fraud_detection_template(),
3353            };
3354
3355            let scm = StructuralCausalModel::new(graph.clone())
3356                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
3357
3358            let n_samples = self.config.causal.sample_size;
3359            let samples = scm
3360                .generate(n_samples, self.seed)
3361                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
3362
3363            // Optionally validate causal structure
3364            let validation_passed = if self.config.causal.validate {
3365                let report = CausalValidator::validate_causal_structure(&samples, &graph);
3366                if report.valid {
3367                    info!(
3368                        "Causal validation passed: all {} checks OK",
3369                        report.checks.len()
3370                    );
3371                } else {
3372                    warn!(
3373                        "Causal validation: {} violations detected: {:?}",
3374                        report.violations.len(),
3375                        report.violations
3376                    );
3377                }
3378                Some(report.valid)
3379            } else {
3380                None
3381            };
3382
3383            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
3384        }));
3385
3386        match result {
3387            Ok(Ok((sample_count, validation_passed))) => {
3388                stats.causal_samples_generated = sample_count;
3389                stats.causal_validation_passed = validation_passed;
3390                let elapsed = start.elapsed();
3391                stats.causal_generation_ms = elapsed.as_millis() as u64;
3392                info!(
3393                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
3394                    sample_count, stats.causal_generation_ms, validation_passed,
3395                );
3396            }
3397            Ok(Err(e)) => {
3398                let elapsed = start.elapsed();
3399                stats.causal_generation_ms = elapsed.as_millis() as u64;
3400                warn!("Phase 13: Causal generation failed: {}", e);
3401            }
3402            Err(_) => {
3403                let elapsed = start.elapsed();
3404                stats.causal_generation_ms = elapsed.as_millis() as u64;
3405                warn!("Phase 13: Causal generation failed (panic caught), continuing");
3406            }
3407        }
3408    }
3409
3410    /// Phase 14: Generate S2C sourcing data.
3411    fn phase_sourcing_data(
3412        &mut self,
3413        stats: &mut EnhancedGenerationStatistics,
3414    ) -> SynthResult<SourcingSnapshot> {
3415        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
3416            debug!("Phase 14: Skipped (sourcing generation disabled)");
3417            return Ok(SourcingSnapshot::default());
3418        }
3419
3420        info!("Phase 14: Generating S2C Sourcing Data");
3421        let seed = self.seed;
3422
3423        // Gather vendor data from master data
3424        let vendor_ids: Vec<String> = self
3425            .master_data
3426            .vendors
3427            .iter()
3428            .map(|v| v.vendor_id.clone())
3429            .collect();
3430        if vendor_ids.is_empty() {
3431            debug!("Phase 14: Skipped (no vendors available)");
3432            return Ok(SourcingSnapshot::default());
3433        }
3434
3435        let categories: Vec<(String, String)> = vec![
3436            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
3437            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
3438            ("CAT-IT".to_string(), "IT Equipment".to_string()),
3439            ("CAT-SVC".to_string(), "Professional Services".to_string()),
3440            ("CAT-LOG".to_string(), "Logistics".to_string()),
3441        ];
3442        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
3443            .iter()
3444            .map(|(id, name)| {
3445                (
3446                    id.clone(),
3447                    name.clone(),
3448                    rust_decimal::Decimal::from(100_000),
3449                )
3450            })
3451            .collect();
3452
3453        let company_code = self
3454            .config
3455            .companies
3456            .first()
3457            .map(|c| c.code.as_str())
3458            .unwrap_or("1000");
3459        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3460            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3461        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3462        let fiscal_year = start_date.year() as u16;
3463        let owner_ids: Vec<String> = self
3464            .master_data
3465            .employees
3466            .iter()
3467            .take(5)
3468            .map(|e| e.employee_id.clone())
3469            .collect();
3470        let owner_id = owner_ids
3471            .first()
3472            .map(std::string::String::as_str)
3473            .unwrap_or("BUYER-001");
3474
3475        // Step 1: Spend Analysis
3476        let mut spend_gen = SpendAnalysisGenerator::new(seed);
3477        let spend_analyses =
3478            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
3479
3480        // Step 2: Sourcing Projects
3481        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
3482        let sourcing_projects = if owner_ids.is_empty() {
3483            Vec::new()
3484        } else {
3485            project_gen.generate(
3486                company_code,
3487                &categories_with_spend,
3488                &owner_ids,
3489                start_date,
3490                self.config.global.period_months,
3491            )
3492        };
3493        stats.sourcing_project_count = sourcing_projects.len();
3494
3495        // Step 3: Qualifications
3496        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
3497        let mut qual_gen = QualificationGenerator::new(seed + 2);
3498        let qualifications = qual_gen.generate(
3499            company_code,
3500            &qual_vendor_ids,
3501            sourcing_projects.first().map(|p| p.project_id.as_str()),
3502            owner_id,
3503            start_date,
3504        );
3505
3506        // Step 4: RFx Events
3507        let mut rfx_gen = RfxGenerator::new(seed + 3);
3508        let rfx_events: Vec<RfxEvent> = sourcing_projects
3509            .iter()
3510            .map(|proj| {
3511                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
3512                rfx_gen.generate(
3513                    company_code,
3514                    &proj.project_id,
3515                    &proj.category_id,
3516                    &qualified_vids,
3517                    owner_id,
3518                    start_date,
3519                    50000.0,
3520                )
3521            })
3522            .collect();
3523        stats.rfx_event_count = rfx_events.len();
3524
3525        // Step 5: Bids
3526        let mut bid_gen = BidGenerator::new(seed + 4);
3527        let mut all_bids = Vec::new();
3528        for rfx in &rfx_events {
3529            let bidder_count = vendor_ids.len().clamp(2, 5);
3530            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
3531            let bids = bid_gen.generate(rfx, &responding, start_date);
3532            all_bids.extend(bids);
3533        }
3534        stats.bid_count = all_bids.len();
3535
3536        // Step 6: Bid Evaluations
3537        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
3538        let bid_evaluations: Vec<BidEvaluation> = rfx_events
3539            .iter()
3540            .map(|rfx| {
3541                let rfx_bids: Vec<SupplierBid> = all_bids
3542                    .iter()
3543                    .filter(|b| b.rfx_id == rfx.rfx_id)
3544                    .cloned()
3545                    .collect();
3546                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
3547            })
3548            .collect();
3549
3550        // Step 7: Contracts from winning bids
3551        let mut contract_gen = ContractGenerator::new(seed + 6);
3552        let contracts: Vec<ProcurementContract> = bid_evaluations
3553            .iter()
3554            .zip(rfx_events.iter())
3555            .filter_map(|(eval, rfx)| {
3556                eval.ranked_bids.first().and_then(|winner| {
3557                    all_bids
3558                        .iter()
3559                        .find(|b| b.bid_id == winner.bid_id)
3560                        .map(|winning_bid| {
3561                            contract_gen.generate_from_bid(
3562                                winning_bid,
3563                                Some(&rfx.sourcing_project_id),
3564                                &rfx.category_id,
3565                                owner_id,
3566                                start_date,
3567                            )
3568                        })
3569                })
3570            })
3571            .collect();
3572        stats.contract_count = contracts.len();
3573
3574        // Step 8: Catalog Items
3575        let mut catalog_gen = CatalogGenerator::new(seed + 7);
3576        let catalog_items = catalog_gen.generate(&contracts);
3577        stats.catalog_item_count = catalog_items.len();
3578
3579        // Step 9: Scorecards
3580        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
3581        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
3582            .iter()
3583            .fold(
3584                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
3585                |mut acc, c| {
3586                    acc.entry(c.vendor_id.clone()).or_default().push(c);
3587                    acc
3588                },
3589            )
3590            .into_iter()
3591            .collect();
3592        let scorecards = scorecard_gen.generate(
3593            company_code,
3594            &vendor_contracts,
3595            start_date,
3596            end_date,
3597            owner_id,
3598        );
3599        stats.scorecard_count = scorecards.len();
3600
3601        // Back-populate cross-references on sourcing projects (Task 35)
3602        // Link each project to its RFx events, contracts, and spend analyses
3603        let mut sourcing_projects = sourcing_projects;
3604        for project in &mut sourcing_projects {
3605            // Link RFx events generated for this project
3606            project.rfx_ids = rfx_events
3607                .iter()
3608                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
3609                .map(|rfx| rfx.rfx_id.clone())
3610                .collect();
3611
3612            // Link contract awarded from this project's RFx
3613            project.contract_id = contracts
3614                .iter()
3615                .find(|c| {
3616                    c.sourcing_project_id
3617                        .as_deref()
3618                        .is_some_and(|sp| sp == project.project_id)
3619                })
3620                .map(|c| c.contract_id.clone());
3621
3622            // Link spend analysis for matching category (use category_id as the reference)
3623            project.spend_analysis_id = spend_analyses
3624                .iter()
3625                .find(|sa| sa.category_id == project.category_id)
3626                .map(|sa| sa.category_id.clone());
3627        }
3628
3629        info!(
3630            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
3631            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
3632            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
3633        );
3634        self.check_resources_with_log("post-sourcing")?;
3635
3636        Ok(SourcingSnapshot {
3637            spend_analyses,
3638            sourcing_projects,
3639            qualifications,
3640            rfx_events,
3641            bids: all_bids,
3642            bid_evaluations,
3643            contracts,
3644            catalog_items,
3645            scorecards,
3646        })
3647    }
3648
3649    /// Build a [`GroupStructure`] from the current company configuration.
3650    ///
3651    /// The first company in the configuration is treated as the ultimate parent.
3652    /// All remaining companies become wholly-owned (100 %) subsidiaries with
3653    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
3654    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
3655        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
3656
3657        let parent_code = self
3658            .config
3659            .companies
3660            .first()
3661            .map(|c| c.code.clone())
3662            .unwrap_or_else(|| "PARENT".to_string());
3663
3664        let mut group = GroupStructure::new(parent_code);
3665
3666        for company in self.config.companies.iter().skip(1) {
3667            let sub =
3668                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
3669            group.add_subsidiary(sub);
3670        }
3671
3672        group
3673    }
3674
3675    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
3676    fn phase_intercompany(
3677        &mut self,
3678        journal_entries: &[JournalEntry],
3679        stats: &mut EnhancedGenerationStatistics,
3680    ) -> SynthResult<IntercompanySnapshot> {
3681        // Skip if intercompany is disabled in config
3682        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
3683            debug!("Phase 14b: Skipped (intercompany generation disabled)");
3684            return Ok(IntercompanySnapshot::default());
3685        }
3686
3687        // Intercompany requires at least 2 companies
3688        if self.config.companies.len() < 2 {
3689            debug!(
3690                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
3691                self.config.companies.len()
3692            );
3693            return Ok(IntercompanySnapshot::default());
3694        }
3695
3696        info!("Phase 14b: Generating Intercompany Transactions");
3697
3698        // Build the group structure early — used by ISA 600 component auditor scope
3699        // and consolidated financial statement generators downstream.
3700        let group_structure = self.build_group_structure();
3701        debug!(
3702            "Group structure built: parent={}, subsidiaries={}",
3703            group_structure.parent_entity,
3704            group_structure.subsidiaries.len()
3705        );
3706
3707        let seed = self.seed;
3708        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3709            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3710        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3711
3712        // Build ownership structure from company configs
3713        // First company is treated as the parent, remaining are subsidiaries
3714        let parent_code = self.config.companies[0].code.clone();
3715        let mut ownership_structure =
3716            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
3717
3718        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
3719            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
3720                format!("REL{:03}", i + 1),
3721                parent_code.clone(),
3722                company.code.clone(),
3723                rust_decimal::Decimal::from(100), // Default 100% ownership
3724                start_date,
3725            );
3726            ownership_structure.add_relationship(relationship);
3727        }
3728
3729        // Convert config transfer pricing method to core model enum
3730        let tp_method = match self.config.intercompany.transfer_pricing_method {
3731            datasynth_config::schema::TransferPricingMethod::CostPlus => {
3732                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
3733            }
3734            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
3735                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
3736            }
3737            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
3738                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
3739            }
3740            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
3741                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
3742            }
3743            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
3744                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
3745            }
3746        };
3747
3748        // Build IC generator config from schema config
3749        let ic_currency = self
3750            .config
3751            .companies
3752            .first()
3753            .map(|c| c.currency.clone())
3754            .unwrap_or_else(|| "USD".to_string());
3755        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
3756            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
3757            transfer_pricing_method: tp_method,
3758            markup_percent: rust_decimal::Decimal::from_f64_retain(
3759                self.config.intercompany.markup_percent,
3760            )
3761            .unwrap_or(rust_decimal::Decimal::from(5)),
3762            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
3763            default_currency: ic_currency,
3764            ..Default::default()
3765        };
3766
3767        // Create IC generator
3768        let mut ic_generator = datasynth_generators::ICGenerator::new(
3769            ic_gen_config,
3770            ownership_structure.clone(),
3771            seed + 50,
3772        );
3773
3774        // Generate IC transactions for the period
3775        // Use ~3 transactions per day as a reasonable default
3776        let transactions_per_day = 3;
3777        let matched_pairs = ic_generator.generate_transactions_for_period(
3778            start_date,
3779            end_date,
3780            transactions_per_day,
3781        );
3782
3783        // Generate journal entries from matched pairs
3784        let mut seller_entries = Vec::new();
3785        let mut buyer_entries = Vec::new();
3786        let fiscal_year = start_date.year();
3787
3788        for pair in &matched_pairs {
3789            let fiscal_period = pair.posting_date.month();
3790            let (seller_je, buyer_je) =
3791                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
3792            seller_entries.push(seller_je);
3793            buyer_entries.push(buyer_je);
3794        }
3795
3796        // Run matching engine
3797        let matching_config = datasynth_generators::ICMatchingConfig {
3798            base_currency: self
3799                .config
3800                .companies
3801                .first()
3802                .map(|c| c.currency.clone())
3803                .unwrap_or_else(|| "USD".to_string()),
3804            ..Default::default()
3805        };
3806        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
3807        matching_engine.load_matched_pairs(&matched_pairs);
3808        let matching_result = matching_engine.run_matching(end_date);
3809
3810        // Generate elimination entries if configured
3811        let mut elimination_entries = Vec::new();
3812        if self.config.intercompany.generate_eliminations {
3813            let elim_config = datasynth_generators::EliminationConfig {
3814                consolidation_entity: "GROUP".to_string(),
3815                base_currency: self
3816                    .config
3817                    .companies
3818                    .first()
3819                    .map(|c| c.currency.clone())
3820                    .unwrap_or_else(|| "USD".to_string()),
3821                ..Default::default()
3822            };
3823
3824            let mut elim_generator =
3825                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
3826
3827            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
3828            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
3829                matching_result
3830                    .matched_balances
3831                    .iter()
3832                    .chain(matching_result.unmatched_balances.iter())
3833                    .cloned()
3834                    .collect();
3835
3836            // Build investment and equity maps from the group structure so that the
3837            // elimination generator can produce equity-investment elimination entries
3838            // (parent's investment in subsidiary vs. subsidiary's equity capital).
3839            //
3840            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
3841            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
3842            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
3843            //
3844            // Net assets are derived from the journal entries using account-range heuristics:
3845            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
3846            // no JE data is available (IC phase runs early in the generation pipeline).
3847            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
3848                std::collections::HashMap::new();
3849            let mut equity_amounts: std::collections::HashMap<
3850                String,
3851                std::collections::HashMap<String, rust_decimal::Decimal>,
3852            > = std::collections::HashMap::new();
3853            {
3854                use rust_decimal::Decimal;
3855                let hundred = Decimal::from(100u32);
3856                let ten_pct = Decimal::new(10, 2); // 0.10
3857                let thirty_pct = Decimal::new(30, 2); // 0.30
3858                let sixty_pct = Decimal::new(60, 2); // 0.60
3859                let parent_code = &group_structure.parent_entity;
3860                for sub in &group_structure.subsidiaries {
3861                    let net_assets = {
3862                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
3863                        if na > Decimal::ZERO {
3864                            na
3865                        } else {
3866                            Decimal::from(1_000_000u64)
3867                        }
3868                    };
3869                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
3870                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
3871                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
3872
3873                    // Split subsidiary equity into conventional components:
3874                    // 10 % share capital / 30 % APIC / 60 % retained earnings
3875                    let mut eq_map = std::collections::HashMap::new();
3876                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
3877                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
3878                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
3879                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
3880                }
3881            }
3882
3883            let journal = elim_generator.generate_eliminations(
3884                &fiscal_period,
3885                end_date,
3886                &all_balances,
3887                &matched_pairs,
3888                &investment_amounts,
3889                &equity_amounts,
3890            );
3891
3892            elimination_entries = journal.entries.clone();
3893        }
3894
3895        let matched_pair_count = matched_pairs.len();
3896        let elimination_entry_count = elimination_entries.len();
3897        let match_rate = matching_result.match_rate;
3898
3899        stats.ic_matched_pair_count = matched_pair_count;
3900        stats.ic_elimination_count = elimination_entry_count;
3901        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
3902
3903        info!(
3904            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
3905            matched_pair_count,
3906            stats.ic_transaction_count,
3907            seller_entries.len(),
3908            buyer_entries.len(),
3909            elimination_entry_count,
3910            match_rate * 100.0
3911        );
3912        self.check_resources_with_log("post-intercompany")?;
3913
3914        // ----------------------------------------------------------------
3915        // NCI measurements: derive from group structure ownership percentages
3916        // ----------------------------------------------------------------
3917        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
3918            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
3919            use rust_decimal::Decimal;
3920
3921            let eight_pct = Decimal::new(8, 2); // 0.08
3922
3923            group_structure
3924                .subsidiaries
3925                .iter()
3926                .filter(|sub| {
3927                    sub.nci_percentage > Decimal::ZERO
3928                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
3929                })
3930                .map(|sub| {
3931                    // Compute net assets from actual journal entries for this subsidiary.
3932                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
3933                    // IC phase runs before the main JE batch has been populated).
3934                    let net_assets_from_jes =
3935                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
3936
3937                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
3938                        net_assets_from_jes.round_dp(2)
3939                    } else {
3940                        // Fallback: use a plausible base amount
3941                        Decimal::from(1_000_000u64)
3942                    };
3943
3944                    // Net income approximated as 8% of net assets
3945                    let net_income = (net_assets * eight_pct).round_dp(2);
3946
3947                    NciMeasurement::compute(
3948                        sub.entity_code.clone(),
3949                        sub.nci_percentage,
3950                        net_assets,
3951                        net_income,
3952                    )
3953                })
3954                .collect()
3955        };
3956
3957        if !nci_measurements.is_empty() {
3958            info!(
3959                "NCI measurements: {} subsidiaries with non-controlling interests",
3960                nci_measurements.len()
3961            );
3962        }
3963
3964        Ok(IntercompanySnapshot {
3965            group_structure: Some(group_structure),
3966            matched_pairs,
3967            seller_journal_entries: seller_entries,
3968            buyer_journal_entries: buyer_entries,
3969            elimination_entries,
3970            nci_measurements,
3971            matched_pair_count,
3972            elimination_entry_count,
3973            match_rate,
3974        })
3975    }
3976
3977    /// Phase 15: Generate bank reconciliations and financial statements.
3978    fn phase_financial_reporting(
3979        &mut self,
3980        document_flows: &DocumentFlowSnapshot,
3981        journal_entries: &[JournalEntry],
3982        coa: &Arc<ChartOfAccounts>,
3983        _hr: &HrSnapshot,
3984        _audit: &AuditSnapshot,
3985        stats: &mut EnhancedGenerationStatistics,
3986    ) -> SynthResult<FinancialReportingSnapshot> {
3987        let fs_enabled = self.phase_config.generate_financial_statements
3988            || self.config.financial_reporting.enabled;
3989        let br_enabled = self.phase_config.generate_bank_reconciliation;
3990
3991        if !fs_enabled && !br_enabled {
3992            debug!("Phase 15: Skipped (financial reporting disabled)");
3993            return Ok(FinancialReportingSnapshot::default());
3994        }
3995
3996        info!("Phase 15: Generating Financial Reporting Data");
3997
3998        let seed = self.seed;
3999        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4000            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4001
4002        let mut financial_statements = Vec::new();
4003        let mut bank_reconciliations = Vec::new();
4004        let mut trial_balances = Vec::new();
4005        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4006        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4007            Vec::new();
4008        // Standalone statements keyed by entity code
4009        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
4010            std::collections::HashMap::new();
4011        // Consolidated statements (one per period)
4012        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
4013        // Consolidation schedules (one per period)
4014        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
4015
4016        // Generate financial statements from JE-derived trial balances.
4017        //
4018        // When journal entries are available, we use cumulative trial balances for
4019        // balance sheet accounts and current-period trial balances for income
4020        // statement accounts. We also track prior-period trial balances so the
4021        // generator can produce comparative amounts, and we build a proper
4022        // cash flow statement from working capital changes rather than random data.
4023        if fs_enabled {
4024            let has_journal_entries = !journal_entries.is_empty();
4025
4026            // Use FinancialStatementGenerator for balance sheet and income statement,
4027            // but build cash flow ourselves from TB data when JEs are available.
4028            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
4029            // Separate generator for consolidated statements (different seed offset)
4030            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
4031
4032            // Collect elimination JEs once (reused across periods)
4033            let elimination_entries: Vec<&JournalEntry> = journal_entries
4034                .iter()
4035                .filter(|je| je.header.is_elimination)
4036                .collect();
4037
4038            // Generate one set of statements per period, per entity
4039            for period in 0..self.config.global.period_months {
4040                let period_start = start_date + chrono::Months::new(period);
4041                let period_end =
4042                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4043                let fiscal_year = period_end.year() as u16;
4044                let fiscal_period = period_end.month() as u8;
4045                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4046
4047                // Build per-entity trial balances for this period (non-elimination JEs)
4048                // We accumulate them for the consolidation step.
4049                let mut entity_tb_map: std::collections::HashMap<
4050                    String,
4051                    std::collections::HashMap<String, rust_decimal::Decimal>,
4052                > = std::collections::HashMap::new();
4053
4054                // --- Standalone: one set of statements per company ---
4055                for (company_idx, company) in self.config.companies.iter().enumerate() {
4056                    let company_code = company.code.as_str();
4057                    let currency = company.currency.as_str();
4058                    // Use a unique seed offset per company to keep statements deterministic
4059                    // and distinct across companies
4060                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
4061                    let mut company_fs_gen =
4062                        FinancialStatementGenerator::new(seed + company_seed_offset);
4063
4064                    if has_journal_entries {
4065                        let tb_entries = Self::build_cumulative_trial_balance(
4066                            journal_entries,
4067                            coa,
4068                            company_code,
4069                            start_date,
4070                            period_end,
4071                            fiscal_year,
4072                            fiscal_period,
4073                        );
4074
4075                        // Accumulate per-entity category balances for consolidation
4076                        let entity_cat_map =
4077                            entity_tb_map.entry(company_code.to_string()).or_default();
4078                        for tb_entry in &tb_entries {
4079                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
4080                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
4081                        }
4082
4083                        let stmts = company_fs_gen.generate(
4084                            company_code,
4085                            currency,
4086                            &tb_entries,
4087                            period_start,
4088                            period_end,
4089                            fiscal_year,
4090                            fiscal_period,
4091                            None,
4092                            "SYS-AUTOCLOSE",
4093                        );
4094
4095                        let mut entity_stmts = Vec::new();
4096                        for stmt in stmts {
4097                            if stmt.statement_type == StatementType::CashFlowStatement {
4098                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
4099                                let cf_items = Self::build_cash_flow_from_trial_balances(
4100                                    &tb_entries,
4101                                    None,
4102                                    net_income,
4103                                );
4104                                entity_stmts.push(FinancialStatement {
4105                                    cash_flow_items: cf_items,
4106                                    ..stmt
4107                                });
4108                            } else {
4109                                entity_stmts.push(stmt);
4110                            }
4111                        }
4112
4113                        // Add to the flat financial_statements list (used by KPI/budget)
4114                        financial_statements.extend(entity_stmts.clone());
4115
4116                        // Store standalone per-entity
4117                        standalone_statements
4118                            .entry(company_code.to_string())
4119                            .or_default()
4120                            .extend(entity_stmts);
4121
4122                        // Only store trial balance for the first company in the period
4123                        // to avoid duplicates in the trial_balances list
4124                        if company_idx == 0 {
4125                            trial_balances.push(PeriodTrialBalance {
4126                                fiscal_year,
4127                                fiscal_period,
4128                                period_start,
4129                                period_end,
4130                                entries: tb_entries,
4131                            });
4132                        }
4133                    } else {
4134                        // Fallback: no JEs available
4135                        let tb_entries = Self::build_trial_balance_from_entries(
4136                            journal_entries,
4137                            coa,
4138                            company_code,
4139                            fiscal_year,
4140                            fiscal_period,
4141                        );
4142
4143                        let stmts = company_fs_gen.generate(
4144                            company_code,
4145                            currency,
4146                            &tb_entries,
4147                            period_start,
4148                            period_end,
4149                            fiscal_year,
4150                            fiscal_period,
4151                            None,
4152                            "SYS-AUTOCLOSE",
4153                        );
4154                        financial_statements.extend(stmts.clone());
4155                        standalone_statements
4156                            .entry(company_code.to_string())
4157                            .or_default()
4158                            .extend(stmts);
4159
4160                        if company_idx == 0 && !tb_entries.is_empty() {
4161                            trial_balances.push(PeriodTrialBalance {
4162                                fiscal_year,
4163                                fiscal_period,
4164                                period_start,
4165                                period_end,
4166                                entries: tb_entries,
4167                            });
4168                        }
4169                    }
4170                }
4171
4172                // --- Consolidated: aggregate all entities + apply eliminations ---
4173                // Use the primary (first) company's currency for the consolidated statement
4174                let group_currency = self
4175                    .config
4176                    .companies
4177                    .first()
4178                    .map(|c| c.currency.as_str())
4179                    .unwrap_or("USD");
4180
4181                // Build owned elimination entries for this period
4182                let period_eliminations: Vec<JournalEntry> = elimination_entries
4183                    .iter()
4184                    .filter(|je| {
4185                        je.header.fiscal_year == fiscal_year
4186                            && je.header.fiscal_period == fiscal_period
4187                    })
4188                    .map(|je| (*je).clone())
4189                    .collect();
4190
4191                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
4192                    &entity_tb_map,
4193                    &period_eliminations,
4194                    &period_label,
4195                );
4196
4197                // Build a pseudo trial balance from consolidated line items for the
4198                // FinancialStatementGenerator to use (only for cash flow direction).
4199                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
4200                    .line_items
4201                    .iter()
4202                    .map(|li| {
4203                        let net = li.post_elimination_total;
4204                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
4205                            (net, rust_decimal::Decimal::ZERO)
4206                        } else {
4207                            (rust_decimal::Decimal::ZERO, -net)
4208                        };
4209                        datasynth_generators::TrialBalanceEntry {
4210                            account_code: li.account_category.clone(),
4211                            account_name: li.account_category.clone(),
4212                            category: li.account_category.clone(),
4213                            debit_balance: debit,
4214                            credit_balance: credit,
4215                        }
4216                    })
4217                    .collect();
4218
4219                let mut cons_stmts = cons_gen.generate(
4220                    "GROUP",
4221                    group_currency,
4222                    &cons_tb,
4223                    period_start,
4224                    period_end,
4225                    fiscal_year,
4226                    fiscal_period,
4227                    None,
4228                    "SYS-AUTOCLOSE",
4229                );
4230
4231                // Split consolidated line items by statement type.
4232                // The consolidation generator returns BS items first, then IS items,
4233                // identified by their CONS- prefix and category.
4234                let bs_categories: &[&str] = &[
4235                    "CASH",
4236                    "RECEIVABLES",
4237                    "INVENTORY",
4238                    "FIXEDASSETS",
4239                    "PAYABLES",
4240                    "ACCRUEDLIABILITIES",
4241                    "LONGTERMDEBT",
4242                    "EQUITY",
4243                ];
4244                let (bs_items, is_items): (Vec<_>, Vec<_>) =
4245                    cons_line_items.into_iter().partition(|li| {
4246                        let upper = li.label.to_uppercase();
4247                        bs_categories.iter().any(|c| upper == *c)
4248                    });
4249
4250                for stmt in &mut cons_stmts {
4251                    stmt.is_consolidated = true;
4252                    match stmt.statement_type {
4253                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
4254                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
4255                        _ => {} // CF and equity change statements keep generator output
4256                    }
4257                }
4258
4259                consolidated_statements.extend(cons_stmts);
4260                consolidation_schedules.push(schedule);
4261            }
4262
4263            // Backward compat: if only 1 company, use existing code path logic
4264            // (prior_cumulative_tb for comparative amounts). Already handled above;
4265            // the prior_ref is omitted to keep this change minimal.
4266            let _ = &mut fs_gen; // suppress unused warning
4267
4268            stats.financial_statement_count = financial_statements.len();
4269            info!(
4270                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
4271                stats.financial_statement_count,
4272                consolidated_statements.len(),
4273                has_journal_entries
4274            );
4275
4276            // ----------------------------------------------------------------
4277            // IFRS 8 / ASC 280: Operating Segment Reporting
4278            // ----------------------------------------------------------------
4279            // Build entity seeds from the company configuration.
4280            let entity_seeds: Vec<SegmentSeed> = self
4281                .config
4282                .companies
4283                .iter()
4284                .map(|c| SegmentSeed {
4285                    code: c.code.clone(),
4286                    name: c.name.clone(),
4287                    currency: c.currency.clone(),
4288                })
4289                .collect();
4290
4291            let mut seg_gen = SegmentGenerator::new(seed + 30);
4292
4293            // Generate one set of segment reports per period.
4294            // We extract consolidated revenue / profit / assets from the consolidated
4295            // financial statements produced above, falling back to simple sums when
4296            // no consolidated statements were generated (single-entity path).
4297            for period in 0..self.config.global.period_months {
4298                let period_end =
4299                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4300                let fiscal_year = period_end.year() as u16;
4301                let fiscal_period = period_end.month() as u8;
4302                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4303
4304                use datasynth_core::models::StatementType;
4305
4306                // Try to find consolidated income statement for this period
4307                let cons_is = consolidated_statements.iter().find(|s| {
4308                    s.fiscal_year == fiscal_year
4309                        && s.fiscal_period == fiscal_period
4310                        && s.statement_type == StatementType::IncomeStatement
4311                });
4312                let cons_bs = consolidated_statements.iter().find(|s| {
4313                    s.fiscal_year == fiscal_year
4314                        && s.fiscal_period == fiscal_period
4315                        && s.statement_type == StatementType::BalanceSheet
4316                });
4317
4318                // If consolidated statements not available fall back to the flat list
4319                let is_stmt = cons_is.or_else(|| {
4320                    financial_statements.iter().find(|s| {
4321                        s.fiscal_year == fiscal_year
4322                            && s.fiscal_period == fiscal_period
4323                            && s.statement_type == StatementType::IncomeStatement
4324                    })
4325                });
4326                let bs_stmt = cons_bs.or_else(|| {
4327                    financial_statements.iter().find(|s| {
4328                        s.fiscal_year == fiscal_year
4329                            && s.fiscal_period == fiscal_period
4330                            && s.statement_type == StatementType::BalanceSheet
4331                    })
4332                });
4333
4334                let consolidated_revenue = is_stmt
4335                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
4336                    .map(|li| -li.amount) // revenue is stored as negative in IS
4337                    .unwrap_or(rust_decimal::Decimal::ZERO);
4338
4339                let consolidated_profit = is_stmt
4340                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
4341                    .map(|li| li.amount)
4342                    .unwrap_or(rust_decimal::Decimal::ZERO);
4343
4344                let consolidated_assets = bs_stmt
4345                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
4346                    .map(|li| li.amount)
4347                    .unwrap_or(rust_decimal::Decimal::ZERO);
4348
4349                // Skip periods where we have no financial data
4350                if consolidated_revenue == rust_decimal::Decimal::ZERO
4351                    && consolidated_assets == rust_decimal::Decimal::ZERO
4352                {
4353                    continue;
4354                }
4355
4356                let group_code = self
4357                    .config
4358                    .companies
4359                    .first()
4360                    .map(|c| c.code.as_str())
4361                    .unwrap_or("GROUP");
4362
4363                let (segs, recon) = seg_gen.generate(
4364                    group_code,
4365                    &period_label,
4366                    consolidated_revenue,
4367                    consolidated_profit,
4368                    consolidated_assets,
4369                    &entity_seeds,
4370                );
4371                segment_reports.extend(segs);
4372                segment_reconciliations.push(recon);
4373            }
4374
4375            info!(
4376                "Segment reports generated: {} segments, {} reconciliations",
4377                segment_reports.len(),
4378                segment_reconciliations.len()
4379            );
4380        }
4381
4382        // Generate bank reconciliations from payment data
4383        if br_enabled && !document_flows.payments.is_empty() {
4384            let employee_ids: Vec<String> = self
4385                .master_data
4386                .employees
4387                .iter()
4388                .map(|e| e.employee_id.clone())
4389                .collect();
4390            let mut br_gen =
4391                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
4392
4393            // Group payments by company code and period
4394            for company in &self.config.companies {
4395                let company_payments: Vec<PaymentReference> = document_flows
4396                    .payments
4397                    .iter()
4398                    .filter(|p| p.header.company_code == company.code)
4399                    .map(|p| PaymentReference {
4400                        id: p.header.document_id.clone(),
4401                        amount: if p.is_vendor { p.amount } else { -p.amount },
4402                        date: p.header.document_date,
4403                        reference: p
4404                            .check_number
4405                            .clone()
4406                            .or_else(|| p.wire_reference.clone())
4407                            .unwrap_or_else(|| p.header.document_id.clone()),
4408                    })
4409                    .collect();
4410
4411                if company_payments.is_empty() {
4412                    continue;
4413                }
4414
4415                let bank_account_id = format!("{}-MAIN", company.code);
4416
4417                // Generate one reconciliation per period
4418                for period in 0..self.config.global.period_months {
4419                    let period_start = start_date + chrono::Months::new(period);
4420                    let period_end =
4421                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4422
4423                    let period_payments: Vec<PaymentReference> = company_payments
4424                        .iter()
4425                        .filter(|p| p.date >= period_start && p.date <= period_end)
4426                        .cloned()
4427                        .collect();
4428
4429                    let recon = br_gen.generate(
4430                        &company.code,
4431                        &bank_account_id,
4432                        period_start,
4433                        period_end,
4434                        &company.currency,
4435                        &period_payments,
4436                    );
4437                    bank_reconciliations.push(recon);
4438                }
4439            }
4440            info!(
4441                "Bank reconciliations generated: {} reconciliations",
4442                bank_reconciliations.len()
4443            );
4444        }
4445
4446        stats.bank_reconciliation_count = bank_reconciliations.len();
4447        self.check_resources_with_log("post-financial-reporting")?;
4448
4449        if !trial_balances.is_empty() {
4450            info!(
4451                "Period-close trial balances captured: {} periods",
4452                trial_balances.len()
4453            );
4454        }
4455
4456        // Notes to financial statements are generated in a separate post-processing step
4457        // (generate_notes_to_financial_statements) called after accounting_standards and tax
4458        // phases have completed, so that deferred tax and provision data can be wired in.
4459        let notes_to_financial_statements = Vec::new();
4460
4461        Ok(FinancialReportingSnapshot {
4462            financial_statements,
4463            standalone_statements,
4464            consolidated_statements,
4465            consolidation_schedules,
4466            bank_reconciliations,
4467            trial_balances,
4468            segment_reports,
4469            segment_reconciliations,
4470            notes_to_financial_statements,
4471        })
4472    }
4473
4474    /// Populate notes to financial statements using fully-resolved snapshots.
4475    ///
4476    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
4477    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
4478    /// can be wired into the notes context.  The method mutates
4479    /// `financial_reporting.notes_to_financial_statements` in-place.
4480    fn generate_notes_to_financial_statements(
4481        &self,
4482        financial_reporting: &mut FinancialReportingSnapshot,
4483        accounting_standards: &AccountingStandardsSnapshot,
4484        tax: &TaxSnapshot,
4485        hr: &HrSnapshot,
4486        audit: &AuditSnapshot,
4487    ) {
4488        use datasynth_config::schema::AccountingFrameworkConfig;
4489        use datasynth_core::models::StatementType;
4490        use datasynth_generators::period_close::notes_generator::{
4491            NotesGenerator, NotesGeneratorContext,
4492        };
4493
4494        let seed = self.seed;
4495        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4496        {
4497            Ok(d) => d,
4498            Err(_) => return,
4499        };
4500
4501        let mut notes_gen = NotesGenerator::new(seed + 4235);
4502
4503        for company in &self.config.companies {
4504            let last_period_end = start_date
4505                + chrono::Months::new(self.config.global.period_months)
4506                - chrono::Days::new(1);
4507            let fiscal_year = last_period_end.year() as u16;
4508
4509            // Extract relevant amounts from the already-generated financial statements
4510            let entity_is = financial_reporting
4511                .standalone_statements
4512                .get(&company.code)
4513                .and_then(|stmts| {
4514                    stmts.iter().find(|s| {
4515                        s.fiscal_year == fiscal_year
4516                            && s.statement_type == StatementType::IncomeStatement
4517                    })
4518                });
4519            let entity_bs = financial_reporting
4520                .standalone_statements
4521                .get(&company.code)
4522                .and_then(|stmts| {
4523                    stmts.iter().find(|s| {
4524                        s.fiscal_year == fiscal_year
4525                            && s.statement_type == StatementType::BalanceSheet
4526                    })
4527                });
4528
4529            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
4530            let revenue_amount = entity_is
4531                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
4532                .map(|li| li.amount);
4533            let ppe_gross = entity_bs
4534                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
4535                .map(|li| li.amount);
4536
4537            let framework = match self
4538                .config
4539                .accounting_standards
4540                .framework
4541                .unwrap_or_default()
4542            {
4543                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
4544                    "IFRS".to_string()
4545                }
4546                _ => "US GAAP".to_string(),
4547            };
4548
4549            // ---- Deferred tax (IAS 12 / ASC 740) ----
4550            // Sum closing DTA and DTL from rollforward entries for this entity.
4551            let (entity_dta, entity_dtl) = {
4552                let mut dta = rust_decimal::Decimal::ZERO;
4553                let mut dtl = rust_decimal::Decimal::ZERO;
4554                for rf in &tax.deferred_tax.rollforwards {
4555                    if rf.entity_code == company.code {
4556                        dta += rf.closing_dta;
4557                        dtl += rf.closing_dtl;
4558                    }
4559                }
4560                (
4561                    if dta > rust_decimal::Decimal::ZERO {
4562                        Some(dta)
4563                    } else {
4564                        None
4565                    },
4566                    if dtl > rust_decimal::Decimal::ZERO {
4567                        Some(dtl)
4568                    } else {
4569                        None
4570                    },
4571                )
4572            };
4573
4574            // ---- Provisions (IAS 37 / ASC 450) ----
4575            // Filter provisions to this entity; sum best_estimate amounts.
4576            let entity_provisions: Vec<_> = accounting_standards
4577                .provisions
4578                .iter()
4579                .filter(|p| p.entity_code == company.code)
4580                .collect();
4581            let provision_count = entity_provisions.len();
4582            let total_provisions = if provision_count > 0 {
4583                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
4584            } else {
4585                None
4586            };
4587
4588            // ---- Pension data from HR snapshot ----
4589            let entity_pension_plan_count = hr
4590                .pension_plans
4591                .iter()
4592                .filter(|p| p.entity_code == company.code)
4593                .count();
4594            let entity_total_dbo: Option<rust_decimal::Decimal> = {
4595                let sum: rust_decimal::Decimal = hr
4596                    .pension_disclosures
4597                    .iter()
4598                    .filter(|d| {
4599                        hr.pension_plans
4600                            .iter()
4601                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
4602                    })
4603                    .map(|d| d.net_pension_liability)
4604                    .sum();
4605                let plan_assets_sum: rust_decimal::Decimal = hr
4606                    .pension_plan_assets
4607                    .iter()
4608                    .filter(|a| {
4609                        hr.pension_plans
4610                            .iter()
4611                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
4612                    })
4613                    .map(|a| a.fair_value_closing)
4614                    .sum();
4615                if entity_pension_plan_count > 0 {
4616                    Some(sum + plan_assets_sum)
4617                } else {
4618                    None
4619                }
4620            };
4621            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
4622                let sum: rust_decimal::Decimal = hr
4623                    .pension_plan_assets
4624                    .iter()
4625                    .filter(|a| {
4626                        hr.pension_plans
4627                            .iter()
4628                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
4629                    })
4630                    .map(|a| a.fair_value_closing)
4631                    .sum();
4632                if entity_pension_plan_count > 0 {
4633                    Some(sum)
4634                } else {
4635                    None
4636                }
4637            };
4638
4639            // ---- Audit data: related parties + subsequent events ----
4640            // Audit snapshot covers all entities; use total counts (common case = single entity).
4641            let rp_count = audit.related_party_transactions.len();
4642            let se_count = audit.subsequent_events.len();
4643            let adjusting_count = audit
4644                .subsequent_events
4645                .iter()
4646                .filter(|e| {
4647                    matches!(
4648                        e.classification,
4649                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
4650                    )
4651                })
4652                .count();
4653
4654            let ctx = NotesGeneratorContext {
4655                entity_code: company.code.clone(),
4656                framework,
4657                period: format!("FY{}", fiscal_year),
4658                period_end: last_period_end,
4659                currency: company.currency.clone(),
4660                revenue_amount,
4661                total_ppe_gross: ppe_gross,
4662                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
4663                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
4664                deferred_tax_asset: entity_dta,
4665                deferred_tax_liability: entity_dtl,
4666                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
4667                provision_count,
4668                total_provisions,
4669                // Pension data from HR snapshot
4670                pension_plan_count: entity_pension_plan_count,
4671                total_dbo: entity_total_dbo,
4672                total_plan_assets: entity_total_plan_assets,
4673                // Audit data
4674                related_party_transaction_count: rp_count,
4675                subsequent_event_count: se_count,
4676                adjusting_event_count: adjusting_count,
4677                ..NotesGeneratorContext::default()
4678            };
4679
4680            let entity_notes = notes_gen.generate(&ctx);
4681            info!(
4682                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
4683                company.code,
4684                entity_notes.len(),
4685                entity_dta,
4686                entity_dtl,
4687                provision_count,
4688            );
4689            financial_reporting
4690                .notes_to_financial_statements
4691                .extend(entity_notes);
4692        }
4693    }
4694
4695    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
4696    ///
4697    /// This ensures the trial balance is coherent with the JEs: every debit and credit
4698    /// posted in the journal entries flows through to the trial balance, using the real
4699    /// GL account numbers from the CoA.
4700    fn build_trial_balance_from_entries(
4701        journal_entries: &[JournalEntry],
4702        coa: &ChartOfAccounts,
4703        company_code: &str,
4704        fiscal_year: u16,
4705        fiscal_period: u8,
4706    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
4707        use rust_decimal::Decimal;
4708
4709        // Accumulate total debits and credits per GL account
4710        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
4711        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
4712
4713        for je in journal_entries {
4714            // Filter to matching company, fiscal year, and period
4715            if je.header.company_code != company_code
4716                || je.header.fiscal_year != fiscal_year
4717                || je.header.fiscal_period != fiscal_period
4718            {
4719                continue;
4720            }
4721
4722            for line in &je.lines {
4723                let acct = &line.gl_account;
4724                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
4725                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
4726            }
4727        }
4728
4729        // Build a TrialBalanceEntry for each account that had activity
4730        let mut all_accounts: Vec<&String> = account_debits
4731            .keys()
4732            .chain(account_credits.keys())
4733            .collect::<std::collections::HashSet<_>>()
4734            .into_iter()
4735            .collect();
4736        all_accounts.sort();
4737
4738        let mut entries = Vec::new();
4739
4740        for acct_number in all_accounts {
4741            let debit = account_debits
4742                .get(acct_number)
4743                .copied()
4744                .unwrap_or(Decimal::ZERO);
4745            let credit = account_credits
4746                .get(acct_number)
4747                .copied()
4748                .unwrap_or(Decimal::ZERO);
4749
4750            if debit.is_zero() && credit.is_zero() {
4751                continue;
4752            }
4753
4754            // Look up account name from CoA, fall back to "Account {code}"
4755            let account_name = coa
4756                .get_account(acct_number)
4757                .map(|gl| gl.short_description.clone())
4758                .unwrap_or_else(|| format!("Account {acct_number}"));
4759
4760            // Map account code prefix to the category strings expected by
4761            // FinancialStatementGenerator (Cash, Receivables, Inventory,
4762            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
4763            // OperatingExpenses).
4764            let category = Self::category_from_account_code(acct_number);
4765
4766            entries.push(datasynth_generators::TrialBalanceEntry {
4767                account_code: acct_number.clone(),
4768                account_name,
4769                category,
4770                debit_balance: debit,
4771                credit_balance: credit,
4772            });
4773        }
4774
4775        entries
4776    }
4777
4778    /// Build a cumulative trial balance by aggregating all JEs from the start up to
4779    /// (and including) the given period end date.
4780    ///
4781    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
4782    /// while income statement accounts (revenue, expenses) show only the current period.
4783    /// The two are merged into a single Vec for the FinancialStatementGenerator.
4784    fn build_cumulative_trial_balance(
4785        journal_entries: &[JournalEntry],
4786        coa: &ChartOfAccounts,
4787        company_code: &str,
4788        start_date: NaiveDate,
4789        period_end: NaiveDate,
4790        fiscal_year: u16,
4791        fiscal_period: u8,
4792    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
4793        use rust_decimal::Decimal;
4794
4795        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
4796        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
4797        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
4798
4799        // Accumulate debits/credits for income statement accounts (current period only)
4800        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
4801        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
4802
4803        for je in journal_entries {
4804            if je.header.company_code != company_code {
4805                continue;
4806            }
4807
4808            for line in &je.lines {
4809                let acct = &line.gl_account;
4810                let category = Self::category_from_account_code(acct);
4811                let is_bs_account = matches!(
4812                    category.as_str(),
4813                    "Cash"
4814                        | "Receivables"
4815                        | "Inventory"
4816                        | "FixedAssets"
4817                        | "Payables"
4818                        | "AccruedLiabilities"
4819                        | "LongTermDebt"
4820                        | "Equity"
4821                );
4822
4823                if is_bs_account {
4824                    // Balance sheet: accumulate from start through period_end
4825                    if je.header.document_date <= period_end
4826                        && je.header.document_date >= start_date
4827                    {
4828                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
4829                            line.debit_amount;
4830                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
4831                            line.credit_amount;
4832                    }
4833                } else {
4834                    // Income statement: current period only
4835                    if je.header.fiscal_year == fiscal_year
4836                        && je.header.fiscal_period == fiscal_period
4837                    {
4838                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
4839                            line.debit_amount;
4840                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
4841                            line.credit_amount;
4842                    }
4843                }
4844            }
4845        }
4846
4847        // Merge all accounts
4848        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
4849        all_accounts.extend(bs_debits.keys().cloned());
4850        all_accounts.extend(bs_credits.keys().cloned());
4851        all_accounts.extend(is_debits.keys().cloned());
4852        all_accounts.extend(is_credits.keys().cloned());
4853
4854        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
4855        sorted_accounts.sort();
4856
4857        let mut entries = Vec::new();
4858
4859        for acct_number in &sorted_accounts {
4860            let category = Self::category_from_account_code(acct_number);
4861            let is_bs_account = matches!(
4862                category.as_str(),
4863                "Cash"
4864                    | "Receivables"
4865                    | "Inventory"
4866                    | "FixedAssets"
4867                    | "Payables"
4868                    | "AccruedLiabilities"
4869                    | "LongTermDebt"
4870                    | "Equity"
4871            );
4872
4873            let (debit, credit) = if is_bs_account {
4874                (
4875                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
4876                    bs_credits
4877                        .get(acct_number)
4878                        .copied()
4879                        .unwrap_or(Decimal::ZERO),
4880                )
4881            } else {
4882                (
4883                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
4884                    is_credits
4885                        .get(acct_number)
4886                        .copied()
4887                        .unwrap_or(Decimal::ZERO),
4888                )
4889            };
4890
4891            if debit.is_zero() && credit.is_zero() {
4892                continue;
4893            }
4894
4895            let account_name = coa
4896                .get_account(acct_number)
4897                .map(|gl| gl.short_description.clone())
4898                .unwrap_or_else(|| format!("Account {acct_number}"));
4899
4900            entries.push(datasynth_generators::TrialBalanceEntry {
4901                account_code: acct_number.clone(),
4902                account_name,
4903                category,
4904                debit_balance: debit,
4905                credit_balance: credit,
4906            });
4907        }
4908
4909        entries
4910    }
4911
4912    /// Build a JE-derived cash flow statement using the indirect method.
4913    ///
4914    /// Compares current and prior cumulative trial balances to derive working capital
4915    /// changes, producing a coherent cash flow statement tied to actual journal entries.
4916    fn build_cash_flow_from_trial_balances(
4917        current_tb: &[datasynth_generators::TrialBalanceEntry],
4918        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
4919        net_income: rust_decimal::Decimal,
4920    ) -> Vec<CashFlowItem> {
4921        use rust_decimal::Decimal;
4922
4923        // Helper: aggregate a TB by category and return net (debit - credit)
4924        let aggregate =
4925            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
4926                let mut map: HashMap<String, Decimal> = HashMap::new();
4927                for entry in tb {
4928                    let net = entry.debit_balance - entry.credit_balance;
4929                    *map.entry(entry.category.clone()).or_default() += net;
4930                }
4931                map
4932            };
4933
4934        let current = aggregate(current_tb);
4935        let prior = prior_tb.map(aggregate);
4936
4937        // Get balance for a category, defaulting to zero
4938        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
4939            *map.get(key).unwrap_or(&Decimal::ZERO)
4940        };
4941
4942        // Compute change: current - prior (or current if no prior)
4943        let change = |key: &str| -> Decimal {
4944            let curr = get(&current, key);
4945            match &prior {
4946                Some(p) => curr - get(p, key),
4947                None => curr,
4948            }
4949        };
4950
4951        // Operating activities (indirect method)
4952        // Depreciation add-back: approximate from FixedAssets decrease
4953        let fixed_asset_change = change("FixedAssets");
4954        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
4955            -fixed_asset_change
4956        } else {
4957            Decimal::ZERO
4958        };
4959
4960        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
4961        let ar_change = change("Receivables");
4962        let inventory_change = change("Inventory");
4963        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
4964        let ap_change = change("Payables");
4965        let accrued_change = change("AccruedLiabilities");
4966
4967        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
4968            + (-ap_change)
4969            + (-accrued_change);
4970
4971        // Investing activities
4972        let capex = if fixed_asset_change > Decimal::ZERO {
4973            -fixed_asset_change
4974        } else {
4975            Decimal::ZERO
4976        };
4977        let investing_cf = capex;
4978
4979        // Financing activities
4980        let debt_change = -change("LongTermDebt");
4981        let equity_change = -change("Equity");
4982        let financing_cf = debt_change + equity_change;
4983
4984        let net_change = operating_cf + investing_cf + financing_cf;
4985
4986        vec![
4987            CashFlowItem {
4988                item_code: "CF-NI".to_string(),
4989                label: "Net Income".to_string(),
4990                category: CashFlowCategory::Operating,
4991                amount: net_income,
4992                amount_prior: None,
4993                sort_order: 1,
4994                is_total: false,
4995            },
4996            CashFlowItem {
4997                item_code: "CF-DEP".to_string(),
4998                label: "Depreciation & Amortization".to_string(),
4999                category: CashFlowCategory::Operating,
5000                amount: depreciation_addback,
5001                amount_prior: None,
5002                sort_order: 2,
5003                is_total: false,
5004            },
5005            CashFlowItem {
5006                item_code: "CF-AR".to_string(),
5007                label: "Change in Accounts Receivable".to_string(),
5008                category: CashFlowCategory::Operating,
5009                amount: -ar_change,
5010                amount_prior: None,
5011                sort_order: 3,
5012                is_total: false,
5013            },
5014            CashFlowItem {
5015                item_code: "CF-AP".to_string(),
5016                label: "Change in Accounts Payable".to_string(),
5017                category: CashFlowCategory::Operating,
5018                amount: -ap_change,
5019                amount_prior: None,
5020                sort_order: 4,
5021                is_total: false,
5022            },
5023            CashFlowItem {
5024                item_code: "CF-INV".to_string(),
5025                label: "Change in Inventory".to_string(),
5026                category: CashFlowCategory::Operating,
5027                amount: -inventory_change,
5028                amount_prior: None,
5029                sort_order: 5,
5030                is_total: false,
5031            },
5032            CashFlowItem {
5033                item_code: "CF-OP".to_string(),
5034                label: "Net Cash from Operating Activities".to_string(),
5035                category: CashFlowCategory::Operating,
5036                amount: operating_cf,
5037                amount_prior: None,
5038                sort_order: 6,
5039                is_total: true,
5040            },
5041            CashFlowItem {
5042                item_code: "CF-CAPEX".to_string(),
5043                label: "Capital Expenditures".to_string(),
5044                category: CashFlowCategory::Investing,
5045                amount: capex,
5046                amount_prior: None,
5047                sort_order: 7,
5048                is_total: false,
5049            },
5050            CashFlowItem {
5051                item_code: "CF-INV-T".to_string(),
5052                label: "Net Cash from Investing Activities".to_string(),
5053                category: CashFlowCategory::Investing,
5054                amount: investing_cf,
5055                amount_prior: None,
5056                sort_order: 8,
5057                is_total: true,
5058            },
5059            CashFlowItem {
5060                item_code: "CF-DEBT".to_string(),
5061                label: "Net Borrowings / (Repayments)".to_string(),
5062                category: CashFlowCategory::Financing,
5063                amount: debt_change,
5064                amount_prior: None,
5065                sort_order: 9,
5066                is_total: false,
5067            },
5068            CashFlowItem {
5069                item_code: "CF-EQ".to_string(),
5070                label: "Equity Changes".to_string(),
5071                category: CashFlowCategory::Financing,
5072                amount: equity_change,
5073                amount_prior: None,
5074                sort_order: 10,
5075                is_total: false,
5076            },
5077            CashFlowItem {
5078                item_code: "CF-FIN-T".to_string(),
5079                label: "Net Cash from Financing Activities".to_string(),
5080                category: CashFlowCategory::Financing,
5081                amount: financing_cf,
5082                amount_prior: None,
5083                sort_order: 11,
5084                is_total: true,
5085            },
5086            CashFlowItem {
5087                item_code: "CF-NET".to_string(),
5088                label: "Net Change in Cash".to_string(),
5089                category: CashFlowCategory::Operating,
5090                amount: net_change,
5091                amount_prior: None,
5092                sort_order: 12,
5093                is_total: true,
5094            },
5095        ]
5096    }
5097
5098    /// Calculate net income from a set of trial balance entries.
5099    ///
5100    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
5101    fn calculate_net_income_from_tb(
5102        tb: &[datasynth_generators::TrialBalanceEntry],
5103    ) -> rust_decimal::Decimal {
5104        use rust_decimal::Decimal;
5105
5106        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
5107        for entry in tb {
5108            let net = entry.debit_balance - entry.credit_balance;
5109            *aggregated.entry(entry.category.clone()).or_default() += net;
5110        }
5111
5112        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
5113        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
5114        let opex = *aggregated
5115            .get("OperatingExpenses")
5116            .unwrap_or(&Decimal::ZERO);
5117        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
5118        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
5119
5120        // revenue is negative (credit-normal), expenses are positive (debit-normal)
5121        // other_income is typically negative (credit), other_expenses is typically positive
5122        let operating_income = revenue - cogs - opex - other_expenses - other_income;
5123        let tax_rate = Decimal::new(25, 2); // 0.25
5124        let tax = operating_income * tax_rate;
5125        operating_income - tax
5126    }
5127
5128    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
5129    ///
5130    /// Uses the first two digits of the account code to classify into the categories
5131    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
5132    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
5133    /// OperatingExpenses, OtherIncome, OtherExpenses.
5134    fn category_from_account_code(code: &str) -> String {
5135        let prefix: String = code.chars().take(2).collect();
5136        match prefix.as_str() {
5137            "10" => "Cash",
5138            "11" => "Receivables",
5139            "12" | "13" | "14" => "Inventory",
5140            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
5141            "20" => "Payables",
5142            "21" | "22" | "23" | "24" => "AccruedLiabilities",
5143            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
5144            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
5145            "40" | "41" | "42" | "43" | "44" => "Revenue",
5146            "50" | "51" | "52" => "CostOfSales",
5147            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
5148                "OperatingExpenses"
5149            }
5150            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
5151            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
5152            _ => "OperatingExpenses",
5153        }
5154        .to_string()
5155    }
5156
5157    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
5158    fn phase_hr_data(
5159        &mut self,
5160        stats: &mut EnhancedGenerationStatistics,
5161    ) -> SynthResult<HrSnapshot> {
5162        if !self.phase_config.generate_hr {
5163            debug!("Phase 16: Skipped (HR generation disabled)");
5164            return Ok(HrSnapshot::default());
5165        }
5166
5167        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
5168
5169        let seed = self.seed;
5170        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5171            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5172        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5173        let company_code = self
5174            .config
5175            .companies
5176            .first()
5177            .map(|c| c.code.as_str())
5178            .unwrap_or("1000");
5179        let currency = self
5180            .config
5181            .companies
5182            .first()
5183            .map(|c| c.currency.as_str())
5184            .unwrap_or("USD");
5185
5186        let employee_ids: Vec<String> = self
5187            .master_data
5188            .employees
5189            .iter()
5190            .map(|e| e.employee_id.clone())
5191            .collect();
5192
5193        if employee_ids.is_empty() {
5194            debug!("Phase 16: Skipped (no employees available)");
5195            return Ok(HrSnapshot::default());
5196        }
5197
5198        // Extract cost-center pool from master data employees for cross-reference
5199        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
5200        let cost_center_ids: Vec<String> = self
5201            .master_data
5202            .employees
5203            .iter()
5204            .filter_map(|e| e.cost_center.clone())
5205            .collect::<std::collections::HashSet<_>>()
5206            .into_iter()
5207            .collect();
5208
5209        let mut snapshot = HrSnapshot::default();
5210
5211        // Generate payroll runs (one per month)
5212        if self.config.hr.payroll.enabled {
5213            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
5214                .with_pools(employee_ids.clone(), cost_center_ids.clone());
5215
5216            // Look up country pack for payroll deductions and labels
5217            let payroll_pack = self.primary_pack();
5218
5219            // Store the pack on the generator so generate() resolves
5220            // localized deduction rates and labels from it.
5221            payroll_gen.set_country_pack(payroll_pack.clone());
5222
5223            let employees_with_salary: Vec<(
5224                String,
5225                rust_decimal::Decimal,
5226                Option<String>,
5227                Option<String>,
5228            )> = self
5229                .master_data
5230                .employees
5231                .iter()
5232                .map(|e| {
5233                    (
5234                        e.employee_id.clone(),
5235                        rust_decimal::Decimal::from(5000), // Default monthly salary
5236                        e.cost_center.clone(),
5237                        e.department_id.clone(),
5238                    )
5239                })
5240                .collect();
5241
5242            for month in 0..self.config.global.period_months {
5243                let period_start = start_date + chrono::Months::new(month);
5244                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
5245                let (run, items) = payroll_gen.generate(
5246                    company_code,
5247                    &employees_with_salary,
5248                    period_start,
5249                    period_end,
5250                    currency,
5251                );
5252                snapshot.payroll_runs.push(run);
5253                snapshot.payroll_run_count += 1;
5254                snapshot.payroll_line_item_count += items.len();
5255                snapshot.payroll_line_items.extend(items);
5256            }
5257        }
5258
5259        // Generate time entries
5260        if self.config.hr.time_attendance.enabled {
5261            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
5262                .with_pools(employee_ids.clone(), cost_center_ids.clone());
5263            let entries = time_gen.generate(
5264                &employee_ids,
5265                start_date,
5266                end_date,
5267                &self.config.hr.time_attendance,
5268            );
5269            snapshot.time_entry_count = entries.len();
5270            snapshot.time_entries = entries;
5271        }
5272
5273        // Generate expense reports
5274        if self.config.hr.expenses.enabled {
5275            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
5276                .with_pools(employee_ids.clone(), cost_center_ids.clone());
5277            expense_gen.set_country_pack(self.primary_pack().clone());
5278            let company_currency = self
5279                .config
5280                .companies
5281                .first()
5282                .map(|c| c.currency.as_str())
5283                .unwrap_or("USD");
5284            let reports = expense_gen.generate_with_currency(
5285                &employee_ids,
5286                start_date,
5287                end_date,
5288                &self.config.hr.expenses,
5289                company_currency,
5290            );
5291            snapshot.expense_report_count = reports.len();
5292            snapshot.expense_reports = reports;
5293        }
5294
5295        // Generate benefit enrollments (gated on payroll, since benefits require employees)
5296        if self.config.hr.payroll.enabled {
5297            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
5298            let employee_pairs: Vec<(String, String)> = self
5299                .master_data
5300                .employees
5301                .iter()
5302                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
5303                .collect();
5304            let enrollments =
5305                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
5306            snapshot.benefit_enrollment_count = enrollments.len();
5307            snapshot.benefit_enrollments = enrollments;
5308        }
5309
5310        // Generate defined benefit pension plans (IAS 19 / ASC 715)
5311        if self.phase_config.generate_hr {
5312            let entity_name = self
5313                .config
5314                .companies
5315                .first()
5316                .map(|c| c.name.as_str())
5317                .unwrap_or("Entity");
5318            let period_months = self.config.global.period_months;
5319            let period_label = {
5320                let y = start_date.year();
5321                let m = start_date.month();
5322                if period_months >= 12 {
5323                    format!("FY{y}")
5324                } else {
5325                    format!("{y}-{m:02}")
5326                }
5327            };
5328            let reporting_date =
5329                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
5330
5331            // Compute average annual salary from actual payroll data when available.
5332            // PayrollRun.total_gross covers all employees for one pay period; we sum
5333            // across all runs and divide by employee_count to get per-employee total,
5334            // then annualise for sub-annual periods.
5335            let avg_salary: Option<rust_decimal::Decimal> = {
5336                let employee_count = employee_ids.len();
5337                if self.config.hr.payroll.enabled
5338                    && employee_count > 0
5339                    && !snapshot.payroll_runs.is_empty()
5340                {
5341                    // Sum total gross pay across all payroll runs for this company
5342                    let total_gross: rust_decimal::Decimal = snapshot
5343                        .payroll_runs
5344                        .iter()
5345                        .filter(|r| r.company_code == company_code)
5346                        .map(|r| r.total_gross)
5347                        .sum();
5348                    if total_gross > rust_decimal::Decimal::ZERO {
5349                        // Annualise: total_gross covers `period_months` months of pay
5350                        let annual_total = if period_months > 0 && period_months < 12 {
5351                            total_gross * rust_decimal::Decimal::from(12u32)
5352                                / rust_decimal::Decimal::from(period_months)
5353                        } else {
5354                            total_gross
5355                        };
5356                        Some(
5357                            (annual_total / rust_decimal::Decimal::from(employee_count))
5358                                .round_dp(2),
5359                        )
5360                    } else {
5361                        None
5362                    }
5363                } else {
5364                    None
5365                }
5366            };
5367
5368            let mut pension_gen =
5369                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
5370            let pension_snap = pension_gen.generate(
5371                company_code,
5372                entity_name,
5373                &period_label,
5374                reporting_date,
5375                employee_ids.len(),
5376                currency,
5377                avg_salary,
5378                period_months,
5379            );
5380            snapshot.pension_plan_count = pension_snap.plans.len();
5381            snapshot.pension_plans = pension_snap.plans;
5382            snapshot.pension_obligations = pension_snap.obligations;
5383            snapshot.pension_plan_assets = pension_snap.plan_assets;
5384            snapshot.pension_disclosures = pension_snap.disclosures;
5385            // Pension JEs are returned here so they can be added to entries
5386            // in the caller (stored temporarily on snapshot for transfer).
5387            // We embed them in the hr snapshot for simplicity; the orchestrator
5388            // will extract and extend `entries`.
5389            snapshot.pension_journal_entries = pension_snap.journal_entries;
5390        }
5391
5392        // Generate stock-based compensation (ASC 718 / IFRS 2)
5393        if self.phase_config.generate_hr && !employee_ids.is_empty() {
5394            let period_months = self.config.global.period_months;
5395            let period_label = {
5396                let y = start_date.year();
5397                let m = start_date.month();
5398                if period_months >= 12 {
5399                    format!("FY{y}")
5400                } else {
5401                    format!("{y}-{m:02}")
5402                }
5403            };
5404            let reporting_date =
5405                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
5406
5407            let mut stock_comp_gen =
5408                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
5409            let stock_snap = stock_comp_gen.generate(
5410                company_code,
5411                &employee_ids,
5412                start_date,
5413                &period_label,
5414                reporting_date,
5415                currency,
5416            );
5417            snapshot.stock_grant_count = stock_snap.grants.len();
5418            snapshot.stock_grants = stock_snap.grants;
5419            snapshot.stock_comp_expenses = stock_snap.expenses;
5420            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
5421        }
5422
5423        stats.payroll_run_count = snapshot.payroll_run_count;
5424        stats.time_entry_count = snapshot.time_entry_count;
5425        stats.expense_report_count = snapshot.expense_report_count;
5426        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
5427        stats.pension_plan_count = snapshot.pension_plan_count;
5428        stats.stock_grant_count = snapshot.stock_grant_count;
5429
5430        info!(
5431            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
5432            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
5433            snapshot.time_entry_count, snapshot.expense_report_count,
5434            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
5435            snapshot.stock_grant_count
5436        );
5437        self.check_resources_with_log("post-hr")?;
5438
5439        Ok(snapshot)
5440    }
5441
5442    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
5443    fn phase_accounting_standards(
5444        &mut self,
5445        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
5446        journal_entries: &[JournalEntry],
5447        stats: &mut EnhancedGenerationStatistics,
5448    ) -> SynthResult<AccountingStandardsSnapshot> {
5449        if !self.phase_config.generate_accounting_standards {
5450            debug!("Phase 17: Skipped (accounting standards generation disabled)");
5451            return Ok(AccountingStandardsSnapshot::default());
5452        }
5453        info!("Phase 17: Generating Accounting Standards Data");
5454
5455        let seed = self.seed;
5456        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5457            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5458        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5459        let company_code = self
5460            .config
5461            .companies
5462            .first()
5463            .map(|c| c.code.as_str())
5464            .unwrap_or("1000");
5465        let currency = self
5466            .config
5467            .companies
5468            .first()
5469            .map(|c| c.currency.as_str())
5470            .unwrap_or("USD");
5471
5472        // Convert config framework to standards framework.
5473        // If the user explicitly set a framework in the YAML config, use that.
5474        // Otherwise, fall back to the country pack's accounting.framework field,
5475        // and if that is also absent or unrecognised, default to US GAAP.
5476        let framework = match self.config.accounting_standards.framework {
5477            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
5478                datasynth_standards::framework::AccountingFramework::UsGaap
5479            }
5480            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
5481                datasynth_standards::framework::AccountingFramework::Ifrs
5482            }
5483            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
5484                datasynth_standards::framework::AccountingFramework::DualReporting
5485            }
5486            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
5487                datasynth_standards::framework::AccountingFramework::FrenchGaap
5488            }
5489            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
5490                datasynth_standards::framework::AccountingFramework::GermanGaap
5491            }
5492            None => {
5493                // Derive framework from the primary company's country pack
5494                let pack = self.primary_pack();
5495                let pack_fw = pack.accounting.framework.as_str();
5496                match pack_fw {
5497                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
5498                    "dual_reporting" => {
5499                        datasynth_standards::framework::AccountingFramework::DualReporting
5500                    }
5501                    "french_gaap" => {
5502                        datasynth_standards::framework::AccountingFramework::FrenchGaap
5503                    }
5504                    "german_gaap" | "hgb" => {
5505                        datasynth_standards::framework::AccountingFramework::GermanGaap
5506                    }
5507                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
5508                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
5509                }
5510            }
5511        };
5512
5513        let mut snapshot = AccountingStandardsSnapshot::default();
5514
5515        // Revenue recognition
5516        if self.config.accounting_standards.revenue_recognition.enabled {
5517            let customer_ids: Vec<String> = self
5518                .master_data
5519                .customers
5520                .iter()
5521                .map(|c| c.customer_id.clone())
5522                .collect();
5523
5524            if !customer_ids.is_empty() {
5525                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
5526                let contracts = rev_gen.generate(
5527                    company_code,
5528                    &customer_ids,
5529                    start_date,
5530                    end_date,
5531                    currency,
5532                    &self.config.accounting_standards.revenue_recognition,
5533                    framework,
5534                );
5535                snapshot.revenue_contract_count = contracts.len();
5536                snapshot.contracts = contracts;
5537            }
5538        }
5539
5540        // Impairment testing
5541        if self.config.accounting_standards.impairment.enabled {
5542            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
5543                .master_data
5544                .assets
5545                .iter()
5546                .map(|a| {
5547                    (
5548                        a.asset_id.clone(),
5549                        a.description.clone(),
5550                        a.acquisition_cost,
5551                    )
5552                })
5553                .collect();
5554
5555            if !asset_data.is_empty() {
5556                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
5557                let tests = imp_gen.generate(
5558                    company_code,
5559                    &asset_data,
5560                    end_date,
5561                    &self.config.accounting_standards.impairment,
5562                    framework,
5563                );
5564                snapshot.impairment_test_count = tests.len();
5565                snapshot.impairment_tests = tests;
5566            }
5567        }
5568
5569        // Business combinations (IFRS 3 / ASC 805)
5570        if self
5571            .config
5572            .accounting_standards
5573            .business_combinations
5574            .enabled
5575        {
5576            let bc_config = &self.config.accounting_standards.business_combinations;
5577            let framework_str = match framework {
5578                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
5579                _ => "US_GAAP",
5580            };
5581            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
5582            let bc_snap = bc_gen.generate(
5583                company_code,
5584                currency,
5585                start_date,
5586                end_date,
5587                bc_config.acquisition_count,
5588                framework_str,
5589            );
5590            snapshot.business_combination_count = bc_snap.combinations.len();
5591            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
5592            snapshot.business_combinations = bc_snap.combinations;
5593        }
5594
5595        // Expected Credit Loss (IFRS 9 / ASC 326)
5596        if self
5597            .config
5598            .accounting_standards
5599            .expected_credit_loss
5600            .enabled
5601        {
5602            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
5603            let framework_str = match framework {
5604                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
5605                _ => "ASC_326",
5606            };
5607
5608            // Use AR aging data from the subledger snapshot if available;
5609            // otherwise generate synthetic bucket exposures.
5610            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
5611
5612            let mut ecl_gen = EclGenerator::new(seed + 43);
5613
5614            // Collect combined bucket totals across all company AR aging reports.
5615            let bucket_exposures: Vec<(
5616                datasynth_core::models::subledger::ar::AgingBucket,
5617                rust_decimal::Decimal,
5618            )> = if ar_aging_reports.is_empty() {
5619                // No AR aging data — synthesise plausible bucket exposures.
5620                use datasynth_core::models::subledger::ar::AgingBucket;
5621                vec![
5622                    (
5623                        AgingBucket::Current,
5624                        rust_decimal::Decimal::from(500_000_u32),
5625                    ),
5626                    (
5627                        AgingBucket::Days1To30,
5628                        rust_decimal::Decimal::from(120_000_u32),
5629                    ),
5630                    (
5631                        AgingBucket::Days31To60,
5632                        rust_decimal::Decimal::from(45_000_u32),
5633                    ),
5634                    (
5635                        AgingBucket::Days61To90,
5636                        rust_decimal::Decimal::from(15_000_u32),
5637                    ),
5638                    (
5639                        AgingBucket::Over90Days,
5640                        rust_decimal::Decimal::from(8_000_u32),
5641                    ),
5642                ]
5643            } else {
5644                use datasynth_core::models::subledger::ar::AgingBucket;
5645                // Sum bucket totals from all reports.
5646                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
5647                    std::collections::HashMap::new();
5648                for report in ar_aging_reports {
5649                    for (bucket, amount) in &report.bucket_totals {
5650                        *totals.entry(*bucket).or_default() += amount;
5651                    }
5652                }
5653                AgingBucket::all()
5654                    .into_iter()
5655                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
5656                    .collect()
5657            };
5658
5659            let ecl_snap = ecl_gen.generate(
5660                company_code,
5661                end_date,
5662                &bucket_exposures,
5663                ecl_config,
5664                &period_label,
5665                framework_str,
5666            );
5667
5668            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
5669            snapshot.ecl_models = ecl_snap.ecl_models;
5670            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
5671            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
5672        }
5673
5674        // Provisions and contingencies (IAS 37 / ASC 450)
5675        {
5676            let framework_str = match framework {
5677                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
5678                _ => "US_GAAP",
5679            };
5680
5681            // Compute actual revenue from the journal entries generated so far.
5682            // The `journal_entries` slice passed to this phase contains all GL entries
5683            // up to and including Period Close. Fall back to a minimum of 100_000 to
5684            // avoid degenerate zero-based provision amounts on first-period datasets.
5685            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
5686                .max(rust_decimal::Decimal::from(100_000_u32));
5687
5688            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
5689
5690            let mut prov_gen = ProvisionGenerator::new(seed + 44);
5691            let prov_snap = prov_gen.generate(
5692                company_code,
5693                currency,
5694                revenue_proxy,
5695                end_date,
5696                &period_label,
5697                framework_str,
5698            );
5699
5700            snapshot.provision_count = prov_snap.provisions.len();
5701            snapshot.provisions = prov_snap.provisions;
5702            snapshot.provision_movements = prov_snap.movements;
5703            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
5704            snapshot.provision_journal_entries = prov_snap.journal_entries;
5705        }
5706
5707        // IAS 21 Functional Currency Translation
5708        // For each company whose functional currency differs from the presentation
5709        // currency, generate a CurrencyTranslationResult with CTA (OCI).
5710        {
5711            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
5712
5713            let presentation_currency = self
5714                .config
5715                .global
5716                .presentation_currency
5717                .clone()
5718                .unwrap_or_else(|| self.config.global.group_currency.clone());
5719
5720            // Build a minimal rate table populated with approximate rates from
5721            // the FX model base rates (USD-based) so we can do the translation.
5722            let mut rate_table = FxRateTable::new(&presentation_currency);
5723
5724            // Populate with base rates against USD; if presentation_currency is
5725            // not USD we do a best-effort two-step conversion using the table's
5726            // triangulation support.
5727            let base_rates = base_rates_usd();
5728            for (ccy, rate) in &base_rates {
5729                rate_table.add_rate(FxRate::new(
5730                    ccy,
5731                    "USD",
5732                    RateType::Closing,
5733                    end_date,
5734                    *rate,
5735                    "SYNTHETIC",
5736                ));
5737                // Average rate = 98% of closing (approximation).
5738                // 0.98 = 98/100 = Decimal::new(98, 2)
5739                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
5740                rate_table.add_rate(FxRate::new(
5741                    ccy,
5742                    "USD",
5743                    RateType::Average,
5744                    end_date,
5745                    avg,
5746                    "SYNTHETIC",
5747                ));
5748            }
5749
5750            let mut translation_results = Vec::new();
5751            for company in &self.config.companies {
5752                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
5753                // to ensure the translation produces non-trivial CTA amounts.
5754                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
5755                    .max(rust_decimal::Decimal::from(100_000_u32));
5756
5757                let func_ccy = company
5758                    .functional_currency
5759                    .clone()
5760                    .unwrap_or_else(|| company.currency.clone());
5761
5762                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
5763                    &company.code,
5764                    &func_ccy,
5765                    &presentation_currency,
5766                    &ias21_period_label,
5767                    end_date,
5768                    company_revenue,
5769                    &rate_table,
5770                );
5771                translation_results.push(result);
5772            }
5773
5774            snapshot.currency_translation_count = translation_results.len();
5775            snapshot.currency_translation_results = translation_results;
5776        }
5777
5778        stats.revenue_contract_count = snapshot.revenue_contract_count;
5779        stats.impairment_test_count = snapshot.impairment_test_count;
5780        stats.business_combination_count = snapshot.business_combination_count;
5781        stats.ecl_model_count = snapshot.ecl_model_count;
5782        stats.provision_count = snapshot.provision_count;
5783
5784        info!(
5785            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
5786            snapshot.revenue_contract_count,
5787            snapshot.impairment_test_count,
5788            snapshot.business_combination_count,
5789            snapshot.ecl_model_count,
5790            snapshot.provision_count,
5791            snapshot.currency_translation_count
5792        );
5793        self.check_resources_with_log("post-accounting-standards")?;
5794
5795        Ok(snapshot)
5796    }
5797
5798    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
5799    fn phase_manufacturing(
5800        &mut self,
5801        stats: &mut EnhancedGenerationStatistics,
5802    ) -> SynthResult<ManufacturingSnapshot> {
5803        if !self.phase_config.generate_manufacturing {
5804            debug!("Phase 18: Skipped (manufacturing generation disabled)");
5805            return Ok(ManufacturingSnapshot::default());
5806        }
5807        info!("Phase 18: Generating Manufacturing Data");
5808
5809        let seed = self.seed;
5810        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5811            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5812        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5813        let company_code = self
5814            .config
5815            .companies
5816            .first()
5817            .map(|c| c.code.as_str())
5818            .unwrap_or("1000");
5819
5820        let material_data: Vec<(String, String)> = self
5821            .master_data
5822            .materials
5823            .iter()
5824            .map(|m| (m.material_id.clone(), m.description.clone()))
5825            .collect();
5826
5827        if material_data.is_empty() {
5828            debug!("Phase 18: Skipped (no materials available)");
5829            return Ok(ManufacturingSnapshot::default());
5830        }
5831
5832        let mut snapshot = ManufacturingSnapshot::default();
5833
5834        // Generate production orders
5835        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
5836        let production_orders = prod_gen.generate(
5837            company_code,
5838            &material_data,
5839            start_date,
5840            end_date,
5841            &self.config.manufacturing.production_orders,
5842            &self.config.manufacturing.costing,
5843            &self.config.manufacturing.routing,
5844        );
5845        snapshot.production_order_count = production_orders.len();
5846
5847        // Generate quality inspections from production orders
5848        let inspection_data: Vec<(String, String, String)> = production_orders
5849            .iter()
5850            .map(|po| {
5851                (
5852                    po.order_id.clone(),
5853                    po.material_id.clone(),
5854                    po.material_description.clone(),
5855                )
5856            })
5857            .collect();
5858
5859        snapshot.production_orders = production_orders;
5860
5861        if !inspection_data.is_empty() {
5862            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
5863            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
5864            snapshot.quality_inspection_count = inspections.len();
5865            snapshot.quality_inspections = inspections;
5866        }
5867
5868        // Generate cycle counts (one per month)
5869        let storage_locations: Vec<(String, String)> = material_data
5870            .iter()
5871            .enumerate()
5872            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
5873            .collect();
5874
5875        let employee_ids: Vec<String> = self
5876            .master_data
5877            .employees
5878            .iter()
5879            .map(|e| e.employee_id.clone())
5880            .collect();
5881        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
5882            .with_employee_pool(employee_ids);
5883        let mut cycle_count_total = 0usize;
5884        for month in 0..self.config.global.period_months {
5885            let count_date = start_date + chrono::Months::new(month);
5886            let items_per_count = storage_locations.len().clamp(10, 50);
5887            let cc = cc_gen.generate(
5888                company_code,
5889                &storage_locations,
5890                count_date,
5891                items_per_count,
5892            );
5893            snapshot.cycle_counts.push(cc);
5894            cycle_count_total += 1;
5895        }
5896        snapshot.cycle_count_count = cycle_count_total;
5897
5898        // Generate BOM components
5899        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 53);
5900        let bom_components = bom_gen.generate(company_code, &material_data);
5901        snapshot.bom_component_count = bom_components.len();
5902        snapshot.bom_components = bom_components;
5903
5904        // Generate inventory movements
5905        let currency = self
5906            .config
5907            .companies
5908            .first()
5909            .map(|c| c.currency.as_str())
5910            .unwrap_or("USD");
5911        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 54);
5912        let inventory_movements = inv_mov_gen.generate(
5913            company_code,
5914            &material_data,
5915            start_date,
5916            end_date,
5917            2,
5918            currency,
5919        );
5920        snapshot.inventory_movement_count = inventory_movements.len();
5921        snapshot.inventory_movements = inventory_movements;
5922
5923        stats.production_order_count = snapshot.production_order_count;
5924        stats.quality_inspection_count = snapshot.quality_inspection_count;
5925        stats.cycle_count_count = snapshot.cycle_count_count;
5926        stats.bom_component_count = snapshot.bom_component_count;
5927        stats.inventory_movement_count = snapshot.inventory_movement_count;
5928
5929        info!(
5930            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
5931            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
5932            snapshot.bom_component_count, snapshot.inventory_movement_count
5933        );
5934        self.check_resources_with_log("post-manufacturing")?;
5935
5936        Ok(snapshot)
5937    }
5938
5939    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
5940    fn phase_sales_kpi_budgets(
5941        &mut self,
5942        coa: &Arc<ChartOfAccounts>,
5943        financial_reporting: &FinancialReportingSnapshot,
5944        stats: &mut EnhancedGenerationStatistics,
5945    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
5946        if !self.phase_config.generate_sales_kpi_budgets {
5947            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
5948            return Ok(SalesKpiBudgetsSnapshot::default());
5949        }
5950        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
5951
5952        let seed = self.seed;
5953        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5954            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5955        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5956        let company_code = self
5957            .config
5958            .companies
5959            .first()
5960            .map(|c| c.code.as_str())
5961            .unwrap_or("1000");
5962
5963        let mut snapshot = SalesKpiBudgetsSnapshot::default();
5964
5965        // Sales Quotes
5966        if self.config.sales_quotes.enabled {
5967            let customer_data: Vec<(String, String)> = self
5968                .master_data
5969                .customers
5970                .iter()
5971                .map(|c| (c.customer_id.clone(), c.name.clone()))
5972                .collect();
5973            let material_data: Vec<(String, String)> = self
5974                .master_data
5975                .materials
5976                .iter()
5977                .map(|m| (m.material_id.clone(), m.description.clone()))
5978                .collect();
5979
5980            if !customer_data.is_empty() && !material_data.is_empty() {
5981                let employee_ids: Vec<String> = self
5982                    .master_data
5983                    .employees
5984                    .iter()
5985                    .map(|e| e.employee_id.clone())
5986                    .collect();
5987                let customer_ids: Vec<String> = self
5988                    .master_data
5989                    .customers
5990                    .iter()
5991                    .map(|c| c.customer_id.clone())
5992                    .collect();
5993                let company_currency = self
5994                    .config
5995                    .companies
5996                    .first()
5997                    .map(|c| c.currency.as_str())
5998                    .unwrap_or("USD");
5999
6000                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
6001                    .with_pools(employee_ids, customer_ids);
6002                let quotes = quote_gen.generate_with_currency(
6003                    company_code,
6004                    &customer_data,
6005                    &material_data,
6006                    start_date,
6007                    end_date,
6008                    &self.config.sales_quotes,
6009                    company_currency,
6010                );
6011                snapshot.sales_quote_count = quotes.len();
6012                snapshot.sales_quotes = quotes;
6013            }
6014        }
6015
6016        // Management KPIs
6017        if self.config.financial_reporting.management_kpis.enabled {
6018            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
6019            let mut kpis = kpi_gen.generate(
6020                company_code,
6021                start_date,
6022                end_date,
6023                &self.config.financial_reporting.management_kpis,
6024            );
6025
6026            // Override financial KPIs with actual data from financial statements
6027            {
6028                use rust_decimal::Decimal;
6029
6030                if let Some(income_stmt) =
6031                    financial_reporting.financial_statements.iter().find(|fs| {
6032                        fs.statement_type == StatementType::IncomeStatement
6033                            && fs.company_code == company_code
6034                    })
6035                {
6036                    // Extract revenue and COGS from income statement line items
6037                    let total_revenue: Decimal = income_stmt
6038                        .line_items
6039                        .iter()
6040                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
6041                        .map(|li| li.amount)
6042                        .sum();
6043                    let total_cogs: Decimal = income_stmt
6044                        .line_items
6045                        .iter()
6046                        .filter(|li| {
6047                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
6048                                && !li.is_total
6049                        })
6050                        .map(|li| li.amount.abs())
6051                        .sum();
6052                    let total_opex: Decimal = income_stmt
6053                        .line_items
6054                        .iter()
6055                        .filter(|li| {
6056                            li.section.contains("Expense")
6057                                && !li.is_total
6058                                && !li.section.contains("Cost")
6059                        })
6060                        .map(|li| li.amount.abs())
6061                        .sum();
6062
6063                    if total_revenue > Decimal::ZERO {
6064                        let hundred = Decimal::from(100);
6065                        let gross_margin_pct =
6066                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
6067                        let operating_income = total_revenue - total_cogs - total_opex;
6068                        let op_margin_pct =
6069                            (operating_income * hundred / total_revenue).round_dp(2);
6070
6071                        // Override gross margin and operating margin KPIs
6072                        for kpi in &mut kpis {
6073                            if kpi.name == "Gross Margin" {
6074                                kpi.value = gross_margin_pct;
6075                            } else if kpi.name == "Operating Margin" {
6076                                kpi.value = op_margin_pct;
6077                            }
6078                        }
6079                    }
6080                }
6081
6082                // Override Current Ratio from balance sheet
6083                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
6084                    fs.statement_type == StatementType::BalanceSheet
6085                        && fs.company_code == company_code
6086                }) {
6087                    let current_assets: Decimal = bs
6088                        .line_items
6089                        .iter()
6090                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
6091                        .map(|li| li.amount)
6092                        .sum();
6093                    let current_liabilities: Decimal = bs
6094                        .line_items
6095                        .iter()
6096                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
6097                        .map(|li| li.amount.abs())
6098                        .sum();
6099
6100                    if current_liabilities > Decimal::ZERO {
6101                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
6102                        for kpi in &mut kpis {
6103                            if kpi.name == "Current Ratio" {
6104                                kpi.value = current_ratio;
6105                            }
6106                        }
6107                    }
6108                }
6109            }
6110
6111            snapshot.kpi_count = kpis.len();
6112            snapshot.kpis = kpis;
6113        }
6114
6115        // Budgets
6116        if self.config.financial_reporting.budgets.enabled {
6117            let account_data: Vec<(String, String)> = coa
6118                .accounts
6119                .iter()
6120                .map(|a| (a.account_number.clone(), a.short_description.clone()))
6121                .collect();
6122
6123            if !account_data.is_empty() {
6124                let fiscal_year = start_date.year() as u32;
6125                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
6126                let budget = budget_gen.generate(
6127                    company_code,
6128                    fiscal_year,
6129                    &account_data,
6130                    &self.config.financial_reporting.budgets,
6131                );
6132                snapshot.budget_line_count = budget.line_items.len();
6133                snapshot.budgets.push(budget);
6134            }
6135        }
6136
6137        stats.sales_quote_count = snapshot.sales_quote_count;
6138        stats.kpi_count = snapshot.kpi_count;
6139        stats.budget_line_count = snapshot.budget_line_count;
6140
6141        info!(
6142            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
6143            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
6144        );
6145        self.check_resources_with_log("post-sales-kpi-budgets")?;
6146
6147        Ok(snapshot)
6148    }
6149
6150    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
6151    fn phase_tax_generation(
6152        &mut self,
6153        document_flows: &DocumentFlowSnapshot,
6154        journal_entries: &[JournalEntry],
6155        stats: &mut EnhancedGenerationStatistics,
6156    ) -> SynthResult<TaxSnapshot> {
6157        if !self.phase_config.generate_tax {
6158            debug!("Phase 20: Skipped (tax generation disabled)");
6159            return Ok(TaxSnapshot::default());
6160        }
6161        info!("Phase 20: Generating Tax Data");
6162
6163        let seed = self.seed;
6164        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6165            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6166        let fiscal_year = start_date.year();
6167        let company_code = self
6168            .config
6169            .companies
6170            .first()
6171            .map(|c| c.code.as_str())
6172            .unwrap_or("1000");
6173
6174        let mut gen =
6175            datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
6176
6177        let pack = self.primary_pack().clone();
6178        let (jurisdictions, codes) =
6179            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
6180
6181        // Generate tax provisions for each company
6182        let mut provisions = Vec::new();
6183        if self.config.tax.provisions.enabled {
6184            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
6185            for company in &self.config.companies {
6186                let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
6187                let statutory_rate = rust_decimal::Decimal::new(
6188                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
6189                    2,
6190                );
6191                let provision = provision_gen.generate(
6192                    &company.code,
6193                    start_date,
6194                    pre_tax_income,
6195                    statutory_rate,
6196                );
6197                provisions.push(provision);
6198            }
6199        }
6200
6201        // Generate tax lines from document invoices
6202        let mut tax_lines = Vec::new();
6203        if !codes.is_empty() {
6204            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
6205                datasynth_generators::TaxLineGeneratorConfig::default(),
6206                codes.clone(),
6207                seed + 72,
6208            );
6209
6210            // Tax lines from vendor invoices (input tax)
6211            // Use the first company's country as buyer country
6212            let buyer_country = self
6213                .config
6214                .companies
6215                .first()
6216                .map(|c| c.country.as_str())
6217                .unwrap_or("US");
6218            for vi in &document_flows.vendor_invoices {
6219                let lines = tax_line_gen.generate_for_document(
6220                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
6221                    &vi.header.document_id,
6222                    buyer_country, // seller approx same country
6223                    buyer_country,
6224                    vi.payable_amount,
6225                    vi.header.document_date,
6226                    None,
6227                );
6228                tax_lines.extend(lines);
6229            }
6230
6231            // Tax lines from customer invoices (output tax)
6232            for ci in &document_flows.customer_invoices {
6233                let lines = tax_line_gen.generate_for_document(
6234                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
6235                    &ci.header.document_id,
6236                    buyer_country, // seller is the company
6237                    buyer_country,
6238                    ci.total_gross_amount,
6239                    ci.header.document_date,
6240                    None,
6241                );
6242                tax_lines.extend(lines);
6243            }
6244        }
6245
6246        // Generate deferred tax data (IAS 12 / ASC 740) for each company
6247        let deferred_tax = {
6248            let companies: Vec<(&str, &str)> = self
6249                .config
6250                .companies
6251                .iter()
6252                .map(|c| (c.code.as_str(), c.country.as_str()))
6253                .collect();
6254            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 73);
6255            deferred_gen.generate(&companies, start_date, journal_entries)
6256        };
6257
6258        let snapshot = TaxSnapshot {
6259            jurisdiction_count: jurisdictions.len(),
6260            code_count: codes.len(),
6261            jurisdictions,
6262            codes,
6263            tax_provisions: provisions,
6264            tax_lines,
6265            tax_returns: Vec::new(),
6266            withholding_records: Vec::new(),
6267            tax_anomaly_labels: Vec::new(),
6268            deferred_tax,
6269        };
6270
6271        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
6272        stats.tax_code_count = snapshot.code_count;
6273        stats.tax_provision_count = snapshot.tax_provisions.len();
6274        stats.tax_line_count = snapshot.tax_lines.len();
6275
6276        info!(
6277            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs",
6278            snapshot.jurisdiction_count,
6279            snapshot.code_count,
6280            snapshot.tax_provisions.len(),
6281            snapshot.deferred_tax.temporary_differences.len(),
6282            snapshot.deferred_tax.journal_entries.len(),
6283        );
6284        self.check_resources_with_log("post-tax")?;
6285
6286        Ok(snapshot)
6287    }
6288
6289    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
6290    fn phase_esg_generation(
6291        &mut self,
6292        document_flows: &DocumentFlowSnapshot,
6293        stats: &mut EnhancedGenerationStatistics,
6294    ) -> SynthResult<EsgSnapshot> {
6295        if !self.phase_config.generate_esg {
6296            debug!("Phase 21: Skipped (ESG generation disabled)");
6297            return Ok(EsgSnapshot::default());
6298        }
6299        info!("Phase 21: Generating ESG Data");
6300
6301        let seed = self.seed;
6302        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6303            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6304        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6305        let entity_id = self
6306            .config
6307            .companies
6308            .first()
6309            .map(|c| c.code.as_str())
6310            .unwrap_or("1000");
6311
6312        let esg_cfg = &self.config.esg;
6313        let mut snapshot = EsgSnapshot::default();
6314
6315        // Energy consumption (feeds into scope 1 & 2 emissions)
6316        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
6317            esg_cfg.environmental.energy.clone(),
6318            seed + 80,
6319        );
6320        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
6321
6322        // Water usage
6323        let facility_count = esg_cfg.environmental.energy.facility_count;
6324        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
6325        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
6326
6327        // Waste
6328        let mut waste_gen = datasynth_generators::WasteGenerator::new(
6329            seed + 82,
6330            esg_cfg.environmental.waste.diversion_target,
6331            facility_count,
6332        );
6333        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
6334
6335        // Emissions (scope 1, 2, 3)
6336        let mut emission_gen =
6337            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
6338
6339        // Build EnergyInput from energy_records
6340        let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
6341            .iter()
6342            .map(|e| datasynth_generators::EnergyInput {
6343                facility_id: e.facility_id.clone(),
6344                energy_type: match e.energy_source {
6345                    EnergySourceType::NaturalGas => {
6346                        datasynth_generators::EnergyInputType::NaturalGas
6347                    }
6348                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
6349                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
6350                    _ => datasynth_generators::EnergyInputType::Electricity,
6351                },
6352                consumption_kwh: e.consumption_kwh,
6353                period: e.period,
6354            })
6355            .collect();
6356
6357        let mut emissions = Vec::new();
6358        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
6359        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
6360
6361        // Scope 3: use vendor spend data from actual payments
6362        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
6363            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
6364            for payment in &document_flows.payments {
6365                if payment.is_vendor {
6366                    *totals
6367                        .entry(payment.business_partner_id.clone())
6368                        .or_default() += payment.amount;
6369                }
6370            }
6371            totals
6372        };
6373        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
6374            .master_data
6375            .vendors
6376            .iter()
6377            .map(|v| {
6378                let spend = vendor_payment_totals
6379                    .get(&v.vendor_id)
6380                    .copied()
6381                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
6382                datasynth_generators::VendorSpendInput {
6383                    vendor_id: v.vendor_id.clone(),
6384                    category: format!("{:?}", v.vendor_type).to_lowercase(),
6385                    spend,
6386                    country: v.country.clone(),
6387                }
6388            })
6389            .collect();
6390        if !vendor_spend.is_empty() {
6391            emissions.extend(emission_gen.generate_scope3_purchased_goods(
6392                entity_id,
6393                &vendor_spend,
6394                start_date,
6395                end_date,
6396            ));
6397        }
6398
6399        // Business travel & commuting (scope 3)
6400        let headcount = self.master_data.employees.len() as u32;
6401        if headcount > 0 {
6402            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
6403            emissions.extend(emission_gen.generate_scope3_business_travel(
6404                entity_id,
6405                travel_spend,
6406                start_date,
6407            ));
6408            emissions
6409                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
6410        }
6411
6412        snapshot.emission_count = emissions.len();
6413        snapshot.emissions = emissions;
6414        snapshot.energy = energy_records;
6415
6416        // Social: Workforce diversity, pay equity, safety
6417        let mut workforce_gen =
6418            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
6419        let total_headcount = headcount.max(100);
6420        snapshot.diversity =
6421            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
6422        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
6423        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
6424            entity_id,
6425            facility_count,
6426            start_date,
6427            end_date,
6428        );
6429
6430        // Compute safety metrics
6431        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
6432        let safety_metric = workforce_gen.compute_safety_metrics(
6433            entity_id,
6434            &snapshot.safety_incidents,
6435            total_hours,
6436            start_date,
6437        );
6438        snapshot.safety_metrics = vec![safety_metric];
6439
6440        // Governance
6441        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
6442            seed + 85,
6443            esg_cfg.governance.board_size,
6444            esg_cfg.governance.independence_target,
6445        );
6446        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
6447
6448        // Supplier ESG assessments
6449        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
6450            esg_cfg.supply_chain_esg.clone(),
6451            seed + 86,
6452        );
6453        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
6454            .master_data
6455            .vendors
6456            .iter()
6457            .map(|v| datasynth_generators::VendorInput {
6458                vendor_id: v.vendor_id.clone(),
6459                country: v.country.clone(),
6460                industry: format!("{:?}", v.vendor_type).to_lowercase(),
6461                quality_score: None,
6462            })
6463            .collect();
6464        snapshot.supplier_assessments =
6465            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
6466
6467        // Disclosures
6468        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
6469            seed + 87,
6470            esg_cfg.reporting.clone(),
6471            esg_cfg.climate_scenarios.clone(),
6472        );
6473        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
6474        snapshot.disclosures = disclosure_gen.generate_disclosures(
6475            entity_id,
6476            &snapshot.materiality,
6477            start_date,
6478            end_date,
6479        );
6480        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
6481        snapshot.disclosure_count = snapshot.disclosures.len();
6482
6483        // Anomaly injection
6484        if esg_cfg.anomaly_rate > 0.0 {
6485            let mut anomaly_injector =
6486                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
6487            let mut labels = Vec::new();
6488            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
6489            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
6490            labels.extend(
6491                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
6492            );
6493            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
6494            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
6495            snapshot.anomaly_labels = labels;
6496        }
6497
6498        stats.esg_emission_count = snapshot.emission_count;
6499        stats.esg_disclosure_count = snapshot.disclosure_count;
6500
6501        info!(
6502            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
6503            snapshot.emission_count,
6504            snapshot.disclosure_count,
6505            snapshot.supplier_assessments.len()
6506        );
6507        self.check_resources_with_log("post-esg")?;
6508
6509        Ok(snapshot)
6510    }
6511
6512    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
6513    fn phase_treasury_data(
6514        &mut self,
6515        document_flows: &DocumentFlowSnapshot,
6516        subledger: &SubledgerSnapshot,
6517        intercompany: &IntercompanySnapshot,
6518        stats: &mut EnhancedGenerationStatistics,
6519    ) -> SynthResult<TreasurySnapshot> {
6520        if !self.phase_config.generate_treasury {
6521            debug!("Phase 22: Skipped (treasury generation disabled)");
6522            return Ok(TreasurySnapshot::default());
6523        }
6524        info!("Phase 22: Generating Treasury Data");
6525
6526        let seed = self.seed;
6527        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6528            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6529        let currency = self
6530            .config
6531            .companies
6532            .first()
6533            .map(|c| c.currency.as_str())
6534            .unwrap_or("USD");
6535        let entity_id = self
6536            .config
6537            .companies
6538            .first()
6539            .map(|c| c.code.as_str())
6540            .unwrap_or("1000");
6541
6542        let mut snapshot = TreasurySnapshot::default();
6543
6544        // Generate debt instruments
6545        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
6546            self.config.treasury.debt.clone(),
6547            seed + 90,
6548        );
6549        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
6550
6551        // Generate hedging instruments (IR swaps for floating-rate debt)
6552        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
6553            self.config.treasury.hedging.clone(),
6554            seed + 91,
6555        );
6556        for debt in &snapshot.debt_instruments {
6557            if debt.rate_type == InterestRateType::Variable {
6558                let swap = hedge_gen.generate_ir_swap(
6559                    currency,
6560                    debt.principal,
6561                    debt.origination_date,
6562                    debt.maturity_date,
6563                );
6564                snapshot.hedging_instruments.push(swap);
6565            }
6566        }
6567
6568        // Build FX exposures from foreign-currency payments and generate
6569        // FX forwards + hedge relationship designations via generate() API.
6570        {
6571            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
6572            for payment in &document_flows.payments {
6573                if payment.currency != currency {
6574                    let entry = fx_map
6575                        .entry(payment.currency.clone())
6576                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
6577                    entry.0 += payment.amount;
6578                    // Use the latest settlement date among grouped payments
6579                    if payment.header.document_date > entry.1 {
6580                        entry.1 = payment.header.document_date;
6581                    }
6582                }
6583            }
6584            if !fx_map.is_empty() {
6585                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
6586                    .into_iter()
6587                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
6588                        datasynth_generators::treasury::FxExposure {
6589                            currency_pair: format!("{foreign_ccy}/{currency}"),
6590                            foreign_currency: foreign_ccy,
6591                            net_amount,
6592                            settlement_date,
6593                            description: "AP payment FX exposure".to_string(),
6594                        }
6595                    })
6596                    .collect();
6597                let (fx_instruments, fx_relationships) =
6598                    hedge_gen.generate(start_date, &fx_exposures);
6599                snapshot.hedging_instruments.extend(fx_instruments);
6600                snapshot.hedge_relationships.extend(fx_relationships);
6601            }
6602        }
6603
6604        // Inject anomalies if configured
6605        if self.config.treasury.anomaly_rate > 0.0 {
6606            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
6607                seed + 92,
6608                self.config.treasury.anomaly_rate,
6609            );
6610            let mut labels = Vec::new();
6611            labels.extend(
6612                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
6613            );
6614            snapshot.treasury_anomaly_labels = labels;
6615        }
6616
6617        // Generate cash positions from payment flows
6618        if self.config.treasury.cash_positioning.enabled {
6619            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
6620
6621            // AP payments as outflows
6622            for payment in &document_flows.payments {
6623                cash_flows.push(datasynth_generators::treasury::CashFlow {
6624                    date: payment.header.document_date,
6625                    account_id: format!("{entity_id}-MAIN"),
6626                    amount: payment.amount,
6627                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
6628                });
6629            }
6630
6631            // Customer receipts (from O2C chains) as inflows
6632            for chain in &document_flows.o2c_chains {
6633                if let Some(ref receipt) = chain.customer_receipt {
6634                    cash_flows.push(datasynth_generators::treasury::CashFlow {
6635                        date: receipt.header.document_date,
6636                        account_id: format!("{entity_id}-MAIN"),
6637                        amount: receipt.amount,
6638                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
6639                    });
6640                }
6641                // Remainder receipts (follow-up to partial payments)
6642                for receipt in &chain.remainder_receipts {
6643                    cash_flows.push(datasynth_generators::treasury::CashFlow {
6644                        date: receipt.header.document_date,
6645                        account_id: format!("{entity_id}-MAIN"),
6646                        amount: receipt.amount,
6647                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
6648                    });
6649                }
6650            }
6651
6652            if !cash_flows.is_empty() {
6653                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
6654                    self.config.treasury.cash_positioning.clone(),
6655                    seed + 93,
6656                );
6657                let account_id = format!("{entity_id}-MAIN");
6658                snapshot.cash_positions = cash_gen.generate(
6659                    entity_id,
6660                    &account_id,
6661                    currency,
6662                    &cash_flows,
6663                    start_date,
6664                    start_date + chrono::Months::new(self.config.global.period_months),
6665                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
6666                );
6667            }
6668        }
6669
6670        // Generate cash forecasts from AR/AP aging
6671        if self.config.treasury.cash_forecasting.enabled {
6672            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6673
6674            // Build AR aging items from subledger AR invoices
6675            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
6676                .ar_invoices
6677                .iter()
6678                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
6679                .map(|inv| {
6680                    let days_past_due = if inv.due_date < end_date {
6681                        (end_date - inv.due_date).num_days().max(0) as u32
6682                    } else {
6683                        0
6684                    };
6685                    datasynth_generators::treasury::ArAgingItem {
6686                        expected_date: inv.due_date,
6687                        amount: inv.amount_remaining,
6688                        days_past_due,
6689                        document_id: inv.invoice_number.clone(),
6690                    }
6691                })
6692                .collect();
6693
6694            // Build AP aging items from subledger AP invoices
6695            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
6696                .ap_invoices
6697                .iter()
6698                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
6699                .map(|inv| datasynth_generators::treasury::ApAgingItem {
6700                    payment_date: inv.due_date,
6701                    amount: inv.amount_remaining,
6702                    document_id: inv.invoice_number.clone(),
6703                })
6704                .collect();
6705
6706            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
6707                self.config.treasury.cash_forecasting.clone(),
6708                seed + 94,
6709            );
6710            let forecast = forecast_gen.generate(
6711                entity_id,
6712                currency,
6713                end_date,
6714                &ar_items,
6715                &ap_items,
6716                &[], // scheduled disbursements - empty for now
6717            );
6718            snapshot.cash_forecasts.push(forecast);
6719        }
6720
6721        // Generate cash pools and sweeps
6722        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
6723            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6724            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
6725                self.config.treasury.cash_pooling.clone(),
6726                seed + 95,
6727            );
6728
6729            // Create a pool from available accounts
6730            let account_ids: Vec<String> = snapshot
6731                .cash_positions
6732                .iter()
6733                .map(|cp| cp.bank_account_id.clone())
6734                .collect::<std::collections::HashSet<_>>()
6735                .into_iter()
6736                .collect();
6737
6738            if let Some(pool) =
6739                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
6740            {
6741                // Generate sweeps - build participant balances from last cash position per account
6742                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
6743                for cp in &snapshot.cash_positions {
6744                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
6745                }
6746
6747                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
6748                    latest_balances
6749                        .into_iter()
6750                        .filter(|(id, _)| pool.participant_accounts.contains(id))
6751                        .map(
6752                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
6753                                account_id: id,
6754                                balance,
6755                            },
6756                        )
6757                        .collect();
6758
6759                let sweeps =
6760                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
6761                snapshot.cash_pool_sweeps = sweeps;
6762                snapshot.cash_pools.push(pool);
6763            }
6764        }
6765
6766        // Generate bank guarantees
6767        if self.config.treasury.bank_guarantees.enabled {
6768            let vendor_names: Vec<String> = self
6769                .master_data
6770                .vendors
6771                .iter()
6772                .map(|v| v.name.clone())
6773                .collect();
6774            if !vendor_names.is_empty() {
6775                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
6776                    self.config.treasury.bank_guarantees.clone(),
6777                    seed + 96,
6778                );
6779                snapshot.bank_guarantees =
6780                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
6781            }
6782        }
6783
6784        // Generate netting runs from intercompany matched pairs
6785        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
6786            let entity_ids: Vec<String> = self
6787                .config
6788                .companies
6789                .iter()
6790                .map(|c| c.code.clone())
6791                .collect();
6792            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
6793                .matched_pairs
6794                .iter()
6795                .map(|mp| {
6796                    (
6797                        mp.seller_company.clone(),
6798                        mp.buyer_company.clone(),
6799                        mp.amount,
6800                    )
6801                })
6802                .collect();
6803            if entity_ids.len() >= 2 {
6804                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
6805                    self.config.treasury.netting.clone(),
6806                    seed + 97,
6807                );
6808                snapshot.netting_runs = netting_gen.generate(
6809                    &entity_ids,
6810                    currency,
6811                    start_date,
6812                    self.config.global.period_months,
6813                    &ic_amounts,
6814                );
6815            }
6816        }
6817
6818        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
6819        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
6820        stats.cash_position_count = snapshot.cash_positions.len();
6821        stats.cash_forecast_count = snapshot.cash_forecasts.len();
6822        stats.cash_pool_count = snapshot.cash_pools.len();
6823
6824        info!(
6825            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs",
6826            snapshot.debt_instruments.len(),
6827            snapshot.hedging_instruments.len(),
6828            snapshot.cash_positions.len(),
6829            snapshot.cash_forecasts.len(),
6830            snapshot.cash_pools.len(),
6831            snapshot.bank_guarantees.len(),
6832            snapshot.netting_runs.len(),
6833        );
6834        self.check_resources_with_log("post-treasury")?;
6835
6836        Ok(snapshot)
6837    }
6838
6839    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
6840    fn phase_project_accounting(
6841        &mut self,
6842        document_flows: &DocumentFlowSnapshot,
6843        hr: &HrSnapshot,
6844        stats: &mut EnhancedGenerationStatistics,
6845    ) -> SynthResult<ProjectAccountingSnapshot> {
6846        if !self.phase_config.generate_project_accounting {
6847            debug!("Phase 23: Skipped (project accounting disabled)");
6848            return Ok(ProjectAccountingSnapshot::default());
6849        }
6850        info!("Phase 23: Generating Project Accounting Data");
6851
6852        let seed = self.seed;
6853        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6854            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6855        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6856        let company_code = self
6857            .config
6858            .companies
6859            .first()
6860            .map(|c| c.code.as_str())
6861            .unwrap_or("1000");
6862
6863        let mut snapshot = ProjectAccountingSnapshot::default();
6864
6865        // Generate projects with WBS hierarchies
6866        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
6867            self.config.project_accounting.clone(),
6868            seed + 95,
6869        );
6870        let pool = project_gen.generate(company_code, start_date, end_date);
6871        snapshot.projects = pool.projects.clone();
6872
6873        // Link source documents to projects for cost allocation
6874        {
6875            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
6876                Vec::new();
6877
6878            // Time entries
6879            for te in &hr.time_entries {
6880                let total_hours = te.hours_regular + te.hours_overtime;
6881                if total_hours > 0.0 {
6882                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
6883                        id: te.entry_id.clone(),
6884                        entity_id: company_code.to_string(),
6885                        date: te.date,
6886                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
6887                            .unwrap_or(rust_decimal::Decimal::ZERO),
6888                        source_type: CostSourceType::TimeEntry,
6889                        hours: Some(
6890                            rust_decimal::Decimal::from_f64_retain(total_hours)
6891                                .unwrap_or(rust_decimal::Decimal::ZERO),
6892                        ),
6893                    });
6894                }
6895            }
6896
6897            // Expense reports
6898            for er in &hr.expense_reports {
6899                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
6900                    id: er.report_id.clone(),
6901                    entity_id: company_code.to_string(),
6902                    date: er.submission_date,
6903                    amount: er.total_amount,
6904                    source_type: CostSourceType::ExpenseReport,
6905                    hours: None,
6906                });
6907            }
6908
6909            // Purchase orders
6910            for po in &document_flows.purchase_orders {
6911                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
6912                    id: po.header.document_id.clone(),
6913                    entity_id: company_code.to_string(),
6914                    date: po.header.document_date,
6915                    amount: po.total_net_amount,
6916                    source_type: CostSourceType::PurchaseOrder,
6917                    hours: None,
6918                });
6919            }
6920
6921            // Vendor invoices
6922            for vi in &document_flows.vendor_invoices {
6923                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
6924                    id: vi.header.document_id.clone(),
6925                    entity_id: company_code.to_string(),
6926                    date: vi.header.document_date,
6927                    amount: vi.payable_amount,
6928                    source_type: CostSourceType::VendorInvoice,
6929                    hours: None,
6930                });
6931            }
6932
6933            if !source_docs.is_empty() && !pool.projects.is_empty() {
6934                let mut cost_gen =
6935                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
6936                        self.config.project_accounting.cost_allocation.clone(),
6937                        seed + 99,
6938                    );
6939                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
6940            }
6941        }
6942
6943        // Generate change orders
6944        if self.config.project_accounting.change_orders.enabled {
6945            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
6946                self.config.project_accounting.change_orders.clone(),
6947                seed + 96,
6948            );
6949            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
6950        }
6951
6952        // Generate milestones
6953        if self.config.project_accounting.milestones.enabled {
6954            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
6955                self.config.project_accounting.milestones.clone(),
6956                seed + 97,
6957            );
6958            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
6959        }
6960
6961        // Generate earned value metrics (needs cost lines, so only if we have projects)
6962        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
6963            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
6964                self.config.project_accounting.earned_value.clone(),
6965                seed + 98,
6966            );
6967            snapshot.earned_value_metrics =
6968                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
6969        }
6970
6971        stats.project_count = snapshot.projects.len();
6972        stats.project_change_order_count = snapshot.change_orders.len();
6973        stats.project_cost_line_count = snapshot.cost_lines.len();
6974
6975        info!(
6976            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
6977            snapshot.projects.len(),
6978            snapshot.change_orders.len(),
6979            snapshot.milestones.len(),
6980            snapshot.earned_value_metrics.len()
6981        );
6982        self.check_resources_with_log("post-project-accounting")?;
6983
6984        Ok(snapshot)
6985    }
6986
6987    /// Phase 24: Generate process evolution and organizational events.
6988    fn phase_evolution_events(
6989        &mut self,
6990        stats: &mut EnhancedGenerationStatistics,
6991    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
6992        if !self.phase_config.generate_evolution_events {
6993            debug!("Phase 24: Skipped (evolution events disabled)");
6994            return Ok((Vec::new(), Vec::new()));
6995        }
6996        info!("Phase 24: Generating Process Evolution + Organizational Events");
6997
6998        let seed = self.seed;
6999        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7000            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7001        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7002
7003        // Process evolution events
7004        let mut proc_gen =
7005            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
7006                seed + 100,
7007            );
7008        let process_events = proc_gen.generate_events(start_date, end_date);
7009
7010        // Organizational events
7011        let company_codes: Vec<String> = self
7012            .config
7013            .companies
7014            .iter()
7015            .map(|c| c.code.clone())
7016            .collect();
7017        let mut org_gen =
7018            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
7019                seed + 101,
7020            );
7021        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
7022
7023        stats.process_evolution_event_count = process_events.len();
7024        stats.organizational_event_count = org_events.len();
7025
7026        info!(
7027            "Evolution events generated: {} process evolution, {} organizational",
7028            process_events.len(),
7029            org_events.len()
7030        );
7031        self.check_resources_with_log("post-evolution-events")?;
7032
7033        Ok((process_events, org_events))
7034    }
7035
7036    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
7037    /// data recovery, and regulatory changes).
7038    fn phase_disruption_events(
7039        &self,
7040        stats: &mut EnhancedGenerationStatistics,
7041    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
7042        if !self.config.organizational_events.enabled {
7043            debug!("Phase 24b: Skipped (organizational events disabled)");
7044            return Ok(Vec::new());
7045        }
7046        info!("Phase 24b: Generating Disruption Events");
7047
7048        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7049            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7050        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7051
7052        let company_codes: Vec<String> = self
7053            .config
7054            .companies
7055            .iter()
7056            .map(|c| c.code.clone())
7057            .collect();
7058
7059        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
7060        let events = gen.generate(start_date, end_date, &company_codes);
7061
7062        stats.disruption_event_count = events.len();
7063        info!("Disruption events generated: {} events", events.len());
7064        self.check_resources_with_log("post-disruption-events")?;
7065
7066        Ok(events)
7067    }
7068
7069    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
7070    ///
7071    /// Produces paired examples where each pair contains the original clean JE
7072    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
7073    /// split transaction). Useful for training anomaly detection models with
7074    /// known ground truth.
7075    fn phase_counterfactuals(
7076        &self,
7077        journal_entries: &[JournalEntry],
7078        stats: &mut EnhancedGenerationStatistics,
7079    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
7080        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
7081            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
7082            return Ok(Vec::new());
7083        }
7084        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
7085
7086        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
7087
7088        let mut gen = CounterfactualGenerator::new(self.seed + 110);
7089
7090        // Rotating set of specs to produce diverse mutation types
7091        let specs = [
7092            CounterfactualSpec::ScaleAmount { factor: 2.5 },
7093            CounterfactualSpec::ShiftDate { days: -14 },
7094            CounterfactualSpec::SelfApprove,
7095            CounterfactualSpec::SplitTransaction { split_count: 3 },
7096        ];
7097
7098        let pairs: Vec<_> = journal_entries
7099            .iter()
7100            .enumerate()
7101            .map(|(i, je)| {
7102                let spec = &specs[i % specs.len()];
7103                gen.generate(je, spec)
7104            })
7105            .collect();
7106
7107        stats.counterfactual_pair_count = pairs.len();
7108        info!(
7109            "Counterfactual pairs generated: {} pairs from {} journal entries",
7110            pairs.len(),
7111            journal_entries.len()
7112        );
7113        self.check_resources_with_log("post-counterfactuals")?;
7114
7115        Ok(pairs)
7116    }
7117
7118    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
7119    ///
7120    /// Uses the anomaly labels (from Phase 8) to determine which documents are
7121    /// fraudulent, then generates probabilistic red flags on all chain documents.
7122    /// Non-fraud documents also receive red flags at a lower rate (false positives)
7123    /// to produce realistic ML training data.
7124    fn phase_red_flags(
7125        &self,
7126        anomaly_labels: &AnomalyLabels,
7127        document_flows: &DocumentFlowSnapshot,
7128        stats: &mut EnhancedGenerationStatistics,
7129    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
7130        if !self.config.fraud.enabled {
7131            debug!("Phase 26: Skipped (fraud generation disabled)");
7132            return Ok(Vec::new());
7133        }
7134        info!("Phase 26: Generating Fraud Red-Flag Indicators");
7135
7136        use datasynth_generators::fraud::RedFlagGenerator;
7137
7138        let generator = RedFlagGenerator::new();
7139        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
7140
7141        // Build a set of document IDs that are known-fraudulent from anomaly labels.
7142        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
7143            .labels
7144            .iter()
7145            .filter(|label| label.anomaly_type.is_intentional())
7146            .map(|label| label.document_id.as_str())
7147            .collect();
7148
7149        let mut flags = Vec::new();
7150
7151        // Iterate P2P chains: use the purchase order document ID as the chain key.
7152        for chain in &document_flows.p2p_chains {
7153            let doc_id = &chain.purchase_order.header.document_id;
7154            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
7155            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
7156        }
7157
7158        // Iterate O2C chains: use the sales order document ID as the chain key.
7159        for chain in &document_flows.o2c_chains {
7160            let doc_id = &chain.sales_order.header.document_id;
7161            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
7162            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
7163        }
7164
7165        stats.red_flag_count = flags.len();
7166        info!(
7167            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
7168            flags.len(),
7169            document_flows.p2p_chains.len(),
7170            document_flows.o2c_chains.len(),
7171            fraud_doc_ids.len()
7172        );
7173        self.check_resources_with_log("post-red-flags")?;
7174
7175        Ok(flags)
7176    }
7177
7178    /// Phase 26b: Generate collusion rings from employee/vendor pools.
7179    ///
7180    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
7181    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
7182    /// advance them over the simulation period.
7183    fn phase_collusion_rings(
7184        &mut self,
7185        stats: &mut EnhancedGenerationStatistics,
7186    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
7187        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
7188            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
7189            return Ok(Vec::new());
7190        }
7191        info!("Phase 26b: Generating Collusion Rings");
7192
7193        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7194            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7195        let months = self.config.global.period_months;
7196
7197        let employee_ids: Vec<String> = self
7198            .master_data
7199            .employees
7200            .iter()
7201            .map(|e| e.employee_id.clone())
7202            .collect();
7203        let vendor_ids: Vec<String> = self
7204            .master_data
7205            .vendors
7206            .iter()
7207            .map(|v| v.vendor_id.clone())
7208            .collect();
7209
7210        let mut generator =
7211            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
7212        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
7213
7214        stats.collusion_ring_count = rings.len();
7215        info!(
7216            "Collusion rings generated: {} rings, total members: {}",
7217            rings.len(),
7218            rings
7219                .iter()
7220                .map(datasynth_generators::fraud::CollusionRing::size)
7221                .sum::<usize>()
7222        );
7223        self.check_resources_with_log("post-collusion-rings")?;
7224
7225        Ok(rings)
7226    }
7227
7228    /// Phase 27: Generate bi-temporal version chains for vendor entities.
7229    ///
7230    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
7231    /// master data changes over time, supporting bi-temporal audit queries.
7232    fn phase_temporal_attributes(
7233        &mut self,
7234        stats: &mut EnhancedGenerationStatistics,
7235    ) -> SynthResult<
7236        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
7237    > {
7238        if !self.config.temporal_attributes.enabled {
7239            debug!("Phase 27: Skipped (temporal attributes disabled)");
7240            return Ok(Vec::new());
7241        }
7242        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
7243
7244        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7245            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7246
7247        // Build a TemporalAttributeConfig from the user's config.
7248        // Since Phase 27 is already gated on temporal_attributes.enabled,
7249        // default to enabling version chains so users get actual mutations.
7250        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
7251            || self.config.temporal_attributes.enabled;
7252        let temporal_config = {
7253            let ta = &self.config.temporal_attributes;
7254            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
7255                .enabled(ta.enabled)
7256                .closed_probability(ta.valid_time.closed_probability)
7257                .avg_validity_days(ta.valid_time.avg_validity_days)
7258                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
7259                .with_version_chains(if generate_version_chains {
7260                    ta.avg_versions_per_entity
7261                } else {
7262                    1.0
7263                })
7264                .build()
7265        };
7266        // Apply backdating settings if configured
7267        let temporal_config = if self
7268            .config
7269            .temporal_attributes
7270            .transaction_time
7271            .allow_backdating
7272        {
7273            let mut c = temporal_config;
7274            c.transaction_time.allow_backdating = true;
7275            c.transaction_time.backdating_probability = self
7276                .config
7277                .temporal_attributes
7278                .transaction_time
7279                .backdating_probability;
7280            c.transaction_time.max_backdate_days = self
7281                .config
7282                .temporal_attributes
7283                .transaction_time
7284                .max_backdate_days;
7285            c
7286        } else {
7287            temporal_config
7288        };
7289        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
7290            temporal_config,
7291            self.seed + 130,
7292            start_date,
7293        );
7294
7295        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
7296            self.seed + 130,
7297            datasynth_core::GeneratorType::Vendor,
7298        );
7299
7300        let chains: Vec<_> = self
7301            .master_data
7302            .vendors
7303            .iter()
7304            .map(|vendor| {
7305                let id = uuid_factory.next();
7306                gen.generate_version_chain(vendor.clone(), id)
7307            })
7308            .collect();
7309
7310        stats.temporal_version_chain_count = chains.len();
7311        info!("Temporal version chains generated: {} chains", chains.len());
7312        self.check_resources_with_log("post-temporal-attributes")?;
7313
7314        Ok(chains)
7315    }
7316
7317    /// Phase 28: Build entity relationship graph and cross-process links.
7318    ///
7319    /// Part 1 (gated on `relationship_strength.enabled`): builds an
7320    /// `EntityGraph` from master-data vendor/customer entities and
7321    /// journal-entry-derived transaction summaries.
7322    ///
7323    /// Part 2 (gated on `cross_process_links.enabled`): extracts
7324    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
7325    /// generates inventory-movement cross-process links.
7326    fn phase_entity_relationships(
7327        &self,
7328        journal_entries: &[JournalEntry],
7329        document_flows: &DocumentFlowSnapshot,
7330        stats: &mut EnhancedGenerationStatistics,
7331    ) -> SynthResult<(
7332        Option<datasynth_core::models::EntityGraph>,
7333        Vec<datasynth_core::models::CrossProcessLink>,
7334    )> {
7335        use datasynth_generators::relationships::{
7336            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
7337            TransactionSummary,
7338        };
7339
7340        let rs_enabled = self.config.relationship_strength.enabled;
7341        let cpl_enabled = self.config.cross_process_links.enabled
7342            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
7343
7344        if !rs_enabled && !cpl_enabled {
7345            debug!(
7346                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
7347            );
7348            return Ok((None, Vec::new()));
7349        }
7350
7351        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
7352
7353        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7354            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7355
7356        let company_code = self
7357            .config
7358            .companies
7359            .first()
7360            .map(|c| c.code.as_str())
7361            .unwrap_or("1000");
7362
7363        // Build the generator with matching config flags
7364        let gen_config = EntityGraphConfig {
7365            enabled: rs_enabled,
7366            cross_process: datasynth_generators::relationships::CrossProcessConfig {
7367                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
7368                enable_return_flows: false,
7369                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
7370                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
7371                // Use higher link rate for small datasets to avoid probabilistic empty results
7372                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
7373                    1.0
7374                } else {
7375                    0.30
7376                },
7377                ..Default::default()
7378            },
7379            strength_config: datasynth_generators::relationships::StrengthConfig {
7380                transaction_volume_weight: self
7381                    .config
7382                    .relationship_strength
7383                    .calculation
7384                    .transaction_volume_weight,
7385                transaction_count_weight: self
7386                    .config
7387                    .relationship_strength
7388                    .calculation
7389                    .transaction_count_weight,
7390                duration_weight: self
7391                    .config
7392                    .relationship_strength
7393                    .calculation
7394                    .relationship_duration_weight,
7395                recency_weight: self.config.relationship_strength.calculation.recency_weight,
7396                mutual_connections_weight: self
7397                    .config
7398                    .relationship_strength
7399                    .calculation
7400                    .mutual_connections_weight,
7401                recency_half_life_days: self
7402                    .config
7403                    .relationship_strength
7404                    .calculation
7405                    .recency_half_life_days,
7406            },
7407            ..Default::default()
7408        };
7409
7410        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
7411
7412        // --- Part 1: Entity Relationship Graph ---
7413        let entity_graph = if rs_enabled {
7414            // Build EntitySummary lists from master data
7415            let vendor_summaries: Vec<EntitySummary> = self
7416                .master_data
7417                .vendors
7418                .iter()
7419                .map(|v| {
7420                    EntitySummary::new(
7421                        &v.vendor_id,
7422                        &v.name,
7423                        datasynth_core::models::GraphEntityType::Vendor,
7424                        start_date,
7425                    )
7426                })
7427                .collect();
7428
7429            let customer_summaries: Vec<EntitySummary> = self
7430                .master_data
7431                .customers
7432                .iter()
7433                .map(|c| {
7434                    EntitySummary::new(
7435                        &c.customer_id,
7436                        &c.name,
7437                        datasynth_core::models::GraphEntityType::Customer,
7438                        start_date,
7439                    )
7440                })
7441                .collect();
7442
7443            // Build transaction summaries from journal entries.
7444            // Key = (company_code, trading_partner) for entries that have a
7445            // trading partner.  This captures intercompany flows and any JE
7446            // whose line items carry a trading_partner reference.
7447            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
7448                std::collections::HashMap::new();
7449
7450            for je in journal_entries {
7451                let cc = je.header.company_code.clone();
7452                let posting_date = je.header.posting_date;
7453                for line in &je.lines {
7454                    if let Some(ref tp) = line.trading_partner {
7455                        let amount = if line.debit_amount > line.credit_amount {
7456                            line.debit_amount
7457                        } else {
7458                            line.credit_amount
7459                        };
7460                        let entry = txn_summaries
7461                            .entry((cc.clone(), tp.clone()))
7462                            .or_insert_with(|| TransactionSummary {
7463                                total_volume: rust_decimal::Decimal::ZERO,
7464                                transaction_count: 0,
7465                                first_transaction_date: posting_date,
7466                                last_transaction_date: posting_date,
7467                                related_entities: std::collections::HashSet::new(),
7468                            });
7469                        entry.total_volume += amount;
7470                        entry.transaction_count += 1;
7471                        if posting_date < entry.first_transaction_date {
7472                            entry.first_transaction_date = posting_date;
7473                        }
7474                        if posting_date > entry.last_transaction_date {
7475                            entry.last_transaction_date = posting_date;
7476                        }
7477                        entry.related_entities.insert(cc.clone());
7478                    }
7479                }
7480            }
7481
7482            // Also extract transaction relationships from document flow chains.
7483            // P2P chains: Company → Vendor relationships
7484            for chain in &document_flows.p2p_chains {
7485                let cc = chain.purchase_order.header.company_code.clone();
7486                let vendor_id = chain.purchase_order.vendor_id.clone();
7487                let po_date = chain.purchase_order.header.document_date;
7488                let amount = chain.purchase_order.total_net_amount;
7489
7490                let entry = txn_summaries
7491                    .entry((cc.clone(), vendor_id))
7492                    .or_insert_with(|| TransactionSummary {
7493                        total_volume: rust_decimal::Decimal::ZERO,
7494                        transaction_count: 0,
7495                        first_transaction_date: po_date,
7496                        last_transaction_date: po_date,
7497                        related_entities: std::collections::HashSet::new(),
7498                    });
7499                entry.total_volume += amount;
7500                entry.transaction_count += 1;
7501                if po_date < entry.first_transaction_date {
7502                    entry.first_transaction_date = po_date;
7503                }
7504                if po_date > entry.last_transaction_date {
7505                    entry.last_transaction_date = po_date;
7506                }
7507                entry.related_entities.insert(cc);
7508            }
7509
7510            // O2C chains: Company → Customer relationships
7511            for chain in &document_flows.o2c_chains {
7512                let cc = chain.sales_order.header.company_code.clone();
7513                let customer_id = chain.sales_order.customer_id.clone();
7514                let so_date = chain.sales_order.header.document_date;
7515                let amount = chain.sales_order.total_net_amount;
7516
7517                let entry = txn_summaries
7518                    .entry((cc.clone(), customer_id))
7519                    .or_insert_with(|| TransactionSummary {
7520                        total_volume: rust_decimal::Decimal::ZERO,
7521                        transaction_count: 0,
7522                        first_transaction_date: so_date,
7523                        last_transaction_date: so_date,
7524                        related_entities: std::collections::HashSet::new(),
7525                    });
7526                entry.total_volume += amount;
7527                entry.transaction_count += 1;
7528                if so_date < entry.first_transaction_date {
7529                    entry.first_transaction_date = so_date;
7530                }
7531                if so_date > entry.last_transaction_date {
7532                    entry.last_transaction_date = so_date;
7533                }
7534                entry.related_entities.insert(cc);
7535            }
7536
7537            let as_of_date = journal_entries
7538                .last()
7539                .map(|je| je.header.posting_date)
7540                .unwrap_or(start_date);
7541
7542            let graph = gen.generate_entity_graph(
7543                company_code,
7544                as_of_date,
7545                &vendor_summaries,
7546                &customer_summaries,
7547                &txn_summaries,
7548            );
7549
7550            info!(
7551                "Entity relationship graph: {} nodes, {} edges",
7552                graph.nodes.len(),
7553                graph.edges.len()
7554            );
7555            stats.entity_relationship_node_count = graph.nodes.len();
7556            stats.entity_relationship_edge_count = graph.edges.len();
7557            Some(graph)
7558        } else {
7559            None
7560        };
7561
7562        // --- Part 2: Cross-Process Links ---
7563        let cross_process_links = if cpl_enabled {
7564            // Build GoodsReceiptRef from P2P chains
7565            let gr_refs: Vec<GoodsReceiptRef> = document_flows
7566                .p2p_chains
7567                .iter()
7568                .flat_map(|chain| {
7569                    let vendor_id = chain.purchase_order.vendor_id.clone();
7570                    let cc = chain.purchase_order.header.company_code.clone();
7571                    chain.goods_receipts.iter().flat_map(move |gr| {
7572                        gr.items.iter().filter_map({
7573                            let doc_id = gr.header.document_id.clone();
7574                            let v_id = vendor_id.clone();
7575                            let company = cc.clone();
7576                            let receipt_date = gr.header.document_date;
7577                            move |item| {
7578                                item.base
7579                                    .material_id
7580                                    .as_ref()
7581                                    .map(|mat_id| GoodsReceiptRef {
7582                                        document_id: doc_id.clone(),
7583                                        material_id: mat_id.clone(),
7584                                        quantity: item.base.quantity,
7585                                        receipt_date,
7586                                        vendor_id: v_id.clone(),
7587                                        company_code: company.clone(),
7588                                    })
7589                            }
7590                        })
7591                    })
7592                })
7593                .collect();
7594
7595            // Build DeliveryRef from O2C chains
7596            let del_refs: Vec<DeliveryRef> = document_flows
7597                .o2c_chains
7598                .iter()
7599                .flat_map(|chain| {
7600                    let customer_id = chain.sales_order.customer_id.clone();
7601                    let cc = chain.sales_order.header.company_code.clone();
7602                    chain.deliveries.iter().flat_map(move |del| {
7603                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
7604                        del.items.iter().filter_map({
7605                            let doc_id = del.header.document_id.clone();
7606                            let c_id = customer_id.clone();
7607                            let company = cc.clone();
7608                            move |item| {
7609                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
7610                                    document_id: doc_id.clone(),
7611                                    material_id: mat_id.clone(),
7612                                    quantity: item.base.quantity,
7613                                    delivery_date,
7614                                    customer_id: c_id.clone(),
7615                                    company_code: company.clone(),
7616                                })
7617                            }
7618                        })
7619                    })
7620                })
7621                .collect();
7622
7623            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
7624            info!("Cross-process links generated: {} links", links.len());
7625            stats.cross_process_link_count = links.len();
7626            links
7627        } else {
7628            Vec::new()
7629        };
7630
7631        self.check_resources_with_log("post-entity-relationships")?;
7632        Ok((entity_graph, cross_process_links))
7633    }
7634
7635    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
7636    fn phase_industry_data(
7637        &self,
7638        stats: &mut EnhancedGenerationStatistics,
7639    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
7640        if !self.config.industry_specific.enabled {
7641            return None;
7642        }
7643        info!("Phase 29: Generating industry-specific data");
7644        let output = datasynth_generators::industry::factory::generate_industry_output(
7645            self.config.global.industry,
7646        );
7647        stats.industry_gl_account_count = output.gl_accounts.len();
7648        info!(
7649            "Industry data generated: {} GL accounts for {:?}",
7650            output.gl_accounts.len(),
7651            self.config.global.industry
7652        );
7653        Some(output)
7654    }
7655
7656    /// Phase 3b: Generate opening balances for each company.
7657    fn phase_opening_balances(
7658        &mut self,
7659        coa: &Arc<ChartOfAccounts>,
7660        stats: &mut EnhancedGenerationStatistics,
7661    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
7662        if !self.config.balance.generate_opening_balances {
7663            debug!("Phase 3b: Skipped (opening balance generation disabled)");
7664            return Ok(Vec::new());
7665        }
7666        info!("Phase 3b: Generating Opening Balances");
7667
7668        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7669            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7670        let fiscal_year = start_date.year();
7671
7672        let industry = match self.config.global.industry {
7673            IndustrySector::Manufacturing => IndustryType::Manufacturing,
7674            IndustrySector::Retail => IndustryType::Retail,
7675            IndustrySector::FinancialServices => IndustryType::Financial,
7676            IndustrySector::Healthcare => IndustryType::Healthcare,
7677            IndustrySector::Technology => IndustryType::Technology,
7678            _ => IndustryType::Manufacturing,
7679        };
7680
7681        let config = datasynth_generators::OpeningBalanceConfig {
7682            industry,
7683            ..Default::default()
7684        };
7685        let mut gen =
7686            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
7687
7688        let mut results = Vec::new();
7689        for company in &self.config.companies {
7690            let spec = OpeningBalanceSpec::new(
7691                company.code.clone(),
7692                start_date,
7693                fiscal_year,
7694                company.currency.clone(),
7695                rust_decimal::Decimal::new(10_000_000, 0),
7696                industry,
7697            );
7698            let ob = gen.generate(&spec, coa, start_date, &company.code);
7699            results.push(ob);
7700        }
7701
7702        stats.opening_balance_count = results.len();
7703        info!("Opening balances generated: {} companies", results.len());
7704        self.check_resources_with_log("post-opening-balances")?;
7705
7706        Ok(results)
7707    }
7708
7709    /// Phase 9b: Reconcile GL control accounts to subledger balances.
7710    fn phase_subledger_reconciliation(
7711        &mut self,
7712        subledger: &SubledgerSnapshot,
7713        entries: &[JournalEntry],
7714        stats: &mut EnhancedGenerationStatistics,
7715    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
7716        if !self.config.balance.reconcile_subledgers {
7717            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
7718            return Ok(Vec::new());
7719        }
7720        info!("Phase 9b: Reconciling GL to subledger balances");
7721
7722        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7723            .map(|d| d + chrono::Months::new(self.config.global.period_months))
7724            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7725
7726        // Build GL balance map from journal entries using a balance tracker
7727        let tracker_config = BalanceTrackerConfig {
7728            validate_on_each_entry: false,
7729            track_history: false,
7730            fail_on_validation_error: false,
7731            ..Default::default()
7732        };
7733        let recon_currency = self
7734            .config
7735            .companies
7736            .first()
7737            .map(|c| c.currency.clone())
7738            .unwrap_or_else(|| "USD".to_string());
7739        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
7740        let validation_errors = tracker.apply_entries(entries);
7741        if !validation_errors.is_empty() {
7742            warn!(
7743                error_count = validation_errors.len(),
7744                "Balance tracker encountered validation errors during subledger reconciliation"
7745            );
7746            for err in &validation_errors {
7747                debug!("Balance validation error: {:?}", err);
7748            }
7749        }
7750
7751        let mut engine = datasynth_generators::ReconciliationEngine::new(
7752            datasynth_generators::ReconciliationConfig::default(),
7753        );
7754
7755        let mut results = Vec::new();
7756        let company_code = self
7757            .config
7758            .companies
7759            .first()
7760            .map(|c| c.code.as_str())
7761            .unwrap_or("1000");
7762
7763        // Reconcile AR
7764        if !subledger.ar_invoices.is_empty() {
7765            let gl_balance = tracker
7766                .get_account_balance(
7767                    company_code,
7768                    datasynth_core::accounts::control_accounts::AR_CONTROL,
7769                )
7770                .map(|b| b.closing_balance)
7771                .unwrap_or_default();
7772            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
7773            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
7774        }
7775
7776        // Reconcile AP
7777        if !subledger.ap_invoices.is_empty() {
7778            let gl_balance = tracker
7779                .get_account_balance(
7780                    company_code,
7781                    datasynth_core::accounts::control_accounts::AP_CONTROL,
7782                )
7783                .map(|b| b.closing_balance)
7784                .unwrap_or_default();
7785            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
7786            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
7787        }
7788
7789        // Reconcile FA
7790        if !subledger.fa_records.is_empty() {
7791            let gl_asset_balance = tracker
7792                .get_account_balance(
7793                    company_code,
7794                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
7795                )
7796                .map(|b| b.closing_balance)
7797                .unwrap_or_default();
7798            let gl_accum_depr_balance = tracker
7799                .get_account_balance(
7800                    company_code,
7801                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
7802                )
7803                .map(|b| b.closing_balance)
7804                .unwrap_or_default();
7805            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
7806                subledger.fa_records.iter().collect();
7807            let (asset_recon, depr_recon) = engine.reconcile_fa(
7808                company_code,
7809                end_date,
7810                gl_asset_balance,
7811                gl_accum_depr_balance,
7812                &fa_refs,
7813            );
7814            results.push(asset_recon);
7815            results.push(depr_recon);
7816        }
7817
7818        // Reconcile Inventory
7819        if !subledger.inventory_positions.is_empty() {
7820            let gl_balance = tracker
7821                .get_account_balance(
7822                    company_code,
7823                    datasynth_core::accounts::control_accounts::INVENTORY,
7824                )
7825                .map(|b| b.closing_balance)
7826                .unwrap_or_default();
7827            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
7828                subledger.inventory_positions.iter().collect();
7829            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
7830        }
7831
7832        stats.subledger_reconciliation_count = results.len();
7833        info!(
7834            "Subledger reconciliation complete: {} reconciliations",
7835            results.len()
7836        );
7837        self.check_resources_with_log("post-subledger-reconciliation")?;
7838
7839        Ok(results)
7840    }
7841
7842    /// Generate the chart of accounts.
7843    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
7844        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
7845
7846        let coa_framework = self.resolve_coa_framework();
7847
7848        let mut gen = ChartOfAccountsGenerator::new(
7849            self.config.chart_of_accounts.complexity,
7850            self.config.global.industry,
7851            self.seed,
7852        )
7853        .with_coa_framework(coa_framework);
7854
7855        let coa = Arc::new(gen.generate());
7856        self.coa = Some(Arc::clone(&coa));
7857
7858        if let Some(pb) = pb {
7859            pb.finish_with_message("Chart of Accounts complete");
7860        }
7861
7862        Ok(coa)
7863    }
7864
7865    /// Generate master data entities.
7866    fn generate_master_data(&mut self) -> SynthResult<()> {
7867        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7868            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7869        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7870
7871        let total = self.config.companies.len() as u64 * 5; // 5 entity types
7872        let pb = self.create_progress_bar(total, "Generating Master Data");
7873
7874        // Resolve country pack once for all companies (uses primary company's country)
7875        let pack = self.primary_pack().clone();
7876
7877        // Capture config values needed inside the parallel closure
7878        let vendors_per_company = self.phase_config.vendors_per_company;
7879        let customers_per_company = self.phase_config.customers_per_company;
7880        let materials_per_company = self.phase_config.materials_per_company;
7881        let assets_per_company = self.phase_config.assets_per_company;
7882        let coa_framework = self.resolve_coa_framework();
7883
7884        // Generate all master data in parallel across companies.
7885        // Each company's data is independent, making this embarrassingly parallel.
7886        let per_company_results: Vec<_> = self
7887            .config
7888            .companies
7889            .par_iter()
7890            .enumerate()
7891            .map(|(i, company)| {
7892                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
7893                let pack = pack.clone();
7894
7895                // Generate vendors (offset counter so IDs are globally unique across companies)
7896                let mut vendor_gen = VendorGenerator::new(company_seed);
7897                vendor_gen.set_country_pack(pack.clone());
7898                vendor_gen.set_coa_framework(coa_framework);
7899                vendor_gen.set_counter_offset(i * vendors_per_company);
7900                let vendor_pool =
7901                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
7902
7903                // Generate customers (offset counter so IDs are globally unique across companies)
7904                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
7905                customer_gen.set_country_pack(pack.clone());
7906                customer_gen.set_coa_framework(coa_framework);
7907                customer_gen.set_counter_offset(i * customers_per_company);
7908                let customer_pool = customer_gen.generate_customer_pool(
7909                    customers_per_company,
7910                    &company.code,
7911                    start_date,
7912                );
7913
7914                // Generate materials (offset counter so IDs are globally unique across companies)
7915                let mut material_gen = MaterialGenerator::new(company_seed + 200);
7916                material_gen.set_country_pack(pack.clone());
7917                material_gen.set_counter_offset(i * materials_per_company);
7918                let material_pool = material_gen.generate_material_pool(
7919                    materials_per_company,
7920                    &company.code,
7921                    start_date,
7922                );
7923
7924                // Generate fixed assets
7925                let mut asset_gen = AssetGenerator::new(company_seed + 300);
7926                let asset_pool = asset_gen.generate_asset_pool(
7927                    assets_per_company,
7928                    &company.code,
7929                    (start_date, end_date),
7930                );
7931
7932                // Generate employees
7933                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
7934                employee_gen.set_country_pack(pack);
7935                let employee_pool =
7936                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
7937
7938                (
7939                    vendor_pool.vendors,
7940                    customer_pool.customers,
7941                    material_pool.materials,
7942                    asset_pool.assets,
7943                    employee_pool.employees,
7944                )
7945            })
7946            .collect();
7947
7948        // Aggregate results from all companies
7949        for (vendors, customers, materials, assets, employees) in per_company_results {
7950            self.master_data.vendors.extend(vendors);
7951            self.master_data.customers.extend(customers);
7952            self.master_data.materials.extend(materials);
7953            self.master_data.assets.extend(assets);
7954            self.master_data.employees.extend(employees);
7955        }
7956
7957        if let Some(pb) = &pb {
7958            pb.inc(total);
7959        }
7960        if let Some(pb) = pb {
7961            pb.finish_with_message("Master data generation complete");
7962        }
7963
7964        Ok(())
7965    }
7966
7967    /// Generate document flows (P2P and O2C).
7968    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
7969        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7970            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7971
7972        // Generate P2P chains
7973        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
7974        let months = (self.config.global.period_months as usize).max(1);
7975        let p2p_count = self
7976            .phase_config
7977            .p2p_chains
7978            .min(self.master_data.vendors.len() * 2 * months);
7979        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
7980
7981        // Convert P2P config from schema to generator config
7982        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
7983        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
7984        p2p_gen.set_country_pack(self.primary_pack().clone());
7985
7986        for i in 0..p2p_count {
7987            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
7988            let materials: Vec<&Material> = self
7989                .master_data
7990                .materials
7991                .iter()
7992                .skip(i % self.master_data.materials.len().max(1))
7993                .take(2.min(self.master_data.materials.len()))
7994                .collect();
7995
7996            if materials.is_empty() {
7997                continue;
7998            }
7999
8000            let company = &self.config.companies[i % self.config.companies.len()];
8001            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
8002            let fiscal_period = po_date.month() as u8;
8003            let created_by = if self.master_data.employees.is_empty() {
8004                "SYSTEM"
8005            } else {
8006                self.master_data.employees[i % self.master_data.employees.len()]
8007                    .user_id
8008                    .as_str()
8009            };
8010
8011            let chain = p2p_gen.generate_chain(
8012                &company.code,
8013                vendor,
8014                &materials,
8015                po_date,
8016                start_date.year() as u16,
8017                fiscal_period,
8018                created_by,
8019            );
8020
8021            // Flatten documents
8022            flows.purchase_orders.push(chain.purchase_order.clone());
8023            flows.goods_receipts.extend(chain.goods_receipts.clone());
8024            if let Some(vi) = &chain.vendor_invoice {
8025                flows.vendor_invoices.push(vi.clone());
8026            }
8027            if let Some(payment) = &chain.payment {
8028                flows.payments.push(payment.clone());
8029            }
8030            for remainder in &chain.remainder_payments {
8031                flows.payments.push(remainder.clone());
8032            }
8033            flows.p2p_chains.push(chain);
8034
8035            if let Some(pb) = &pb {
8036                pb.inc(1);
8037            }
8038        }
8039
8040        if let Some(pb) = pb {
8041            pb.finish_with_message("P2P document flows complete");
8042        }
8043
8044        // Generate O2C chains
8045        // Cap at ~2 SOs per customer per month to keep order volume realistic
8046        let o2c_count = self
8047            .phase_config
8048            .o2c_chains
8049            .min(self.master_data.customers.len() * 2 * months);
8050        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
8051
8052        // Convert O2C config from schema to generator config
8053        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
8054        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
8055        o2c_gen.set_country_pack(self.primary_pack().clone());
8056
8057        for i in 0..o2c_count {
8058            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
8059            let materials: Vec<&Material> = self
8060                .master_data
8061                .materials
8062                .iter()
8063                .skip(i % self.master_data.materials.len().max(1))
8064                .take(2.min(self.master_data.materials.len()))
8065                .collect();
8066
8067            if materials.is_empty() {
8068                continue;
8069            }
8070
8071            let company = &self.config.companies[i % self.config.companies.len()];
8072            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
8073            let fiscal_period = so_date.month() as u8;
8074            let created_by = if self.master_data.employees.is_empty() {
8075                "SYSTEM"
8076            } else {
8077                self.master_data.employees[i % self.master_data.employees.len()]
8078                    .user_id
8079                    .as_str()
8080            };
8081
8082            let chain = o2c_gen.generate_chain(
8083                &company.code,
8084                customer,
8085                &materials,
8086                so_date,
8087                start_date.year() as u16,
8088                fiscal_period,
8089                created_by,
8090            );
8091
8092            // Flatten documents
8093            flows.sales_orders.push(chain.sales_order.clone());
8094            flows.deliveries.extend(chain.deliveries.clone());
8095            if let Some(ci) = &chain.customer_invoice {
8096                flows.customer_invoices.push(ci.clone());
8097            }
8098            if let Some(receipt) = &chain.customer_receipt {
8099                flows.payments.push(receipt.clone());
8100            }
8101            // Extract remainder receipts (follow-up to partial payments)
8102            for receipt in &chain.remainder_receipts {
8103                flows.payments.push(receipt.clone());
8104            }
8105            flows.o2c_chains.push(chain);
8106
8107            if let Some(pb) = &pb {
8108                pb.inc(1);
8109            }
8110        }
8111
8112        if let Some(pb) = pb {
8113            pb.finish_with_message("O2C document flows complete");
8114        }
8115
8116        Ok(())
8117    }
8118
8119    /// Generate journal entries using parallel generation across multiple cores.
8120    fn generate_journal_entries(
8121        &mut self,
8122        coa: &Arc<ChartOfAccounts>,
8123    ) -> SynthResult<Vec<JournalEntry>> {
8124        use datasynth_core::traits::ParallelGenerator;
8125
8126        let total = self.calculate_total_transactions();
8127        let pb = self.create_progress_bar(total, "Generating Journal Entries");
8128
8129        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8130            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8131        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8132
8133        let company_codes: Vec<String> = self
8134            .config
8135            .companies
8136            .iter()
8137            .map(|c| c.code.clone())
8138            .collect();
8139
8140        let generator = JournalEntryGenerator::new_with_params(
8141            self.config.transactions.clone(),
8142            Arc::clone(coa),
8143            company_codes,
8144            start_date,
8145            end_date,
8146            self.seed,
8147        );
8148
8149        // Connect generated master data to ensure JEs reference real entities
8150        // Enable persona-based error injection for realistic human behavior
8151        // Pass fraud configuration for fraud injection
8152        let je_pack = self.primary_pack();
8153
8154        let mut generator = generator
8155            .with_master_data(
8156                &self.master_data.vendors,
8157                &self.master_data.customers,
8158                &self.master_data.materials,
8159            )
8160            .with_country_pack_names(je_pack)
8161            .with_country_pack_temporal(
8162                self.config.temporal_patterns.clone(),
8163                self.seed + 200,
8164                je_pack,
8165            )
8166            .with_persona_errors(true)
8167            .with_fraud_config(self.config.fraud.clone());
8168
8169        // Apply temporal drift if configured
8170        if self.config.temporal.enabled {
8171            let drift_config = self.config.temporal.to_core_config();
8172            generator = generator.with_drift_config(drift_config, self.seed + 100);
8173        }
8174
8175        // Check memory limit at start
8176        self.check_memory_limit()?;
8177
8178        // Determine parallelism: use available cores, but cap at total entries
8179        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
8180
8181        // Use parallel generation for datasets with 10K+ entries.
8182        // Below this threshold, the statistical properties of a single-seeded
8183        // generator (e.g. Benford compliance) are better preserved.
8184        let entries = if total >= 10_000 && num_threads > 1 {
8185            // Parallel path: split the generator across cores and generate in parallel.
8186            // Each sub-generator gets a unique seed for deterministic, independent generation.
8187            let sub_generators = generator.split(num_threads);
8188            let entries_per_thread = total as usize / num_threads;
8189            let remainder = total as usize % num_threads;
8190
8191            let batches: Vec<Vec<JournalEntry>> = sub_generators
8192                .into_par_iter()
8193                .enumerate()
8194                .map(|(i, mut gen)| {
8195                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
8196                    gen.generate_batch(count)
8197                })
8198                .collect();
8199
8200            // Merge all batches into a single Vec
8201            let entries = JournalEntryGenerator::merge_results(batches);
8202
8203            if let Some(pb) = &pb {
8204                pb.inc(total);
8205            }
8206            entries
8207        } else {
8208            // Sequential path for small datasets (< 1000 entries)
8209            let mut entries = Vec::with_capacity(total as usize);
8210            for _ in 0..total {
8211                let entry = generator.generate();
8212                entries.push(entry);
8213                if let Some(pb) = &pb {
8214                    pb.inc(1);
8215                }
8216            }
8217            entries
8218        };
8219
8220        if let Some(pb) = pb {
8221            pb.finish_with_message("Journal entries complete");
8222        }
8223
8224        Ok(entries)
8225    }
8226
8227    /// Generate journal entries from document flows.
8228    ///
8229    /// This creates proper GL entries for each document in the P2P and O2C flows,
8230    /// ensuring that document activity is reflected in the general ledger.
8231    fn generate_jes_from_document_flows(
8232        &mut self,
8233        flows: &DocumentFlowSnapshot,
8234    ) -> SynthResult<Vec<JournalEntry>> {
8235        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
8236        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
8237
8238        let je_config = match self.resolve_coa_framework() {
8239            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
8240            CoAFramework::GermanSkr04 => {
8241                let fa = datasynth_core::FrameworkAccounts::german_gaap();
8242                DocumentFlowJeConfig::from(&fa)
8243            }
8244            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
8245        };
8246
8247        let populate_fec = je_config.populate_fec_fields;
8248        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
8249
8250        // Build auxiliary account lookup from vendor/customer master data so that
8251        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
8252        // PCG "4010001") instead of raw partner IDs.
8253        if populate_fec {
8254            let mut aux_lookup = std::collections::HashMap::new();
8255            for vendor in &self.master_data.vendors {
8256                if let Some(ref aux) = vendor.auxiliary_gl_account {
8257                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
8258                }
8259            }
8260            for customer in &self.master_data.customers {
8261                if let Some(ref aux) = customer.auxiliary_gl_account {
8262                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
8263                }
8264            }
8265            if !aux_lookup.is_empty() {
8266                generator.set_auxiliary_account_lookup(aux_lookup);
8267            }
8268        }
8269
8270        let mut entries = Vec::new();
8271
8272        // Generate JEs from P2P chains
8273        for chain in &flows.p2p_chains {
8274            let chain_entries = generator.generate_from_p2p_chain(chain);
8275            entries.extend(chain_entries);
8276            if let Some(pb) = &pb {
8277                pb.inc(1);
8278            }
8279        }
8280
8281        // Generate JEs from O2C chains
8282        for chain in &flows.o2c_chains {
8283            let chain_entries = generator.generate_from_o2c_chain(chain);
8284            entries.extend(chain_entries);
8285            if let Some(pb) = &pb {
8286                pb.inc(1);
8287            }
8288        }
8289
8290        if let Some(pb) = pb {
8291            pb.finish_with_message(format!(
8292                "Generated {} JEs from document flows",
8293                entries.len()
8294            ));
8295        }
8296
8297        Ok(entries)
8298    }
8299
8300    /// Generate journal entries from payroll runs.
8301    ///
8302    /// Creates one JE per payroll run:
8303    /// - DR Salaries & Wages (6100) for gross pay
8304    /// - CR Payroll Clearing (9100) for gross pay
8305    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
8306        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
8307
8308        let mut jes = Vec::with_capacity(payroll_runs.len());
8309
8310        for run in payroll_runs {
8311            let mut je = JournalEntry::new_simple(
8312                format!("JE-PAYROLL-{}", run.payroll_id),
8313                run.company_code.clone(),
8314                run.run_date,
8315                format!("Payroll {}", run.payroll_id),
8316            );
8317
8318            // Debit Salaries & Wages for gross pay
8319            je.add_line(JournalEntryLine {
8320                line_number: 1,
8321                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
8322                debit_amount: run.total_gross,
8323                reference: Some(run.payroll_id.clone()),
8324                text: Some(format!(
8325                    "Payroll {} ({} employees)",
8326                    run.payroll_id, run.employee_count
8327                )),
8328                ..Default::default()
8329            });
8330
8331            // Credit Payroll Clearing for gross pay
8332            je.add_line(JournalEntryLine {
8333                line_number: 2,
8334                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
8335                credit_amount: run.total_gross,
8336                reference: Some(run.payroll_id.clone()),
8337                ..Default::default()
8338            });
8339
8340            jes.push(je);
8341        }
8342
8343        jes
8344    }
8345
8346    /// Generate journal entries from production orders.
8347    ///
8348    /// Creates one JE per completed production order:
8349    /// - DR Raw Materials (5100) for material consumption (actual_cost)
8350    /// - CR Inventory (1200) for material consumption
8351    fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
8352        use datasynth_core::accounts::{control_accounts, expense_accounts};
8353        use datasynth_core::models::ProductionOrderStatus;
8354
8355        let mut jes = Vec::new();
8356
8357        for order in production_orders {
8358            // Only generate JEs for completed or closed orders
8359            if !matches!(
8360                order.status,
8361                ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
8362            ) {
8363                continue;
8364            }
8365
8366            let mut je = JournalEntry::new_simple(
8367                format!("JE-MFG-{}", order.order_id),
8368                order.company_code.clone(),
8369                order.actual_end.unwrap_or(order.planned_end),
8370                format!(
8371                    "Production Order {} - {}",
8372                    order.order_id, order.material_description
8373                ),
8374            );
8375
8376            // Debit Raw Materials / Manufacturing expense for actual cost
8377            je.add_line(JournalEntryLine {
8378                line_number: 1,
8379                gl_account: expense_accounts::RAW_MATERIALS.to_string(),
8380                debit_amount: order.actual_cost,
8381                reference: Some(order.order_id.clone()),
8382                text: Some(format!(
8383                    "Material consumption for {}",
8384                    order.material_description
8385                )),
8386                quantity: Some(order.actual_quantity),
8387                unit: Some("EA".to_string()),
8388                ..Default::default()
8389            });
8390
8391            // Credit Inventory for material consumption
8392            je.add_line(JournalEntryLine {
8393                line_number: 2,
8394                gl_account: control_accounts::INVENTORY.to_string(),
8395                credit_amount: order.actual_cost,
8396                reference: Some(order.order_id.clone()),
8397                ..Default::default()
8398            });
8399
8400            jes.push(je);
8401        }
8402
8403        jes
8404    }
8405
8406    /// Link document flows to subledger records.
8407    ///
8408    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
8409    /// ensuring subledger data is coherent with document flow data.
8410    fn link_document_flows_to_subledgers(
8411        &mut self,
8412        flows: &DocumentFlowSnapshot,
8413    ) -> SynthResult<SubledgerSnapshot> {
8414        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
8415        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
8416
8417        // Build vendor/customer name maps from master data for realistic subledger names
8418        let vendor_names: std::collections::HashMap<String, String> = self
8419            .master_data
8420            .vendors
8421            .iter()
8422            .map(|v| (v.vendor_id.clone(), v.name.clone()))
8423            .collect();
8424        let customer_names: std::collections::HashMap<String, String> = self
8425            .master_data
8426            .customers
8427            .iter()
8428            .map(|c| (c.customer_id.clone(), c.name.clone()))
8429            .collect();
8430
8431        let mut linker = DocumentFlowLinker::new()
8432            .with_vendor_names(vendor_names)
8433            .with_customer_names(customer_names);
8434
8435        // Convert vendor invoices to AP invoices
8436        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
8437        if let Some(pb) = &pb {
8438            pb.inc(flows.vendor_invoices.len() as u64);
8439        }
8440
8441        // Convert customer invoices to AR invoices
8442        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
8443        if let Some(pb) = &pb {
8444            pb.inc(flows.customer_invoices.len() as u64);
8445        }
8446
8447        if let Some(pb) = pb {
8448            pb.finish_with_message(format!(
8449                "Linked {} AP and {} AR invoices",
8450                ap_invoices.len(),
8451                ar_invoices.len()
8452            ));
8453        }
8454
8455        Ok(SubledgerSnapshot {
8456            ap_invoices,
8457            ar_invoices,
8458            fa_records: Vec::new(),
8459            inventory_positions: Vec::new(),
8460            inventory_movements: Vec::new(),
8461            // Aging reports are computed after payment settlement in phase_document_flows.
8462            ar_aging_reports: Vec::new(),
8463            ap_aging_reports: Vec::new(),
8464            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
8465            depreciation_runs: Vec::new(),
8466            inventory_valuations: Vec::new(),
8467        })
8468    }
8469
8470    /// Generate OCPM events from document flows.
8471    ///
8472    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
8473    /// capturing the object-centric process perspective.
8474    #[allow(clippy::too_many_arguments)]
8475    fn generate_ocpm_events(
8476        &mut self,
8477        flows: &DocumentFlowSnapshot,
8478        sourcing: &SourcingSnapshot,
8479        hr: &HrSnapshot,
8480        manufacturing: &ManufacturingSnapshot,
8481        banking: &BankingSnapshot,
8482        audit: &AuditSnapshot,
8483        financial_reporting: &FinancialReportingSnapshot,
8484    ) -> SynthResult<OcpmSnapshot> {
8485        let total_chains = flows.p2p_chains.len()
8486            + flows.o2c_chains.len()
8487            + sourcing.sourcing_projects.len()
8488            + hr.payroll_runs.len()
8489            + manufacturing.production_orders.len()
8490            + banking.customers.len()
8491            + audit.engagements.len()
8492            + financial_reporting.bank_reconciliations.len();
8493        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
8494
8495        // Create OCPM event log with standard types
8496        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
8497        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
8498
8499        // Configure the OCPM generator
8500        let ocpm_config = OcpmGeneratorConfig {
8501            generate_p2p: true,
8502            generate_o2c: true,
8503            generate_s2c: !sourcing.sourcing_projects.is_empty(),
8504            generate_h2r: !hr.payroll_runs.is_empty(),
8505            generate_mfg: !manufacturing.production_orders.is_empty(),
8506            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
8507            generate_bank: !banking.customers.is_empty(),
8508            generate_audit: !audit.engagements.is_empty(),
8509            happy_path_rate: 0.75,
8510            exception_path_rate: 0.20,
8511            error_path_rate: 0.05,
8512            add_duration_variability: true,
8513            duration_std_dev_factor: 0.3,
8514        };
8515        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
8516        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
8517
8518        // Get available users for resource assignment
8519        let available_users: Vec<String> = self
8520            .master_data
8521            .employees
8522            .iter()
8523            .take(20)
8524            .map(|e| e.user_id.clone())
8525            .collect();
8526
8527        // Deterministic base date from config (avoids Utc::now() non-determinism)
8528        let fallback_date =
8529            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
8530        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8531            .unwrap_or(fallback_date);
8532        let base_midnight = base_date
8533            .and_hms_opt(0, 0, 0)
8534            .expect("midnight is always valid");
8535        let base_datetime =
8536            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
8537
8538        // Helper closure to add case results to event log
8539        let add_result = |event_log: &mut OcpmEventLog,
8540                          result: datasynth_ocpm::CaseGenerationResult| {
8541            for event in result.events {
8542                event_log.add_event(event);
8543            }
8544            for object in result.objects {
8545                event_log.add_object(object);
8546            }
8547            for relationship in result.relationships {
8548                event_log.add_relationship(relationship);
8549            }
8550            for corr in result.correlation_events {
8551                event_log.add_correlation_event(corr);
8552            }
8553            event_log.add_case(result.case_trace);
8554        };
8555
8556        // Generate events from P2P chains
8557        for chain in &flows.p2p_chains {
8558            let po = &chain.purchase_order;
8559            let documents = P2pDocuments::new(
8560                &po.header.document_id,
8561                &po.vendor_id,
8562                &po.header.company_code,
8563                po.total_net_amount,
8564                &po.header.currency,
8565                &ocpm_uuid_factory,
8566            )
8567            .with_goods_receipt(
8568                chain
8569                    .goods_receipts
8570                    .first()
8571                    .map(|gr| gr.header.document_id.as_str())
8572                    .unwrap_or(""),
8573                &ocpm_uuid_factory,
8574            )
8575            .with_invoice(
8576                chain
8577                    .vendor_invoice
8578                    .as_ref()
8579                    .map(|vi| vi.header.document_id.as_str())
8580                    .unwrap_or(""),
8581                &ocpm_uuid_factory,
8582            )
8583            .with_payment(
8584                chain
8585                    .payment
8586                    .as_ref()
8587                    .map(|p| p.header.document_id.as_str())
8588                    .unwrap_or(""),
8589                &ocpm_uuid_factory,
8590            );
8591
8592            let start_time =
8593                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
8594            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
8595            add_result(&mut event_log, result);
8596
8597            if let Some(pb) = &pb {
8598                pb.inc(1);
8599            }
8600        }
8601
8602        // Generate events from O2C chains
8603        for chain in &flows.o2c_chains {
8604            let so = &chain.sales_order;
8605            let documents = O2cDocuments::new(
8606                &so.header.document_id,
8607                &so.customer_id,
8608                &so.header.company_code,
8609                so.total_net_amount,
8610                &so.header.currency,
8611                &ocpm_uuid_factory,
8612            )
8613            .with_delivery(
8614                chain
8615                    .deliveries
8616                    .first()
8617                    .map(|d| d.header.document_id.as_str())
8618                    .unwrap_or(""),
8619                &ocpm_uuid_factory,
8620            )
8621            .with_invoice(
8622                chain
8623                    .customer_invoice
8624                    .as_ref()
8625                    .map(|ci| ci.header.document_id.as_str())
8626                    .unwrap_or(""),
8627                &ocpm_uuid_factory,
8628            )
8629            .with_receipt(
8630                chain
8631                    .customer_receipt
8632                    .as_ref()
8633                    .map(|r| r.header.document_id.as_str())
8634                    .unwrap_or(""),
8635                &ocpm_uuid_factory,
8636            );
8637
8638            let start_time =
8639                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
8640            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
8641            add_result(&mut event_log, result);
8642
8643            if let Some(pb) = &pb {
8644                pb.inc(1);
8645            }
8646        }
8647
8648        // Generate events from S2C sourcing projects
8649        for project in &sourcing.sourcing_projects {
8650            // Find vendor from contracts or qualifications
8651            let vendor_id = sourcing
8652                .contracts
8653                .iter()
8654                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
8655                .map(|c| c.vendor_id.clone())
8656                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
8657                .or_else(|| {
8658                    self.master_data
8659                        .vendors
8660                        .first()
8661                        .map(|v| v.vendor_id.clone())
8662                })
8663                .unwrap_or_else(|| "V000".to_string());
8664            let mut docs = S2cDocuments::new(
8665                &project.project_id,
8666                &vendor_id,
8667                &project.company_code,
8668                project.estimated_annual_spend,
8669                &ocpm_uuid_factory,
8670            );
8671            // Link RFx if available
8672            if let Some(rfx) = sourcing
8673                .rfx_events
8674                .iter()
8675                .find(|r| r.sourcing_project_id == project.project_id)
8676            {
8677                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
8678                // Link winning bid (status == Accepted)
8679                if let Some(bid) = sourcing.bids.iter().find(|b| {
8680                    b.rfx_id == rfx.rfx_id
8681                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
8682                }) {
8683                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
8684                }
8685            }
8686            // Link contract
8687            if let Some(contract) = sourcing
8688                .contracts
8689                .iter()
8690                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
8691            {
8692                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
8693            }
8694            let start_time = base_datetime - chrono::Duration::days(90);
8695            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
8696            add_result(&mut event_log, result);
8697
8698            if let Some(pb) = &pb {
8699                pb.inc(1);
8700            }
8701        }
8702
8703        // Generate events from H2R payroll runs
8704        for run in &hr.payroll_runs {
8705            // Use first matching payroll line item's employee, or fallback
8706            let employee_id = hr
8707                .payroll_line_items
8708                .iter()
8709                .find(|li| li.payroll_id == run.payroll_id)
8710                .map(|li| li.employee_id.as_str())
8711                .unwrap_or("EMP000");
8712            let docs = H2rDocuments::new(
8713                &run.payroll_id,
8714                employee_id,
8715                &run.company_code,
8716                run.total_gross,
8717                &ocpm_uuid_factory,
8718            )
8719            .with_time_entries(
8720                hr.time_entries
8721                    .iter()
8722                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
8723                    .take(5)
8724                    .map(|t| t.entry_id.as_str())
8725                    .collect(),
8726            );
8727            let start_time = base_datetime - chrono::Duration::days(30);
8728            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
8729            add_result(&mut event_log, result);
8730
8731            if let Some(pb) = &pb {
8732                pb.inc(1);
8733            }
8734        }
8735
8736        // Generate events from MFG production orders
8737        for order in &manufacturing.production_orders {
8738            let mut docs = MfgDocuments::new(
8739                &order.order_id,
8740                &order.material_id,
8741                &order.company_code,
8742                order.planned_quantity,
8743                &ocpm_uuid_factory,
8744            )
8745            .with_operations(
8746                order
8747                    .operations
8748                    .iter()
8749                    .map(|o| format!("OP-{:04}", o.operation_number))
8750                    .collect::<Vec<_>>()
8751                    .iter()
8752                    .map(std::string::String::as_str)
8753                    .collect(),
8754            );
8755            // Link quality inspection if available (via reference_id matching order_id)
8756            if let Some(insp) = manufacturing
8757                .quality_inspections
8758                .iter()
8759                .find(|i| i.reference_id == order.order_id)
8760            {
8761                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
8762            }
8763            // Link cycle count if available (match by material_id in items)
8764            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
8765                cc.items
8766                    .iter()
8767                    .any(|item| item.material_id == order.material_id)
8768            }) {
8769                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
8770            }
8771            let start_time = base_datetime - chrono::Duration::days(60);
8772            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
8773            add_result(&mut event_log, result);
8774
8775            if let Some(pb) = &pb {
8776                pb.inc(1);
8777            }
8778        }
8779
8780        // Generate events from Banking customers
8781        for customer in &banking.customers {
8782            let customer_id_str = customer.customer_id.to_string();
8783            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
8784            // Link accounts (primary_owner_id matches customer_id)
8785            if let Some(account) = banking
8786                .accounts
8787                .iter()
8788                .find(|a| a.primary_owner_id == customer.customer_id)
8789            {
8790                let account_id_str = account.account_id.to_string();
8791                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
8792                // Link transactions for this account
8793                let txn_strs: Vec<String> = banking
8794                    .transactions
8795                    .iter()
8796                    .filter(|t| t.account_id == account.account_id)
8797                    .take(10)
8798                    .map(|t| t.transaction_id.to_string())
8799                    .collect();
8800                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
8801                let txn_amounts: Vec<rust_decimal::Decimal> = banking
8802                    .transactions
8803                    .iter()
8804                    .filter(|t| t.account_id == account.account_id)
8805                    .take(10)
8806                    .map(|t| t.amount)
8807                    .collect();
8808                if !txn_ids.is_empty() {
8809                    docs = docs.with_transactions(txn_ids, txn_amounts);
8810                }
8811            }
8812            let start_time = base_datetime - chrono::Duration::days(180);
8813            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
8814            add_result(&mut event_log, result);
8815
8816            if let Some(pb) = &pb {
8817                pb.inc(1);
8818            }
8819        }
8820
8821        // Generate events from Audit engagements
8822        for engagement in &audit.engagements {
8823            let engagement_id_str = engagement.engagement_id.to_string();
8824            let docs = AuditDocuments::new(
8825                &engagement_id_str,
8826                &engagement.client_entity_id,
8827                &ocpm_uuid_factory,
8828            )
8829            .with_workpapers(
8830                audit
8831                    .workpapers
8832                    .iter()
8833                    .filter(|w| w.engagement_id == engagement.engagement_id)
8834                    .take(10)
8835                    .map(|w| w.workpaper_id.to_string())
8836                    .collect::<Vec<_>>()
8837                    .iter()
8838                    .map(std::string::String::as_str)
8839                    .collect(),
8840            )
8841            .with_evidence(
8842                audit
8843                    .evidence
8844                    .iter()
8845                    .filter(|e| e.engagement_id == engagement.engagement_id)
8846                    .take(10)
8847                    .map(|e| e.evidence_id.to_string())
8848                    .collect::<Vec<_>>()
8849                    .iter()
8850                    .map(std::string::String::as_str)
8851                    .collect(),
8852            )
8853            .with_risks(
8854                audit
8855                    .risk_assessments
8856                    .iter()
8857                    .filter(|r| r.engagement_id == engagement.engagement_id)
8858                    .take(5)
8859                    .map(|r| r.risk_id.to_string())
8860                    .collect::<Vec<_>>()
8861                    .iter()
8862                    .map(std::string::String::as_str)
8863                    .collect(),
8864            )
8865            .with_findings(
8866                audit
8867                    .findings
8868                    .iter()
8869                    .filter(|f| f.engagement_id == engagement.engagement_id)
8870                    .take(5)
8871                    .map(|f| f.finding_id.to_string())
8872                    .collect::<Vec<_>>()
8873                    .iter()
8874                    .map(std::string::String::as_str)
8875                    .collect(),
8876            )
8877            .with_judgments(
8878                audit
8879                    .judgments
8880                    .iter()
8881                    .filter(|j| j.engagement_id == engagement.engagement_id)
8882                    .take(5)
8883                    .map(|j| j.judgment_id.to_string())
8884                    .collect::<Vec<_>>()
8885                    .iter()
8886                    .map(std::string::String::as_str)
8887                    .collect(),
8888            );
8889            let start_time = base_datetime - chrono::Duration::days(120);
8890            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
8891            add_result(&mut event_log, result);
8892
8893            if let Some(pb) = &pb {
8894                pb.inc(1);
8895            }
8896        }
8897
8898        // Generate events from Bank Reconciliations
8899        for recon in &financial_reporting.bank_reconciliations {
8900            let docs = BankReconDocuments::new(
8901                &recon.reconciliation_id,
8902                &recon.bank_account_id,
8903                &recon.company_code,
8904                recon.bank_ending_balance,
8905                &ocpm_uuid_factory,
8906            )
8907            .with_statement_lines(
8908                recon
8909                    .statement_lines
8910                    .iter()
8911                    .take(20)
8912                    .map(|l| l.line_id.as_str())
8913                    .collect(),
8914            )
8915            .with_reconciling_items(
8916                recon
8917                    .reconciling_items
8918                    .iter()
8919                    .take(10)
8920                    .map(|i| i.item_id.as_str())
8921                    .collect(),
8922            );
8923            let start_time = base_datetime - chrono::Duration::days(30);
8924            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
8925            add_result(&mut event_log, result);
8926
8927            if let Some(pb) = &pb {
8928                pb.inc(1);
8929            }
8930        }
8931
8932        // Compute process variants
8933        event_log.compute_variants();
8934
8935        let summary = event_log.summary();
8936
8937        if let Some(pb) = pb {
8938            pb.finish_with_message(format!(
8939                "Generated {} OCPM events, {} objects",
8940                summary.event_count, summary.object_count
8941            ));
8942        }
8943
8944        Ok(OcpmSnapshot {
8945            event_count: summary.event_count,
8946            object_count: summary.object_count,
8947            case_count: summary.case_count,
8948            event_log: Some(event_log),
8949        })
8950    }
8951
8952    /// Inject anomalies into journal entries.
8953    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
8954        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
8955
8956        // Read anomaly rates from config instead of using hardcoded values.
8957        // Priority: anomaly_injection config > fraud config > default 0.02
8958        let total_rate = if self.config.anomaly_injection.enabled {
8959            self.config.anomaly_injection.rates.total_rate
8960        } else if self.config.fraud.enabled {
8961            self.config.fraud.fraud_rate
8962        } else {
8963            0.02
8964        };
8965
8966        let fraud_rate = if self.config.anomaly_injection.enabled {
8967            self.config.anomaly_injection.rates.fraud_rate
8968        } else {
8969            AnomalyRateConfig::default().fraud_rate
8970        };
8971
8972        let error_rate = if self.config.anomaly_injection.enabled {
8973            self.config.anomaly_injection.rates.error_rate
8974        } else {
8975            AnomalyRateConfig::default().error_rate
8976        };
8977
8978        let process_issue_rate = if self.config.anomaly_injection.enabled {
8979            self.config.anomaly_injection.rates.process_rate
8980        } else {
8981            AnomalyRateConfig::default().process_issue_rate
8982        };
8983
8984        let anomaly_config = AnomalyInjectorConfig {
8985            rates: AnomalyRateConfig {
8986                total_rate,
8987                fraud_rate,
8988                error_rate,
8989                process_issue_rate,
8990                ..Default::default()
8991            },
8992            seed: self.seed + 5000,
8993            ..Default::default()
8994        };
8995
8996        let mut injector = AnomalyInjector::new(anomaly_config);
8997        let result = injector.process_entries(entries);
8998
8999        if let Some(pb) = &pb {
9000            pb.inc(entries.len() as u64);
9001            pb.finish_with_message("Anomaly injection complete");
9002        }
9003
9004        let mut by_type = HashMap::new();
9005        for label in &result.labels {
9006            *by_type
9007                .entry(format!("{:?}", label.anomaly_type))
9008                .or_insert(0) += 1;
9009        }
9010
9011        Ok(AnomalyLabels {
9012            labels: result.labels,
9013            summary: Some(result.summary),
9014            by_type,
9015        })
9016    }
9017
9018    /// Validate journal entries using running balance tracker.
9019    ///
9020    /// Applies all entries to the balance tracker and validates:
9021    /// - Each entry is internally balanced (debits = credits)
9022    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
9023    ///
9024    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
9025    /// excluded from balance validation as they may be intentionally unbalanced.
9026    fn validate_journal_entries(
9027        &mut self,
9028        entries: &[JournalEntry],
9029    ) -> SynthResult<BalanceValidationResult> {
9030        // Filter out entries with human errors as they may be intentionally unbalanced
9031        let clean_entries: Vec<&JournalEntry> = entries
9032            .iter()
9033            .filter(|e| {
9034                e.header
9035                    .header_text
9036                    .as_ref()
9037                    .map(|t| !t.contains("[HUMAN_ERROR:"))
9038                    .unwrap_or(true)
9039            })
9040            .collect();
9041
9042        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
9043
9044        // Configure tracker to not fail on errors (collect them instead)
9045        let config = BalanceTrackerConfig {
9046            validate_on_each_entry: false,   // We'll validate at the end
9047            track_history: false,            // Skip history for performance
9048            fail_on_validation_error: false, // Collect errors, don't fail
9049            ..Default::default()
9050        };
9051        let validation_currency = self
9052            .config
9053            .companies
9054            .first()
9055            .map(|c| c.currency.clone())
9056            .unwrap_or_else(|| "USD".to_string());
9057
9058        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
9059
9060        // Apply clean entries (without human errors)
9061        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
9062        let errors = tracker.apply_entries(&clean_refs);
9063
9064        if let Some(pb) = &pb {
9065            pb.inc(entries.len() as u64);
9066        }
9067
9068        // Check if any entries were unbalanced
9069        // Note: When fail_on_validation_error is false, errors are stored in tracker
9070        let has_unbalanced = tracker
9071            .get_validation_errors()
9072            .iter()
9073            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
9074
9075        // Validate balance sheet for each company
9076        // Include both returned errors and collected validation errors
9077        let mut all_errors = errors;
9078        all_errors.extend(tracker.get_validation_errors().iter().cloned());
9079        let company_codes: Vec<String> = self
9080            .config
9081            .companies
9082            .iter()
9083            .map(|c| c.code.clone())
9084            .collect();
9085
9086        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9087            .map(|d| d + chrono::Months::new(self.config.global.period_months))
9088            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9089
9090        for company_code in &company_codes {
9091            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
9092                all_errors.push(e);
9093            }
9094        }
9095
9096        // Get statistics after all mutable operations are done
9097        let stats = tracker.get_statistics();
9098
9099        // Determine if balanced overall
9100        let is_balanced = all_errors.is_empty();
9101
9102        if let Some(pb) = pb {
9103            let msg = if is_balanced {
9104                "Balance validation passed"
9105            } else {
9106                "Balance validation completed with errors"
9107            };
9108            pb.finish_with_message(msg);
9109        }
9110
9111        Ok(BalanceValidationResult {
9112            validated: true,
9113            is_balanced,
9114            entries_processed: stats.entries_processed,
9115            total_debits: stats.total_debits,
9116            total_credits: stats.total_credits,
9117            accounts_tracked: stats.accounts_tracked,
9118            companies_tracked: stats.companies_tracked,
9119            validation_errors: all_errors,
9120            has_unbalanced_entries: has_unbalanced,
9121        })
9122    }
9123
9124    /// Inject data quality variations into journal entries.
9125    ///
9126    /// Applies typos, missing values, and format variations to make
9127    /// the synthetic data more realistic for testing data cleaning pipelines.
9128    fn inject_data_quality(
9129        &mut self,
9130        entries: &mut [JournalEntry],
9131    ) -> SynthResult<DataQualityStats> {
9132        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
9133
9134        // Build config from user-specified schema settings when data_quality is enabled;
9135        // otherwise fall back to the low-rate minimal() preset.
9136        let config = if self.config.data_quality.enabled {
9137            let dq = &self.config.data_quality;
9138            DataQualityConfig {
9139                enable_missing_values: dq.missing_values.enabled,
9140                missing_values: datasynth_generators::MissingValueConfig {
9141                    global_rate: dq.effective_missing_rate(),
9142                    ..Default::default()
9143                },
9144                enable_format_variations: dq.format_variations.enabled,
9145                format_variations: datasynth_generators::FormatVariationConfig {
9146                    date_variation_rate: dq.format_variations.dates.rate,
9147                    amount_variation_rate: dq.format_variations.amounts.rate,
9148                    identifier_variation_rate: dq.format_variations.identifiers.rate,
9149                    ..Default::default()
9150                },
9151                enable_duplicates: dq.duplicates.enabled,
9152                duplicates: datasynth_generators::DuplicateConfig {
9153                    duplicate_rate: dq.effective_duplicate_rate(),
9154                    ..Default::default()
9155                },
9156                enable_typos: dq.typos.enabled,
9157                typos: datasynth_generators::TypoConfig {
9158                    char_error_rate: dq.effective_typo_rate(),
9159                    ..Default::default()
9160                },
9161                enable_encoding_issues: dq.encoding_issues.enabled,
9162                encoding_issue_rate: dq.encoding_issues.rate,
9163                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
9164                track_statistics: true,
9165            }
9166        } else {
9167            DataQualityConfig::minimal()
9168        };
9169        let mut injector = DataQualityInjector::new(config);
9170
9171        // Wire country pack for locale-aware format baselines
9172        injector.set_country_pack(self.primary_pack().clone());
9173
9174        // Build context for missing value decisions
9175        let context = HashMap::new();
9176
9177        for entry in entries.iter_mut() {
9178            // Process header_text field (common target for typos)
9179            if let Some(text) = &entry.header.header_text {
9180                let processed = injector.process_text_field(
9181                    "header_text",
9182                    text,
9183                    &entry.header.document_id.to_string(),
9184                    &context,
9185                );
9186                match processed {
9187                    Some(new_text) if new_text != *text => {
9188                        entry.header.header_text = Some(new_text);
9189                    }
9190                    None => {
9191                        entry.header.header_text = None; // Missing value
9192                    }
9193                    _ => {}
9194                }
9195            }
9196
9197            // Process reference field
9198            if let Some(ref_text) = &entry.header.reference {
9199                let processed = injector.process_text_field(
9200                    "reference",
9201                    ref_text,
9202                    &entry.header.document_id.to_string(),
9203                    &context,
9204                );
9205                match processed {
9206                    Some(new_text) if new_text != *ref_text => {
9207                        entry.header.reference = Some(new_text);
9208                    }
9209                    None => {
9210                        entry.header.reference = None;
9211                    }
9212                    _ => {}
9213                }
9214            }
9215
9216            // Process user_persona field (potential for typos in user IDs)
9217            let user_persona = entry.header.user_persona.clone();
9218            if let Some(processed) = injector.process_text_field(
9219                "user_persona",
9220                &user_persona,
9221                &entry.header.document_id.to_string(),
9222                &context,
9223            ) {
9224                if processed != user_persona {
9225                    entry.header.user_persona = processed;
9226                }
9227            }
9228
9229            // Process line items
9230            for line in &mut entry.lines {
9231                // Process line description if present
9232                if let Some(ref text) = line.line_text {
9233                    let processed = injector.process_text_field(
9234                        "line_text",
9235                        text,
9236                        &entry.header.document_id.to_string(),
9237                        &context,
9238                    );
9239                    match processed {
9240                        Some(new_text) if new_text != *text => {
9241                            line.line_text = Some(new_text);
9242                        }
9243                        None => {
9244                            line.line_text = None;
9245                        }
9246                        _ => {}
9247                    }
9248                }
9249
9250                // Process cost_center if present
9251                if let Some(cc) = &line.cost_center {
9252                    let processed = injector.process_text_field(
9253                        "cost_center",
9254                        cc,
9255                        &entry.header.document_id.to_string(),
9256                        &context,
9257                    );
9258                    match processed {
9259                        Some(new_cc) if new_cc != *cc => {
9260                            line.cost_center = Some(new_cc);
9261                        }
9262                        None => {
9263                            line.cost_center = None;
9264                        }
9265                        _ => {}
9266                    }
9267                }
9268            }
9269
9270            if let Some(pb) = &pb {
9271                pb.inc(1);
9272            }
9273        }
9274
9275        if let Some(pb) = pb {
9276            pb.finish_with_message("Data quality injection complete");
9277        }
9278
9279        Ok(injector.stats().clone())
9280    }
9281
9282    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
9283    ///
9284    /// Creates complete audit documentation for each company in the configuration,
9285    /// following ISA standards:
9286    /// - ISA 210/220: Engagement acceptance and terms
9287    /// - ISA 230: Audit documentation (workpapers)
9288    /// - ISA 265: Control deficiencies (findings)
9289    /// - ISA 315/330: Risk assessment and response
9290    /// - ISA 500: Audit evidence
9291    /// - ISA 200: Professional judgment
9292    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
9293        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9294            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9295        let fiscal_year = start_date.year() as u16;
9296        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
9297
9298        // Calculate rough total revenue from entries for materiality
9299        let total_revenue: rust_decimal::Decimal = entries
9300            .iter()
9301            .flat_map(|e| e.lines.iter())
9302            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
9303            .map(|l| l.credit_amount)
9304            .sum();
9305
9306        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
9307        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
9308
9309        let mut snapshot = AuditSnapshot::default();
9310
9311        // Initialize generators
9312        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
9313        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
9314        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
9315        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
9316        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
9317        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
9318        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
9319        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
9320        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
9321        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
9322        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
9323        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
9324
9325        // Get list of accounts from CoA for risk assessment
9326        let accounts: Vec<String> = self
9327            .coa
9328            .as_ref()
9329            .map(|coa| {
9330                coa.get_postable_accounts()
9331                    .iter()
9332                    .map(|acc| acc.account_code().to_string())
9333                    .collect()
9334            })
9335            .unwrap_or_default();
9336
9337        // Generate engagements for each company
9338        for (i, company) in self.config.companies.iter().enumerate() {
9339            // Calculate company-specific revenue (proportional to volume weight)
9340            let company_revenue = total_revenue
9341                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
9342
9343            // Generate engagements for this company
9344            let engagements_for_company =
9345                self.phase_config.audit_engagements / self.config.companies.len().max(1);
9346            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
9347                1
9348            } else {
9349                0
9350            };
9351
9352            for _eng_idx in 0..(engagements_for_company + extra) {
9353                // Generate the engagement
9354                let mut engagement = engagement_gen.generate_engagement(
9355                    &company.code,
9356                    &company.name,
9357                    fiscal_year,
9358                    period_end,
9359                    company_revenue,
9360                    None, // Use default engagement type
9361                );
9362
9363                // Replace synthetic team IDs with real employee IDs from master data
9364                if !self.master_data.employees.is_empty() {
9365                    let emp_count = self.master_data.employees.len();
9366                    // Use employee IDs deterministically based on engagement index
9367                    let base = (i * 10 + _eng_idx) % emp_count;
9368                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
9369                        .employee_id
9370                        .clone();
9371                    engagement.engagement_manager_id = self.master_data.employees
9372                        [(base + 1) % emp_count]
9373                        .employee_id
9374                        .clone();
9375                    let real_team: Vec<String> = engagement
9376                        .team_member_ids
9377                        .iter()
9378                        .enumerate()
9379                        .map(|(j, _)| {
9380                            self.master_data.employees[(base + 2 + j) % emp_count]
9381                                .employee_id
9382                                .clone()
9383                        })
9384                        .collect();
9385                    engagement.team_member_ids = real_team;
9386                }
9387
9388                if let Some(pb) = &pb {
9389                    pb.inc(1);
9390                }
9391
9392                // Get team members from the engagement
9393                let team_members: Vec<String> = engagement.team_member_ids.clone();
9394
9395                // Generate workpapers for the engagement
9396                let workpapers =
9397                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
9398
9399                for wp in &workpapers {
9400                    if let Some(pb) = &pb {
9401                        pb.inc(1);
9402                    }
9403
9404                    // Generate evidence for each workpaper
9405                    let evidence = evidence_gen.generate_evidence_for_workpaper(
9406                        wp,
9407                        &team_members,
9408                        wp.preparer_date,
9409                    );
9410
9411                    for _ in &evidence {
9412                        if let Some(pb) = &pb {
9413                            pb.inc(1);
9414                        }
9415                    }
9416
9417                    snapshot.evidence.extend(evidence);
9418                }
9419
9420                // Generate risk assessments for the engagement
9421                let risks =
9422                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
9423
9424                for _ in &risks {
9425                    if let Some(pb) = &pb {
9426                        pb.inc(1);
9427                    }
9428                }
9429                snapshot.risk_assessments.extend(risks);
9430
9431                // Generate findings for the engagement
9432                let findings = finding_gen.generate_findings_for_engagement(
9433                    &engagement,
9434                    &workpapers,
9435                    &team_members,
9436                );
9437
9438                for _ in &findings {
9439                    if let Some(pb) = &pb {
9440                        pb.inc(1);
9441                    }
9442                }
9443                snapshot.findings.extend(findings);
9444
9445                // Generate professional judgments for the engagement
9446                let judgments =
9447                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
9448
9449                for _ in &judgments {
9450                    if let Some(pb) = &pb {
9451                        pb.inc(1);
9452                    }
9453                }
9454                snapshot.judgments.extend(judgments);
9455
9456                // ISA 505: External confirmations and responses
9457                let (confs, resps) =
9458                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
9459                snapshot.confirmations.extend(confs);
9460                snapshot.confirmation_responses.extend(resps);
9461
9462                // ISA 330: Procedure steps per workpaper
9463                let team_pairs: Vec<(String, String)> = team_members
9464                    .iter()
9465                    .map(|id| {
9466                        let name = self
9467                            .master_data
9468                            .employees
9469                            .iter()
9470                            .find(|e| e.employee_id == *id)
9471                            .map(|e| e.display_name.clone())
9472                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
9473                        (id.clone(), name)
9474                    })
9475                    .collect();
9476                for wp in &workpapers {
9477                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
9478                    snapshot.procedure_steps.extend(steps);
9479                }
9480
9481                // ISA 530: Samples per workpaper
9482                for wp in &workpapers {
9483                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
9484                        snapshot.samples.push(sample);
9485                    }
9486                }
9487
9488                // ISA 520: Analytical procedures
9489                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
9490                snapshot.analytical_results.extend(analytical);
9491
9492                // ISA 610: Internal audit function and reports
9493                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
9494                snapshot.ia_functions.push(ia_func);
9495                snapshot.ia_reports.extend(ia_reports);
9496
9497                // ISA 550: Related parties and transactions
9498                let vendor_names: Vec<String> = self
9499                    .master_data
9500                    .vendors
9501                    .iter()
9502                    .map(|v| v.name.clone())
9503                    .collect();
9504                let customer_names: Vec<String> = self
9505                    .master_data
9506                    .customers
9507                    .iter()
9508                    .map(|c| c.name.clone())
9509                    .collect();
9510                let (parties, rp_txns) =
9511                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
9512                snapshot.related_parties.extend(parties);
9513                snapshot.related_party_transactions.extend(rp_txns);
9514
9515                // Add workpapers after findings since findings need them
9516                snapshot.workpapers.extend(workpapers);
9517                snapshot.engagements.push(engagement);
9518            }
9519        }
9520
9521        // ----------------------------------------------------------------
9522        // ISA 600: Group audit — component auditors, plan, instructions, reports
9523        // ----------------------------------------------------------------
9524        if self.config.companies.len() > 1 {
9525            // Use materiality from the first engagement if available, otherwise
9526            // derive a reasonable figure from total revenue.
9527            let group_materiality = snapshot
9528                .engagements
9529                .first()
9530                .map(|e| e.materiality)
9531                .unwrap_or_else(|| {
9532                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
9533                    total_revenue * pct
9534                });
9535
9536            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
9537            let group_engagement_id = snapshot
9538                .engagements
9539                .first()
9540                .map(|e| e.engagement_id.to_string())
9541                .unwrap_or_else(|| "GROUP-ENG".to_string());
9542
9543            let component_snapshot = component_gen.generate(
9544                &self.config.companies,
9545                group_materiality,
9546                &group_engagement_id,
9547                period_end,
9548            );
9549
9550            snapshot.component_auditors = component_snapshot.component_auditors;
9551            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
9552            snapshot.component_instructions = component_snapshot.component_instructions;
9553            snapshot.component_reports = component_snapshot.component_reports;
9554
9555            info!(
9556                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
9557                snapshot.component_auditors.len(),
9558                snapshot.component_instructions.len(),
9559                snapshot.component_reports.len(),
9560            );
9561        }
9562
9563        // ----------------------------------------------------------------
9564        // ISA 210: Engagement letters — one per engagement
9565        // ----------------------------------------------------------------
9566        {
9567            let applicable_framework = self
9568                .config
9569                .accounting_standards
9570                .framework
9571                .as_ref()
9572                .map(|f| format!("{f:?}"))
9573                .unwrap_or_else(|| "IFRS".to_string());
9574
9575            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
9576            let entity_count = self.config.companies.len();
9577
9578            for engagement in &snapshot.engagements {
9579                let company = self
9580                    .config
9581                    .companies
9582                    .iter()
9583                    .find(|c| c.code == engagement.client_entity_id);
9584                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
9585                let letter_date = engagement.planning_start;
9586                let letter = letter_gen.generate(
9587                    &engagement.engagement_id.to_string(),
9588                    &engagement.client_name,
9589                    entity_count,
9590                    engagement.period_end_date,
9591                    currency,
9592                    &applicable_framework,
9593                    letter_date,
9594                );
9595                snapshot.engagement_letters.push(letter);
9596            }
9597
9598            info!(
9599                "ISA 210 engagement letters: {} generated",
9600                snapshot.engagement_letters.len()
9601            );
9602        }
9603
9604        // ----------------------------------------------------------------
9605        // ISA 560 / IAS 10: Subsequent events
9606        // ----------------------------------------------------------------
9607        {
9608            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
9609            let entity_codes: Vec<String> = self
9610                .config
9611                .companies
9612                .iter()
9613                .map(|c| c.code.clone())
9614                .collect();
9615            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
9616            info!(
9617                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
9618                subsequent.len(),
9619                subsequent
9620                    .iter()
9621                    .filter(|e| matches!(
9622                        e.classification,
9623                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
9624                    ))
9625                    .count(),
9626                subsequent
9627                    .iter()
9628                    .filter(|e| matches!(
9629                        e.classification,
9630                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
9631                    ))
9632                    .count(),
9633            );
9634            snapshot.subsequent_events = subsequent;
9635        }
9636
9637        // ----------------------------------------------------------------
9638        // ISA 402: Service organization controls
9639        // ----------------------------------------------------------------
9640        {
9641            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
9642            let entity_codes: Vec<String> = self
9643                .config
9644                .companies
9645                .iter()
9646                .map(|c| c.code.clone())
9647                .collect();
9648            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
9649            info!(
9650                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
9651                soc_snapshot.service_organizations.len(),
9652                soc_snapshot.soc_reports.len(),
9653                soc_snapshot.user_entity_controls.len(),
9654            );
9655            snapshot.service_organizations = soc_snapshot.service_organizations;
9656            snapshot.soc_reports = soc_snapshot.soc_reports;
9657            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
9658        }
9659
9660        // ----------------------------------------------------------------
9661        // ISA 570: Going concern assessments
9662        // ----------------------------------------------------------------
9663        {
9664            use datasynth_generators::audit::going_concern_generator::{
9665                GoingConcernGenerator, GoingConcernInput,
9666            };
9667            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
9668            let entity_codes: Vec<String> = self
9669                .config
9670                .companies
9671                .iter()
9672                .map(|c| c.code.clone())
9673                .collect();
9674            // Assessment date = period end + 75 days (typical sign-off window).
9675            let assessment_date = period_end + chrono::Duration::days(75);
9676            let period_label = format!("FY{}", period_end.year());
9677
9678            // Build financial inputs from actual journal entries.
9679            //
9680            // We derive approximate P&L, working capital, and operating cash flow
9681            // by aggregating GL account balances from the journal entry population.
9682            // Account ranges used (standard chart):
9683            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
9684            //   Expenses:        6xxx (debit-normal)
9685            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
9686            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
9687            //   Operating CF:    net income adjusted for D&A (rough proxy)
9688            let gc_inputs: Vec<GoingConcernInput> = self
9689                .config
9690                .companies
9691                .iter()
9692                .map(|company| {
9693                    let code = &company.code;
9694                    let mut revenue = rust_decimal::Decimal::ZERO;
9695                    let mut expenses = rust_decimal::Decimal::ZERO;
9696                    let mut current_assets = rust_decimal::Decimal::ZERO;
9697                    let mut current_liabs = rust_decimal::Decimal::ZERO;
9698                    let mut total_debt = rust_decimal::Decimal::ZERO;
9699
9700                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
9701                        for line in &je.lines {
9702                            let acct = line.gl_account.as_str();
9703                            let net = line.debit_amount - line.credit_amount;
9704                            if acct.starts_with('4') {
9705                                // Revenue accounts: credit-normal, so negative net = revenue earned
9706                                revenue -= net;
9707                            } else if acct.starts_with('6') {
9708                                // Expense accounts: debit-normal
9709                                expenses += net;
9710                            }
9711                            // Balance sheet accounts for working capital
9712                            if acct.starts_with('1') {
9713                                // Current asset accounts (1000–1499)
9714                                if let Ok(n) = acct.parse::<u32>() {
9715                                    if (1000..=1499).contains(&n) {
9716                                        current_assets += net;
9717                                    }
9718                                }
9719                            } else if acct.starts_with('2') {
9720                                if let Ok(n) = acct.parse::<u32>() {
9721                                    if (2000..=2499).contains(&n) {
9722                                        // Current liabilities
9723                                        current_liabs -= net; // credit-normal
9724                                    } else if (2500..=2999).contains(&n) {
9725                                        // Long-term debt
9726                                        total_debt -= net;
9727                                    }
9728                                }
9729                            }
9730                        }
9731                    }
9732
9733                    let net_income = revenue - expenses;
9734                    let working_capital = current_assets - current_liabs;
9735                    // Rough operating CF proxy: net income (full accrual CF calculation
9736                    // is done separately in the cash flow statement generator)
9737                    let operating_cash_flow = net_income;
9738
9739                    GoingConcernInput {
9740                        entity_code: code.clone(),
9741                        net_income,
9742                        working_capital,
9743                        operating_cash_flow,
9744                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
9745                        assessment_date,
9746                    }
9747                })
9748                .collect();
9749
9750            let assessments = if gc_inputs.is_empty() {
9751                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
9752            } else {
9753                gc_gen.generate_for_entities_with_inputs(
9754                    &entity_codes,
9755                    &gc_inputs,
9756                    assessment_date,
9757                    &period_label,
9758                )
9759            };
9760            info!(
9761                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
9762                assessments.len(),
9763                assessments.iter().filter(|a| matches!(
9764                    a.auditor_conclusion,
9765                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
9766                )).count(),
9767                assessments.iter().filter(|a| matches!(
9768                    a.auditor_conclusion,
9769                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
9770                )).count(),
9771                assessments.iter().filter(|a| matches!(
9772                    a.auditor_conclusion,
9773                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
9774                )).count(),
9775            );
9776            snapshot.going_concern_assessments = assessments;
9777        }
9778
9779        // ----------------------------------------------------------------
9780        // ISA 540: Accounting estimates
9781        // ----------------------------------------------------------------
9782        {
9783            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
9784            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
9785            let entity_codes: Vec<String> = self
9786                .config
9787                .companies
9788                .iter()
9789                .map(|c| c.code.clone())
9790                .collect();
9791            let estimates = est_gen.generate_for_entities(&entity_codes);
9792            info!(
9793                "ISA 540 accounting estimates: {} estimates across {} entities \
9794                 ({} with retrospective reviews, {} with auditor point estimates)",
9795                estimates.len(),
9796                entity_codes.len(),
9797                estimates
9798                    .iter()
9799                    .filter(|e| e.retrospective_review.is_some())
9800                    .count(),
9801                estimates
9802                    .iter()
9803                    .filter(|e| e.auditor_point_estimate.is_some())
9804                    .count(),
9805            );
9806            snapshot.accounting_estimates = estimates;
9807        }
9808
9809        // ----------------------------------------------------------------
9810        // ISA 700/701/705/706: Audit opinions (one per engagement)
9811        // ----------------------------------------------------------------
9812        {
9813            use datasynth_generators::audit::audit_opinion_generator::{
9814                AuditOpinionGenerator, AuditOpinionInput,
9815            };
9816
9817            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
9818
9819            // Build inputs — one per engagement, linking findings and going concern.
9820            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
9821                .engagements
9822                .iter()
9823                .map(|eng| {
9824                    // Collect findings for this engagement.
9825                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
9826                        .findings
9827                        .iter()
9828                        .filter(|f| f.engagement_id == eng.engagement_id)
9829                        .cloned()
9830                        .collect();
9831
9832                    // Going concern for this entity.
9833                    let gc = snapshot
9834                        .going_concern_assessments
9835                        .iter()
9836                        .find(|g| g.entity_code == eng.client_entity_id)
9837                        .cloned();
9838
9839                    // Component reports: every report in the snapshot is attached (no per-engagement filter).
9840                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
9841                        snapshot.component_reports.clone();
9842
9843                    let auditor = self
9844                        .master_data
9845                        .employees
9846                        .first()
9847                        .map(|e| e.display_name.clone())
9848                        .unwrap_or_else(|| "Global Audit LLP".into());
9849
9850                    let partner = self
9851                        .master_data
9852                        .employees
9853                        .get(1)
9854                        .map(|e| e.display_name.clone())
9855                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
9856
9857                    AuditOpinionInput {
9858                        entity_code: eng.client_entity_id.clone(),
9859                        entity_name: eng.client_name.clone(),
9860                        engagement_id: eng.engagement_id,
9861                        period_end: eng.period_end_date,
9862                        findings: eng_findings,
9863                        going_concern: gc,
9864                        component_reports: comp_reports,
9865                        // Mark as US-listed when audit standards include PCAOB.
9866                        is_us_listed: {
9867                            let fw = &self.config.audit_standards.isa_compliance.framework;
9868                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
9869                        },
9870                        auditor_name: auditor,
9871                        engagement_partner: partner,
9872                    }
9873                })
9874                .collect();
9875
9876            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
9877
9878            for go in &generated_opinions {
9879                snapshot
9880                    .key_audit_matters
9881                    .extend(go.key_audit_matters.clone());
9882            }
9883            snapshot.audit_opinions = generated_opinions
9884                .into_iter()
9885                .map(|go| go.opinion)
9886                .collect();
9887
9888            info!(
9889                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
9890                snapshot.audit_opinions.len(),
9891                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
9892                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
9893                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
9894                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
9895            );
9896        }
9897
9898        // ----------------------------------------------------------------
9899        // SOX 302 / 404 assessments
9900        // ----------------------------------------------------------------
9901        {
9902            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
9903
9904            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
9905
9906            for (i, company) in self.config.companies.iter().enumerate() {
9907                // Collect findings for this company's engagements.
9908                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
9909                    .engagements
9910                    .iter()
9911                    .filter(|e| e.client_entity_id == company.code)
9912                    .map(|e| e.engagement_id)
9913                    .collect();
9914
9915                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
9916                    .findings
9917                    .iter()
9918                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
9919                    .cloned()
9920                    .collect();
9921
9922                // Derive executive names from employee list.
9923                let emp_count = self.master_data.employees.len();
9924                let ceo_name = if emp_count > 0 {
9925                    self.master_data.employees[i % emp_count]
9926                        .display_name
9927                        .clone()
9928                } else {
9929                    format!("CEO of {}", company.name)
9930                };
9931                let cfo_name = if emp_count > 1 {
9932                    self.master_data.employees[(i + 1) % emp_count]
9933                        .display_name
9934                        .clone()
9935                } else {
9936                    format!("CFO of {}", company.name)
9937                };
9938
9939                // Use engagement materiality if available.
9940                let materiality = snapshot
9941                    .engagements
9942                    .iter()
9943                    .find(|e| e.client_entity_id == company.code)
9944                    .map(|e| e.materiality)
9945                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
9946
9947                let input = SoxGeneratorInput {
9948                    company_code: company.code.clone(),
9949                    company_name: company.name.clone(),
9950                    fiscal_year,
9951                    period_end,
9952                    findings: company_findings,
9953                    ceo_name,
9954                    cfo_name,
9955                    materiality_threshold: materiality,
9956                    revenue_percent: rust_decimal::Decimal::from(100),
9957                    assets_percent: rust_decimal::Decimal::from(100),
9958                    significant_accounts: vec![
9959                        "Revenue".into(),
9960                        "Accounts Receivable".into(),
9961                        "Inventory".into(),
9962                        "Fixed Assets".into(),
9963                        "Accounts Payable".into(),
9964                    ],
9965                };
9966
9967                let (certs, assessment) = sox_gen.generate(&input);
9968                snapshot.sox_302_certifications.extend(certs);
9969                snapshot.sox_404_assessments.push(assessment);
9970            }
9971
9972            info!(
9973                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
9974                snapshot.sox_302_certifications.len(),
9975                snapshot.sox_404_assessments.len(),
9976                snapshot
9977                    .sox_404_assessments
9978                    .iter()
9979                    .filter(|a| a.icfr_effective)
9980                    .count(),
9981                snapshot
9982                    .sox_404_assessments
9983                    .iter()
9984                    .filter(|a| !a.icfr_effective)
9985                    .count(),
9986            );
9987        }
9988
9989        // ----------------------------------------------------------------
9990        // ISA 320: Materiality calculations (one per entity)
9991        // ----------------------------------------------------------------
9992        {
9993            use datasynth_generators::audit::materiality_generator::{
9994                MaterialityGenerator, MaterialityInput,
9995            };
9996
9997            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
9998
9999            // Compute per-company financials from JEs.
10000            // Asset accounts start with '1', equity with '3', revenue with '4',
10001            // expense accounts with '5' or '6'.
10002            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
10003
10004            for company in &self.config.companies {
10005                let company_code = company.code.clone();
10006
10007                // Revenue: credit-side entries on 4xxx accounts
10008                let company_revenue: rust_decimal::Decimal = entries
10009                    .iter()
10010                    .filter(|e| e.company_code() == company_code)
10011                    .flat_map(|e| e.lines.iter())
10012                    .filter(|l| l.account_code.starts_with('4'))
10013                    .map(|l| l.credit_amount)
10014                    .sum();
10015
10016                // Total assets: sum of debit-side amounts on 1xxx accounts (credits are not netted)
10017                let total_assets: rust_decimal::Decimal = entries
10018                    .iter()
10019                    .filter(|e| e.company_code() == company_code)
10020                    .flat_map(|e| e.lines.iter())
10021                    .filter(|l| l.account_code.starts_with('1'))
10022                    .map(|l| l.debit_amount)
10023                    .sum();
10024
10025                // Expenses: debit-side entries on 5xxx/6xxx accounts
10026                let total_expenses: rust_decimal::Decimal = entries
10027                    .iter()
10028                    .filter(|e| e.company_code() == company_code)
10029                    .flat_map(|e| e.lines.iter())
10030                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
10031                    .map(|l| l.debit_amount)
10032                    .sum();
10033
10034                // Equity: sum of credit-side amounts on 3xxx accounts (debits are not netted)
10035                let equity: rust_decimal::Decimal = entries
10036                    .iter()
10037                    .filter(|e| e.company_code() == company_code)
10038                    .flat_map(|e| e.lines.iter())
10039                    .filter(|l| l.account_code.starts_with('3'))
10040                    .map(|l| l.credit_amount)
10041                    .sum();
10042
10043                let pretax_income = company_revenue - total_expenses;
10044
10045                // If no company-specific data, fall back to proportional share
10046                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
10047                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
10048                        .unwrap_or(rust_decimal::Decimal::ONE);
10049                    (
10050                        total_revenue * w,
10051                        total_revenue * w * rust_decimal::Decimal::from(3),
10052                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
10053                        total_revenue * w * rust_decimal::Decimal::from(2),
10054                    )
10055                } else {
10056                    (company_revenue, total_assets, pretax_income, equity)
10057                };
10058
10059                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
10060
10061                materiality_inputs.push(MaterialityInput {
10062                    entity_code: company_code,
10063                    period: format!("FY{}", fiscal_year),
10064                    revenue: rev,
10065                    pretax_income: pti,
10066                    total_assets: assets,
10067                    equity: eq,
10068                    gross_profit,
10069                });
10070            }
10071
10072            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
10073
10074            info!(
10075                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
10076                 {} total assets, {} equity benchmarks)",
10077                snapshot.materiality_calculations.len(),
10078                snapshot
10079                    .materiality_calculations
10080                    .iter()
10081                    .filter(|m| matches!(
10082                        m.benchmark,
10083                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
10084                    ))
10085                    .count(),
10086                snapshot
10087                    .materiality_calculations
10088                    .iter()
10089                    .filter(|m| matches!(
10090                        m.benchmark,
10091                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
10092                    ))
10093                    .count(),
10094                snapshot
10095                    .materiality_calculations
10096                    .iter()
10097                    .filter(|m| matches!(
10098                        m.benchmark,
10099                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
10100                    ))
10101                    .count(),
10102                snapshot
10103                    .materiality_calculations
10104                    .iter()
10105                    .filter(|m| matches!(
10106                        m.benchmark,
10107                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
10108                    ))
10109                    .count(),
10110            );
10111        }
10112
10113        // ----------------------------------------------------------------
10114        // ISA 315: Combined Risk Assessments (per entity, per account area)
10115        // ----------------------------------------------------------------
10116        {
10117            use datasynth_generators::audit::cra_generator::CraGenerator;
10118
10119            let mut cra_gen = CraGenerator::new(self.seed + 8315);
10120
10121            for company in &self.config.companies {
10122                let cras = cra_gen.generate_for_entity(&company.code, None);
10123                snapshot.combined_risk_assessments.extend(cras);
10124            }
10125
10126            let significant_count = snapshot
10127                .combined_risk_assessments
10128                .iter()
10129                .filter(|c| c.significant_risk)
10130                .count();
10131            let high_cra_count = snapshot
10132                .combined_risk_assessments
10133                .iter()
10134                .filter(|c| {
10135                    matches!(
10136                        c.combined_risk,
10137                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
10138                    )
10139                })
10140                .count();
10141
10142            info!(
10143                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
10144                snapshot.combined_risk_assessments.len(),
10145                significant_count,
10146                high_cra_count,
10147            );
10148        }
10149
10150        // ----------------------------------------------------------------
10151        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
10152        // ----------------------------------------------------------------
10153        {
10154            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
10155
10156            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
10157
10158            // Group CRAs by entity and use per-entity tolerable error from materiality
10159            for company in &self.config.companies {
10160                let entity_code = company.code.clone();
10161
10162                // Find tolerable error for this entity (= performance materiality)
10163                let tolerable_error = snapshot
10164                    .materiality_calculations
10165                    .iter()
10166                    .find(|m| m.entity_code == entity_code)
10167                    .map(|m| m.tolerable_error);
10168
10169                // Collect CRAs for this entity
10170                let entity_cras: Vec<_> = snapshot
10171                    .combined_risk_assessments
10172                    .iter()
10173                    .filter(|c| c.entity_code == entity_code)
10174                    .cloned()
10175                    .collect();
10176
10177                if !entity_cras.is_empty() {
10178                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
10179                    snapshot.sampling_plans.extend(plans);
10180                    snapshot.sampled_items.extend(items);
10181                }
10182            }
10183
10184            let misstatement_count = snapshot
10185                .sampled_items
10186                .iter()
10187                .filter(|i| i.misstatement_found)
10188                .count();
10189
10190            info!(
10191                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
10192                snapshot.sampling_plans.len(),
10193                snapshot.sampled_items.len(),
10194                misstatement_count,
10195            );
10196        }
10197
10198        // ----------------------------------------------------------------
10199        // ISA 315: Significant Classes of Transactions (SCOTS)
10200        // ----------------------------------------------------------------
10201        {
10202            use datasynth_generators::audit::scots_generator::{
10203                ScotsGenerator, ScotsGeneratorConfig,
10204            };
10205
10206            let ic_enabled = self.config.intercompany.enabled;
10207
10208            let config = ScotsGeneratorConfig {
10209                intercompany_enabled: ic_enabled,
10210                ..ScotsGeneratorConfig::default()
10211            };
10212            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
10213
10214            for company in &self.config.companies {
10215                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
10216                snapshot
10217                    .significant_transaction_classes
10218                    .extend(entity_scots);
10219            }
10220
10221            let estimation_count = snapshot
10222                .significant_transaction_classes
10223                .iter()
10224                .filter(|s| {
10225                    matches!(
10226                        s.transaction_type,
10227                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
10228                    )
10229                })
10230                .count();
10231
10232            info!(
10233                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
10234                snapshot.significant_transaction_classes.len(),
10235                estimation_count,
10236            );
10237        }
10238
10239        // ----------------------------------------------------------------
10240        // ISA 520: Unusual Item Markers
10241        // ----------------------------------------------------------------
10242        {
10243            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
10244
10245            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
10246            let entity_codes: Vec<String> = self
10247                .config
10248                .companies
10249                .iter()
10250                .map(|c| c.code.clone())
10251                .collect();
10252            let unusual_flags =
10253                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
10254            info!(
10255                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
10256                unusual_flags.len(),
10257                unusual_flags
10258                    .iter()
10259                    .filter(|f| matches!(
10260                        f.severity,
10261                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
10262                    ))
10263                    .count(),
10264                unusual_flags
10265                    .iter()
10266                    .filter(|f| matches!(
10267                        f.severity,
10268                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
10269                    ))
10270                    .count(),
10271                unusual_flags
10272                    .iter()
10273                    .filter(|f| matches!(
10274                        f.severity,
10275                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
10276                    ))
10277                    .count(),
10278            );
10279            snapshot.unusual_items = unusual_flags;
10280        }
10281
10282        // ----------------------------------------------------------------
10283        // ISA 520: Analytical Relationships
10284        // ----------------------------------------------------------------
10285        {
10286            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
10287
10288            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
10289            let entity_codes: Vec<String> = self
10290                .config
10291                .companies
10292                .iter()
10293                .map(|c| c.code.clone())
10294                .collect();
10295            let current_period_label = format!("FY{fiscal_year}");
10296            let prior_period_label = format!("FY{}", fiscal_year - 1);
10297            let analytical_rels = ar_gen.generate_for_entities(
10298                &entity_codes,
10299                entries,
10300                &current_period_label,
10301                &prior_period_label,
10302            );
10303            let out_of_range = analytical_rels
10304                .iter()
10305                .filter(|r| !r.within_expected_range)
10306                .count();
10307            info!(
10308                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
10309                analytical_rels.len(),
10310                out_of_range,
10311            );
10312            snapshot.analytical_relationships = analytical_rels;
10313        }
10314
10315        if let Some(pb) = pb {
10316            pb.finish_with_message(format!(
10317                "Audit data: {} engagements, {} workpapers, {} evidence, \
10318                 {} confirmations, {} procedure steps, {} samples, \
10319                 {} analytical, {} IA funcs, {} related parties, \
10320                 {} component auditors, {} letters, {} subsequent events, \
10321                 {} service orgs, {} going concern, {} accounting estimates, \
10322                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
10323                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
10324                 {} unusual items, {} analytical relationships",
10325                snapshot.engagements.len(),
10326                snapshot.workpapers.len(),
10327                snapshot.evidence.len(),
10328                snapshot.confirmations.len(),
10329                snapshot.procedure_steps.len(),
10330                snapshot.samples.len(),
10331                snapshot.analytical_results.len(),
10332                snapshot.ia_functions.len(),
10333                snapshot.related_parties.len(),
10334                snapshot.component_auditors.len(),
10335                snapshot.engagement_letters.len(),
10336                snapshot.subsequent_events.len(),
10337                snapshot.service_organizations.len(),
10338                snapshot.going_concern_assessments.len(),
10339                snapshot.accounting_estimates.len(),
10340                snapshot.audit_opinions.len(),
10341                snapshot.key_audit_matters.len(),
10342                snapshot.sox_302_certifications.len(),
10343                snapshot.sox_404_assessments.len(),
10344                snapshot.materiality_calculations.len(),
10345                snapshot.combined_risk_assessments.len(),
10346                snapshot.sampling_plans.len(),
10347                snapshot.significant_transaction_classes.len(),
10348                snapshot.unusual_items.len(),
10349                snapshot.analytical_relationships.len(),
10350            ));
10351        }
10352
10353        Ok(snapshot)
10354    }
10355
10356    /// Export journal entries as graph data for ML training and network reconstruction.
10357    ///
10358    /// Builds a transaction graph where:
10359    /// - Nodes are GL accounts
10360    /// - Edges are money flows from credit to debit accounts
10361    /// - Edge attributes include amount, date, business process, anomaly flags
10362    fn export_graphs(
10363        &mut self,
10364        entries: &[JournalEntry],
10365        _coa: &Arc<ChartOfAccounts>,
10366        stats: &mut EnhancedGenerationStatistics,
10367    ) -> SynthResult<GraphExportSnapshot> {
10368        let pb = self.create_progress_bar(100, "Exporting Graphs");
10369
10370        let mut snapshot = GraphExportSnapshot::default();
10371
10372        // Get output directory
10373        let output_dir = self
10374            .output_path
10375            .clone()
10376            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
10377        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
10378
10379        // Process each graph type configuration
10380        for graph_type in &self.config.graph_export.graph_types {
10381            if let Some(pb) = &pb {
10382                pb.inc(10);
10383            }
10384
10385            // Build transaction graph
10386            let graph_config = TransactionGraphConfig {
10387                include_vendors: false,
10388                include_customers: false,
10389                create_debit_credit_edges: true,
10390                include_document_nodes: graph_type.include_document_nodes,
10391                min_edge_weight: graph_type.min_edge_weight,
10392                aggregate_parallel_edges: graph_type.aggregate_edges,
10393                framework: None,
10394            };
10395
10396            let mut builder = TransactionGraphBuilder::new(graph_config);
10397            builder.add_journal_entries(entries);
10398            let graph = builder.build();
10399
10400            // Update stats
10401            stats.graph_node_count += graph.node_count();
10402            stats.graph_edge_count += graph.edge_count();
10403
10404            if let Some(pb) = &pb {
10405                pb.inc(40);
10406            }
10407
10408            // Export to each configured format
10409            for format in &self.config.graph_export.formats {
10410                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
10411
10412                // Create output directory
10413                if let Err(e) = std::fs::create_dir_all(&format_dir) {
10414                    warn!("Failed to create graph output directory: {}", e);
10415                    continue;
10416                }
10417
10418                match format {
10419                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
10420                        let pyg_config = PyGExportConfig {
10421                            common: datasynth_graph::CommonExportConfig {
10422                                export_node_features: true,
10423                                export_edge_features: true,
10424                                export_node_labels: true,
10425                                export_edge_labels: true,
10426                                export_masks: true,
10427                                train_ratio: self.config.graph_export.train_ratio,
10428                                val_ratio: self.config.graph_export.validation_ratio,
10429                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
10430                            },
10431                            one_hot_categoricals: false,
10432                        };
10433
10434                        let exporter = PyGExporter::new(pyg_config);
10435                        match exporter.export(&graph, &format_dir) {
10436                            Ok(metadata) => {
10437                                snapshot.exports.insert(
10438                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
10439                                    GraphExportInfo {
10440                                        name: graph_type.name.clone(),
10441                                        format: "pytorch_geometric".to_string(),
10442                                        output_path: format_dir.clone(),
10443                                        node_count: metadata.num_nodes,
10444                                        edge_count: metadata.num_edges,
10445                                    },
10446                                );
10447                                snapshot.graph_count += 1;
10448                            }
10449                            Err(e) => {
10450                                warn!("Failed to export PyTorch Geometric graph: {}", e);
10451                            }
10452                        }
10453                    }
10454                    datasynth_config::schema::GraphExportFormat::Neo4j => {
10455                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
10456
10457                        let neo4j_config = Neo4jExportConfig {
10458                            export_node_properties: true,
10459                            export_edge_properties: true,
10460                            export_features: true,
10461                            generate_cypher: true,
10462                            generate_admin_import: true,
10463                            database_name: "synth".to_string(),
10464                            cypher_batch_size: 1000,
10465                        };
10466
10467                        let exporter = Neo4jExporter::new(neo4j_config);
10468                        match exporter.export(&graph, &format_dir) {
10469                            Ok(metadata) => {
10470                                snapshot.exports.insert(
10471                                    format!("{}_{}", graph_type.name, "neo4j"),
10472                                    GraphExportInfo {
10473                                        name: graph_type.name.clone(),
10474                                        format: "neo4j".to_string(),
10475                                        output_path: format_dir.clone(),
10476                                        node_count: metadata.num_nodes,
10477                                        edge_count: metadata.num_edges,
10478                                    },
10479                                );
10480                                snapshot.graph_count += 1;
10481                            }
10482                            Err(e) => {
10483                                warn!("Failed to export Neo4j graph: {}", e);
10484                            }
10485                        }
10486                    }
10487                    datasynth_config::schema::GraphExportFormat::Dgl => {
10488                        use datasynth_graph::{DGLExportConfig, DGLExporter};
10489
10490                        let dgl_config = DGLExportConfig {
10491                            common: datasynth_graph::CommonExportConfig {
10492                                export_node_features: true,
10493                                export_edge_features: true,
10494                                export_node_labels: true,
10495                                export_edge_labels: true,
10496                                export_masks: true,
10497                                train_ratio: self.config.graph_export.train_ratio,
10498                                val_ratio: self.config.graph_export.validation_ratio,
10499                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
10500                            },
10501                            heterogeneous: false,
10502                            include_pickle_script: true, // DGL ecosystem standard helper
10503                        };
10504
10505                        let exporter = DGLExporter::new(dgl_config);
10506                        match exporter.export(&graph, &format_dir) {
10507                            Ok(metadata) => {
10508                                snapshot.exports.insert(
10509                                    format!("{}_{}", graph_type.name, "dgl"),
10510                                    GraphExportInfo {
10511                                        name: graph_type.name.clone(),
10512                                        format: "dgl".to_string(),
10513                                        output_path: format_dir.clone(),
10514                                        node_count: metadata.common.num_nodes,
10515                                        edge_count: metadata.common.num_edges,
10516                                    },
10517                                );
10518                                snapshot.graph_count += 1;
10519                            }
10520                            Err(e) => {
10521                                warn!("Failed to export DGL graph: {}", e);
10522                            }
10523                        }
10524                    }
10525                    datasynth_config::schema::GraphExportFormat::RustGraph => {
10526                        use datasynth_graph::{
10527                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
10528                        };
10529
10530                        let rustgraph_config = RustGraphExportConfig {
10531                            include_features: true,
10532                            include_temporal: true,
10533                            include_labels: true,
10534                            source_name: "datasynth".to_string(),
10535                            batch_id: None,
10536                            output_format: RustGraphOutputFormat::JsonLines,
10537                            export_node_properties: true,
10538                            export_edge_properties: true,
10539                            pretty_print: false,
10540                        };
10541
10542                        let exporter = RustGraphExporter::new(rustgraph_config);
10543                        match exporter.export(&graph, &format_dir) {
10544                            Ok(metadata) => {
10545                                snapshot.exports.insert(
10546                                    format!("{}_{}", graph_type.name, "rustgraph"),
10547                                    GraphExportInfo {
10548                                        name: graph_type.name.clone(),
10549                                        format: "rustgraph".to_string(),
10550                                        output_path: format_dir.clone(),
10551                                        node_count: metadata.num_nodes,
10552                                        edge_count: metadata.num_edges,
10553                                    },
10554                                );
10555                                snapshot.graph_count += 1;
10556                            }
10557                            Err(e) => {
10558                                warn!("Failed to export RustGraph: {}", e);
10559                            }
10560                        }
10561                    }
10562                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
10563                        // Hypergraph export is handled separately in Phase 10b
10564                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
10565                    }
10566                }
10567            }
10568
10569            if let Some(pb) = &pb {
10570                pb.inc(40);
10571            }
10572        }
10573
10574        stats.graph_export_count = snapshot.graph_count;
10575        snapshot.exported = snapshot.graph_count > 0;
10576
10577        if let Some(pb) = pb {
10578            pb.finish_with_message(format!(
10579                "Graphs exported: {} graphs ({} nodes, {} edges)",
10580                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
10581            ));
10582        }
10583
10584        Ok(snapshot)
10585    }
10586
10587    /// Build additional graph types (banking, approval, entity) when relevant data
10588    /// is available. These run as a late phase because the data they need (banking
10589    /// snapshot, intercompany snapshot) is only generated after the main graph
10590    /// export phase.
10591    fn build_additional_graphs(
10592        &self,
10593        banking: &BankingSnapshot,
10594        intercompany: &IntercompanySnapshot,
10595        entries: &[JournalEntry],
10596        stats: &mut EnhancedGenerationStatistics,
10597    ) {
10598        let output_dir = self
10599            .output_path
10600            .clone()
10601            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
10602        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
10603
10604        // Banking graph: build when banking customers and transactions exist
10605        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
10606            info!("Phase 10c: Building banking network graph");
10607            let config = BankingGraphConfig::default();
10608            let mut builder = BankingGraphBuilder::new(config);
10609            builder.add_customers(&banking.customers);
10610            builder.add_accounts(&banking.accounts, &banking.customers);
10611            builder.add_transactions(&banking.transactions);
10612            let graph = builder.build();
10613
10614            let node_count = graph.node_count();
10615            let edge_count = graph.edge_count();
10616            stats.graph_node_count += node_count;
10617            stats.graph_edge_count += edge_count;
10618
10619            // Export as PyG if configured
10620            for format in &self.config.graph_export.formats {
10621                if matches!(
10622                    format,
10623                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
10624                ) {
10625                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
10626                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
10627                        warn!("Failed to create banking graph output dir: {}", e);
10628                        continue;
10629                    }
10630                    let pyg_config = PyGExportConfig::default();
10631                    let exporter = PyGExporter::new(pyg_config);
10632                    if let Err(e) = exporter.export(&graph, &format_dir) {
10633                        warn!("Failed to export banking graph as PyG: {}", e);
10634                    } else {
10635                        info!(
10636                            "Banking network graph exported: {} nodes, {} edges",
10637                            node_count, edge_count
10638                        );
10639                    }
10640                }
10641            }
10642        }
10643
10644        // Approval graph: build from journal entry approval workflows
10645        let approval_entries: Vec<_> = entries
10646            .iter()
10647            .filter(|je| je.header.approval_workflow.is_some())
10648            .collect();
10649
10650        if !approval_entries.is_empty() {
10651            info!(
10652                "Phase 10c: Building approval network graph ({} entries with approvals)",
10653                approval_entries.len()
10654            );
10655            let config = ApprovalGraphConfig::default();
10656            let mut builder = ApprovalGraphBuilder::new(config);
10657
10658            for je in &approval_entries {
10659                if let Some(ref wf) = je.header.approval_workflow {
10660                    for action in &wf.actions {
10661                        let record = datasynth_core::models::ApprovalRecord {
10662                            approval_id: format!(
10663                                "APR-{}-{}",
10664                                je.header.document_id, action.approval_level
10665                            ),
10666                            document_number: je.header.document_id.to_string(),
10667                            document_type: "JE".to_string(),
10668                            company_code: je.company_code().to_string(),
10669                            requester_id: wf.preparer_id.clone(),
10670                            requester_name: Some(wf.preparer_name.clone()),
10671                            approver_id: action.actor_id.clone(),
10672                            approver_name: action.actor_name.clone(),
10673                            approval_date: je.posting_date(),
10674                            action: format!("{:?}", action.action),
10675                            amount: wf.amount,
10676                            approval_limit: None,
10677                            comments: action.comments.clone(),
10678                            delegation_from: None,
10679                            is_auto_approved: false,
10680                        };
10681                        builder.add_approval(&record);
10682                    }
10683                }
10684            }
10685
10686            let graph = builder.build();
10687            let node_count = graph.node_count();
10688            let edge_count = graph.edge_count();
10689            stats.graph_node_count += node_count;
10690            stats.graph_edge_count += edge_count;
10691
10692            // Export as PyG if configured
10693            for format in &self.config.graph_export.formats {
10694                if matches!(
10695                    format,
10696                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
10697                ) {
10698                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
10699                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
10700                        warn!("Failed to create approval graph output dir: {}", e);
10701                        continue;
10702                    }
10703                    let pyg_config = PyGExportConfig::default();
10704                    let exporter = PyGExporter::new(pyg_config);
10705                    if let Err(e) = exporter.export(&graph, &format_dir) {
10706                        warn!("Failed to export approval graph as PyG: {}", e);
10707                    } else {
10708                        info!(
10709                            "Approval network graph exported: {} nodes, {} edges",
10710                            node_count, edge_count
10711                        );
10712                    }
10713                }
10714            }
10715        }
10716
10717        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
10718        if self.config.companies.len() >= 2 {
10719            info!(
10720                "Phase 10c: Building entity relationship graph ({} companies)",
10721                self.config.companies.len()
10722            );
10723
10724            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10725                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
10726
10727            // Map CompanyConfig → Company objects
10728            let parent_code = &self.config.companies[0].code;
10729            let mut companies: Vec<datasynth_core::models::Company> =
10730                Vec::with_capacity(self.config.companies.len());
10731
10732            // First company is the parent
10733            let first = &self.config.companies[0];
10734            companies.push(datasynth_core::models::Company::parent(
10735                &first.code,
10736                &first.name,
10737                &first.country,
10738                &first.currency,
10739            ));
10740
10741            // Remaining companies are subsidiaries (100% owned by parent)
10742            for cc in self.config.companies.iter().skip(1) {
10743                companies.push(datasynth_core::models::Company::subsidiary(
10744                    &cc.code,
10745                    &cc.name,
10746                    &cc.country,
10747                    &cc.currency,
10748                    parent_code,
10749                    rust_decimal::Decimal::from(100),
10750                ));
10751            }
10752
10753            // Build IntercompanyRelationship records (same logic as phase_intercompany)
10754            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
10755                self.config
10756                    .companies
10757                    .iter()
10758                    .skip(1)
10759                    .enumerate()
10760                    .map(|(i, cc)| {
10761                        let mut rel =
10762                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
10763                                format!("REL{:03}", i + 1),
10764                                parent_code.clone(),
10765                                cc.code.clone(),
10766                                rust_decimal::Decimal::from(100),
10767                                start_date,
10768                            );
10769                        rel.functional_currency = cc.currency.clone();
10770                        rel
10771                    })
10772                    .collect();
10773
10774            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
10775            builder.add_companies(&companies);
10776            builder.add_ownership_relationships(&relationships);
10777
10778            // Thread IC matched-pair transaction edges into the entity graph
10779            for pair in &intercompany.matched_pairs {
10780                builder.add_intercompany_edge(
10781                    &pair.seller_company,
10782                    &pair.buyer_company,
10783                    pair.amount,
10784                    &format!("{:?}", pair.transaction_type),
10785                );
10786            }
10787
10788            let graph = builder.build();
10789            let node_count = graph.node_count();
10790            let edge_count = graph.edge_count();
10791            stats.graph_node_count += node_count;
10792            stats.graph_edge_count += edge_count;
10793
10794            // Export as PyG if configured
10795            for format in &self.config.graph_export.formats {
10796                if matches!(
10797                    format,
10798                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
10799                ) {
10800                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
10801                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
10802                        warn!("Failed to create entity graph output dir: {}", e);
10803                        continue;
10804                    }
10805                    let pyg_config = PyGExportConfig::default();
10806                    let exporter = PyGExporter::new(pyg_config);
10807                    if let Err(e) = exporter.export(&graph, &format_dir) {
10808                        warn!("Failed to export entity graph as PyG: {}", e);
10809                    } else {
10810                        info!(
10811                            "Entity relationship graph exported: {} nodes, {} edges",
10812                            node_count, edge_count
10813                        );
10814                    }
10815                }
10816            }
10817        } else {
10818            debug!(
10819                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
10820                self.config.companies.len()
10821            );
10822        }
10823    }
10824
10825    /// Export a multi-layer hypergraph for RustGraph integration.
10826    ///
10827    /// Builds a 3-layer hypergraph:
10828    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
10829    /// - Layer 2: Process Events (all process family document flows + OCPM events)
10830    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
10831    #[allow(clippy::too_many_arguments)]
10832    fn export_hypergraph(
10833        &self,
10834        coa: &Arc<ChartOfAccounts>,
10835        entries: &[JournalEntry],
10836        document_flows: &DocumentFlowSnapshot,
10837        sourcing: &SourcingSnapshot,
10838        hr: &HrSnapshot,
10839        manufacturing: &ManufacturingSnapshot,
10840        banking: &BankingSnapshot,
10841        audit: &AuditSnapshot,
10842        financial_reporting: &FinancialReportingSnapshot,
10843        ocpm: &OcpmSnapshot,
10844        compliance: &ComplianceRegulationsSnapshot,
10845        stats: &mut EnhancedGenerationStatistics,
10846    ) -> SynthResult<HypergraphExportInfo> {
10847        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
10848        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
10849        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
10850        use datasynth_graph::models::hypergraph::AggregationStrategy;
10851
10852        let hg_settings = &self.config.graph_export.hypergraph;
10853
10854        // Parse aggregation strategy from config string
10855        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
10856            "truncate" => AggregationStrategy::Truncate,
10857            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
10858            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
10859            "importance_sample" => AggregationStrategy::ImportanceSample,
10860            _ => AggregationStrategy::PoolByCounterparty,
10861        };
10862
10863        let builder_config = HypergraphConfig {
10864            max_nodes: hg_settings.max_nodes,
10865            aggregation_strategy,
10866            include_coso: hg_settings.governance_layer.include_coso,
10867            include_controls: hg_settings.governance_layer.include_controls,
10868            include_sox: hg_settings.governance_layer.include_sox,
10869            include_vendors: hg_settings.governance_layer.include_vendors,
10870            include_customers: hg_settings.governance_layer.include_customers,
10871            include_employees: hg_settings.governance_layer.include_employees,
10872            include_p2p: hg_settings.process_layer.include_p2p,
10873            include_o2c: hg_settings.process_layer.include_o2c,
10874            include_s2c: hg_settings.process_layer.include_s2c,
10875            include_h2r: hg_settings.process_layer.include_h2r,
10876            include_mfg: hg_settings.process_layer.include_mfg,
10877            include_bank: hg_settings.process_layer.include_bank,
10878            include_audit: hg_settings.process_layer.include_audit,
10879            include_r2r: hg_settings.process_layer.include_r2r,
10880            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
10881            docs_per_counterparty_threshold: hg_settings
10882                .process_layer
10883                .docs_per_counterparty_threshold,
10884            include_accounts: hg_settings.accounting_layer.include_accounts,
10885            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
10886            include_cross_layer_edges: hg_settings.cross_layer.enabled,
10887            include_compliance: self.config.compliance_regulations.enabled,
10888            include_tax: true,
10889            include_treasury: true,
10890            include_esg: true,
10891            include_project: true,
10892            include_intercompany: true,
10893            include_temporal_events: true,
10894        };
10895
10896        let mut builder = HypergraphBuilder::new(builder_config);
10897
10898        // Layer 1: Governance & Controls
10899        builder.add_coso_framework();
10900
10901        // Add controls if available (generated during JE generation)
10902        // Controls are generated per-company; we use the standard set
10903        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
10904            let controls = InternalControl::standard_controls();
10905            builder.add_controls(&controls);
10906        }
10907
10908        // Add master data
10909        builder.add_vendors(&self.master_data.vendors);
10910        builder.add_customers(&self.master_data.customers);
10911        builder.add_employees(&self.master_data.employees);
10912
10913        // Layer 2: Process Events (all process families)
10914        builder.add_p2p_documents(
10915            &document_flows.purchase_orders,
10916            &document_flows.goods_receipts,
10917            &document_flows.vendor_invoices,
10918            &document_flows.payments,
10919        );
10920        builder.add_o2c_documents(
10921            &document_flows.sales_orders,
10922            &document_flows.deliveries,
10923            &document_flows.customer_invoices,
10924        );
10925        builder.add_s2c_documents(
10926            &sourcing.sourcing_projects,
10927            &sourcing.qualifications,
10928            &sourcing.rfx_events,
10929            &sourcing.bids,
10930            &sourcing.bid_evaluations,
10931            &sourcing.contracts,
10932        );
10933        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
10934        builder.add_mfg_documents(
10935            &manufacturing.production_orders,
10936            &manufacturing.quality_inspections,
10937            &manufacturing.cycle_counts,
10938        );
10939        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
10940        builder.add_audit_documents(
10941            &audit.engagements,
10942            &audit.workpapers,
10943            &audit.findings,
10944            &audit.evidence,
10945            &audit.risk_assessments,
10946            &audit.judgments,
10947        );
10948        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
10949
10950        // OCPM events as hyperedges
10951        if let Some(ref event_log) = ocpm.event_log {
10952            builder.add_ocpm_events(event_log);
10953        }
10954
10955        // Compliance regulations as cross-layer nodes
10956        if self.config.compliance_regulations.enabled
10957            && hg_settings.governance_layer.include_controls
10958        {
10959            // Reconstruct ComplianceStandard objects from the registry
10960            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
10961            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
10962                .standard_records
10963                .iter()
10964                .filter_map(|r| {
10965                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
10966                    registry.get(&sid).cloned()
10967                })
10968                .collect();
10969
10970            builder.add_compliance_regulations(
10971                &standards,
10972                &compliance.findings,
10973                &compliance.filings,
10974            );
10975        }
10976
10977        // Layer 3: Accounting Network
10978        builder.add_accounts(coa);
10979        builder.add_journal_entries_as_hyperedges(entries);
10980
10981        // Build the hypergraph
10982        let hypergraph = builder.build();
10983
10984        // Export
10985        let output_dir = self
10986            .output_path
10987            .clone()
10988            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
10989        let hg_dir = output_dir
10990            .join(&self.config.graph_export.output_subdirectory)
10991            .join(&hg_settings.output_subdirectory);
10992
10993        // Branch on output format
10994        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
10995            "unified" => {
10996                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
10997                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
10998                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
10999                })?;
11000                (
11001                    metadata.num_nodes,
11002                    metadata.num_edges,
11003                    metadata.num_hyperedges,
11004                )
11005            }
11006            _ => {
11007                // "native" or any unrecognized format → use existing exporter
11008                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
11009                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
11010                    SynthError::generation(format!("Hypergraph export failed: {e}"))
11011                })?;
11012                (
11013                    metadata.num_nodes,
11014                    metadata.num_edges,
11015                    metadata.num_hyperedges,
11016                )
11017            }
11018        };
11019
11020        // Stream to RustGraph ingest endpoint if configured
11021        #[cfg(feature = "streaming")]
11022        if let Some(ref target_url) = hg_settings.stream_target {
11023            use crate::stream_client::{StreamClient, StreamConfig};
11024            use std::io::Write as _;
11025
11026            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
11027            let stream_config = StreamConfig {
11028                target_url: target_url.clone(),
11029                batch_size: hg_settings.stream_batch_size,
11030                api_key,
11031                ..StreamConfig::default()
11032            };
11033
11034            match StreamClient::new(stream_config) {
11035                Ok(mut client) => {
11036                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
11037                    match exporter.export_to_writer(&hypergraph, &mut client) {
11038                        Ok(_) => {
11039                            if let Err(e) = client.flush() {
11040                                warn!("Failed to flush stream client: {}", e);
11041                            } else {
11042                                info!("Streamed {} records to {}", client.total_sent(), target_url);
11043                            }
11044                        }
11045                        Err(e) => {
11046                            warn!("Streaming export failed: {}", e);
11047                        }
11048                    }
11049                }
11050                Err(e) => {
11051                    warn!("Failed to create stream client: {}", e);
11052                }
11053            }
11054        }
11055
11056        // Update stats
11057        stats.graph_node_count += num_nodes;
11058        stats.graph_edge_count += num_edges;
11059        stats.graph_export_count += 1;
11060
11061        Ok(HypergraphExportInfo {
11062            node_count: num_nodes,
11063            edge_count: num_edges,
11064            hyperedge_count: num_hyperedges,
11065            output_path: hg_dir,
11066        })
11067    }
11068
11069    /// Generate banking KYC/AML data.
11070    ///
11071    /// Creates banking customers, accounts, and transactions with AML typology injection.
11072    /// Uses the BankingOrchestrator from synth-banking crate.
11073    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
11074        let pb = self.create_progress_bar(100, "Generating Banking Data");
11075
11076        // Build the banking orchestrator from config
11077        let orchestrator = BankingOrchestratorBuilder::new()
11078            .config(self.config.banking.clone())
11079            .seed(self.seed + 9000)
11080            .country_pack(self.primary_pack().clone())
11081            .build();
11082
11083        if let Some(pb) = &pb {
11084            pb.inc(10);
11085        }
11086
11087        // Generate the banking data
11088        let result = orchestrator.generate();
11089
11090        if let Some(pb) = &pb {
11091            pb.inc(90);
11092            pb.finish_with_message(format!(
11093                "Banking: {} customers, {} transactions",
11094                result.customers.len(),
11095                result.transactions.len()
11096            ));
11097        }
11098
11099        // Cross-reference banking customers with core master data so that
11100        // banking customer names align with the enterprise customer list.
11101        // We rotate through core customers, overlaying their name and country
11102        // onto the generated banking customers where possible.
11103        let mut banking_customers = result.customers;
11104        let core_customers = &self.master_data.customers;
11105        if !core_customers.is_empty() {
11106            for (i, bc) in banking_customers.iter_mut().enumerate() {
11107                let core = &core_customers[i % core_customers.len()];
11108                bc.name = CustomerName::business(&core.name);
11109                bc.residence_country = core.country.clone();
11110                bc.enterprise_customer_id = Some(core.customer_id.clone());
11111            }
11112            debug!(
11113                "Cross-referenced {} banking customers with {} core customers",
11114                banking_customers.len(),
11115                core_customers.len()
11116            );
11117        }
11118
11119        Ok(BankingSnapshot {
11120            customers: banking_customers,
11121            accounts: result.accounts,
11122            transactions: result.transactions,
11123            transaction_labels: result.transaction_labels,
11124            customer_labels: result.customer_labels,
11125            account_labels: result.account_labels,
11126            relationship_labels: result.relationship_labels,
11127            narratives: result.narratives,
11128            suspicious_count: result.stats.suspicious_count,
11129            scenario_count: result.scenarios.len(),
11130        })
11131    }
11132
11133    /// Calculate total transactions to generate.
11134    fn calculate_total_transactions(&self) -> u64 {
11135        let months = self.config.global.period_months as f64;
11136        self.config
11137            .companies
11138            .iter()
11139            .map(|c| {
11140                let annual = c.annual_transaction_volume.count() as f64;
11141                let weighted = annual * c.volume_weight;
11142                (weighted * months / 12.0) as u64
11143            })
11144            .sum()
11145    }
11146
11147    /// Create a progress bar if progress display is enabled.
11148    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
11149        if !self.phase_config.show_progress {
11150            return None;
11151        }
11152
11153        let pb = if let Some(mp) = &self.multi_progress {
11154            mp.add(ProgressBar::new(total))
11155        } else {
11156            ProgressBar::new(total)
11157        };
11158
11159        pb.set_style(
11160            ProgressStyle::default_bar()
11161                .template(&format!(
11162                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
11163                ))
11164                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
11165                .progress_chars("#>-"),
11166        );
11167
11168        Some(pb)
11169    }
11170
11171    /// Get the generated chart of accounts.
11172    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
11173        self.coa.clone()
11174    }
11175
11176    /// Get the generated master data.
11177    pub fn get_master_data(&self) -> &MasterDataSnapshot {
11178        &self.master_data
11179    }
11180
11181    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
11182    fn phase_compliance_regulations(
11183        &mut self,
11184        _stats: &mut EnhancedGenerationStatistics,
11185    ) -> SynthResult<ComplianceRegulationsSnapshot> {
11186        if !self.phase_config.generate_compliance_regulations {
11187            return Ok(ComplianceRegulationsSnapshot::default());
11188        }
11189
11190        info!("Phase: Generating Compliance Regulations Data");
11191
11192        let cr_config = &self.config.compliance_regulations;
11193
11194        // Determine jurisdictions: from config or inferred from companies
11195        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
11196            self.config
11197                .companies
11198                .iter()
11199                .map(|c| c.country.clone())
11200                .collect::<std::collections::HashSet<_>>()
11201                .into_iter()
11202                .collect()
11203        } else {
11204            cr_config.jurisdictions.clone()
11205        };
11206
11207        // Determine reference date
11208        let fallback_date =
11209            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
11210        let reference_date = cr_config
11211            .reference_date
11212            .as_ref()
11213            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
11214            .unwrap_or_else(|| {
11215                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11216                    .unwrap_or(fallback_date)
11217            });
11218
11219        // Generate standards registry data
11220        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
11221        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
11222        let cross_reference_records = reg_gen.generate_cross_reference_records();
11223        let jurisdiction_records =
11224            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
11225
11226        info!(
11227            "  Standards: {} records, {} cross-references, {} jurisdictions",
11228            standard_records.len(),
11229            cross_reference_records.len(),
11230            jurisdiction_records.len()
11231        );
11232
11233        // Generate audit procedures (if enabled)
11234        let audit_procedures = if cr_config.audit_procedures.enabled {
11235            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
11236                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
11237                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
11238                confidence_level: cr_config.audit_procedures.confidence_level,
11239                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
11240            };
11241            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
11242                self.seed + 9000,
11243                proc_config,
11244            );
11245            let registry = reg_gen.registry();
11246            let mut all_procs = Vec::new();
11247            for jurisdiction in &jurisdictions {
11248                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
11249                all_procs.extend(procs);
11250            }
11251            info!("  Audit procedures: {}", all_procs.len());
11252            all_procs
11253        } else {
11254            Vec::new()
11255        };
11256
11257        // Generate compliance findings (if enabled)
11258        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
11259            let finding_config =
11260                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
11261                    finding_rate: cr_config.findings.finding_rate,
11262                    material_weakness_rate: cr_config.findings.material_weakness_rate,
11263                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
11264                    generate_remediation: cr_config.findings.generate_remediation,
11265                };
11266            let mut finding_gen =
11267                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
11268                    self.seed + 9100,
11269                    finding_config,
11270                );
11271            let mut all_findings = Vec::new();
11272            for company in &self.config.companies {
11273                let company_findings =
11274                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
11275                all_findings.extend(company_findings);
11276            }
11277            info!("  Compliance findings: {}", all_findings.len());
11278            all_findings
11279        } else {
11280            Vec::new()
11281        };
11282
11283        // Generate regulatory filings (if enabled)
11284        let filings = if cr_config.filings.enabled {
11285            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
11286                filing_types: cr_config.filings.filing_types.clone(),
11287                generate_status_progression: cr_config.filings.generate_status_progression,
11288            };
11289            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
11290                self.seed + 9200,
11291                filing_config,
11292            );
11293            let company_codes: Vec<String> = self
11294                .config
11295                .companies
11296                .iter()
11297                .map(|c| c.code.clone())
11298                .collect();
11299            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11300                .unwrap_or(fallback_date);
11301            let filings = filing_gen.generate_filings(
11302                &company_codes,
11303                &jurisdictions,
11304                start_date,
11305                self.config.global.period_months,
11306            );
11307            info!("  Regulatory filings: {}", filings.len());
11308            filings
11309        } else {
11310            Vec::new()
11311        };
11312
11313        // Build compliance graph (if enabled)
11314        let compliance_graph = if cr_config.graph.enabled {
11315            let graph_config = datasynth_graph::ComplianceGraphConfig {
11316                include_standard_nodes: cr_config.graph.include_compliance_nodes,
11317                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
11318                include_cross_references: cr_config.graph.include_cross_references,
11319                include_supersession_edges: cr_config.graph.include_supersession_edges,
11320                include_account_links: cr_config.graph.include_account_links,
11321                include_control_links: cr_config.graph.include_control_links,
11322                include_company_links: cr_config.graph.include_company_links,
11323            };
11324            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
11325
11326            // Add standard nodes
11327            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
11328                .iter()
11329                .map(|r| datasynth_graph::StandardNodeInput {
11330                    standard_id: r.standard_id.clone(),
11331                    title: r.title.clone(),
11332                    category: r.category.clone(),
11333                    domain: r.domain.clone(),
11334                    is_active: r.is_active,
11335                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
11336                    applicable_account_types: r.applicable_account_types.clone(),
11337                    applicable_processes: r.applicable_processes.clone(),
11338                })
11339                .collect();
11340            builder.add_standards(&standard_inputs);
11341
11342            // Add jurisdiction nodes
11343            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
11344                jurisdiction_records
11345                    .iter()
11346                    .map(|r| datasynth_graph::JurisdictionNodeInput {
11347                        country_code: r.country_code.clone(),
11348                        country_name: r.country_name.clone(),
11349                        framework: r.accounting_framework.clone(),
11350                        standard_count: r.standard_count,
11351                        tax_rate: r.statutory_tax_rate,
11352                    })
11353                    .collect();
11354            builder.add_jurisdictions(&jurisdiction_inputs);
11355
11356            // Add cross-reference edges
11357            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
11358                cross_reference_records
11359                    .iter()
11360                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
11361                        from_standard: r.from_standard.clone(),
11362                        to_standard: r.to_standard.clone(),
11363                        relationship: r.relationship.clone(),
11364                        convergence_level: r.convergence_level,
11365                    })
11366                    .collect();
11367            builder.add_cross_references(&xref_inputs);
11368
11369            // Add jurisdiction→standard mappings
11370            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
11371                .iter()
11372                .map(|r| datasynth_graph::JurisdictionMappingInput {
11373                    country_code: r.jurisdiction.clone(),
11374                    standard_id: r.standard_id.clone(),
11375                })
11376                .collect();
11377            builder.add_jurisdiction_mappings(&mapping_inputs);
11378
11379            // Add procedure nodes
11380            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
11381                .iter()
11382                .map(|p| datasynth_graph::ProcedureNodeInput {
11383                    procedure_id: p.procedure_id.clone(),
11384                    standard_id: p.standard_id.clone(),
11385                    procedure_type: p.procedure_type.clone(),
11386                    sample_size: p.sample_size,
11387                    confidence_level: p.confidence_level,
11388                })
11389                .collect();
11390            builder.add_procedures(&proc_inputs);
11391
11392            // Add finding nodes
11393            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
11394                .iter()
11395                .map(|f| datasynth_graph::FindingNodeInput {
11396                    finding_id: f.finding_id.to_string(),
11397                    standard_id: f
11398                        .related_standards
11399                        .first()
11400                        .map(|s| s.as_str().to_string())
11401                        .unwrap_or_default(),
11402                    severity: f.severity.to_string(),
11403                    deficiency_level: f.deficiency_level.to_string(),
11404                    severity_score: f.deficiency_level.severity_score(),
11405                    control_id: f.control_id.clone(),
11406                    affected_accounts: f.affected_accounts.clone(),
11407                })
11408                .collect();
11409            builder.add_findings(&finding_inputs);
11410
11411            // Cross-domain: link standards to accounts from chart of accounts
11412            if cr_config.graph.include_account_links {
11413                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
11414                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
11415                for std_record in &standard_records {
11416                    if let Some(std_obj) =
11417                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
11418                            &std_record.standard_id,
11419                        ))
11420                    {
11421                        for acct_type in &std_obj.applicable_account_types {
11422                            account_links.push(datasynth_graph::AccountLinkInput {
11423                                standard_id: std_record.standard_id.clone(),
11424                                account_code: acct_type.clone(),
11425                                account_name: acct_type.clone(),
11426                            });
11427                        }
11428                    }
11429                }
11430                builder.add_account_links(&account_links);
11431            }
11432
11433            // Cross-domain: link standards to internal controls
11434            if cr_config.graph.include_control_links {
11435                let mut control_links = Vec::new();
11436                // SOX/PCAOB standards link to all controls
11437                let sox_like_ids: Vec<String> = standard_records
11438                    .iter()
11439                    .filter(|r| {
11440                        r.standard_id.starts_with("SOX")
11441                            || r.standard_id.starts_with("PCAOB-AS-2201")
11442                    })
11443                    .map(|r| r.standard_id.clone())
11444                    .collect();
11445                // Get control IDs from config (C001-C060 standard controls)
11446                let control_ids = [
11447                    ("C001", "Cash Controls"),
11448                    ("C002", "Large Transaction Approval"),
11449                    ("C010", "PO Approval"),
11450                    ("C011", "Three-Way Match"),
11451                    ("C020", "Revenue Recognition"),
11452                    ("C021", "Credit Check"),
11453                    ("C030", "Manual JE Approval"),
11454                    ("C031", "Period Close Review"),
11455                    ("C032", "Account Reconciliation"),
11456                    ("C040", "Payroll Processing"),
11457                    ("C050", "Fixed Asset Capitalization"),
11458                    ("C060", "Intercompany Elimination"),
11459                ];
11460                for sox_id in &sox_like_ids {
11461                    for (ctrl_id, ctrl_name) in &control_ids {
11462                        control_links.push(datasynth_graph::ControlLinkInput {
11463                            standard_id: sox_id.clone(),
11464                            control_id: ctrl_id.to_string(),
11465                            control_name: ctrl_name.to_string(),
11466                        });
11467                    }
11468                }
11469                builder.add_control_links(&control_links);
11470            }
11471
11472            // Cross-domain: filing nodes with company links
11473            if cr_config.graph.include_company_links {
11474                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
11475                    .iter()
11476                    .enumerate()
11477                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
11478                        filing_id: format!("F{:04}", i + 1),
11479                        filing_type: f.filing_type.to_string(),
11480                        company_code: f.company_code.clone(),
11481                        jurisdiction: f.jurisdiction.clone(),
11482                        status: format!("{:?}", f.status),
11483                    })
11484                    .collect();
11485                builder.add_filings(&filing_inputs);
11486            }
11487
11488            let graph = builder.build();
11489            info!(
11490                "  Compliance graph: {} nodes, {} edges",
11491                graph.nodes.len(),
11492                graph.edges.len()
11493            );
11494            Some(graph)
11495        } else {
11496            None
11497        };
11498
11499        self.check_resources_with_log("post-compliance-regulations")?;
11500
11501        Ok(ComplianceRegulationsSnapshot {
11502            standard_records,
11503            cross_reference_records,
11504            jurisdiction_records,
11505            audit_procedures,
11506            findings,
11507            filings,
11508            compliance_graph,
11509        })
11510    }
11511
11512    /// Build a lineage graph describing config → phase → output relationships.
11513    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
11514        use super::lineage::LineageGraphBuilder;
11515
11516        let mut builder = LineageGraphBuilder::new();
11517
11518        // Config sections
11519        builder.add_config_section("config:global", "Global Config");
11520        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
11521        builder.add_config_section("config:transactions", "Transaction Config");
11522
11523        // Generator phases
11524        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
11525        builder.add_generator_phase("phase:je", "Journal Entry Generation");
11526
11527        // Config → phase edges
11528        builder.configured_by("phase:coa", "config:chart_of_accounts");
11529        builder.configured_by("phase:je", "config:transactions");
11530
11531        // Output files
11532        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
11533        builder.produced_by("output:je", "phase:je");
11534
11535        // Optional phases based on config
11536        if self.phase_config.generate_master_data {
11537            builder.add_config_section("config:master_data", "Master Data Config");
11538            builder.add_generator_phase("phase:master_data", "Master Data Generation");
11539            builder.configured_by("phase:master_data", "config:master_data");
11540            builder.input_to("phase:master_data", "phase:je");
11541        }
11542
11543        if self.phase_config.generate_document_flows {
11544            builder.add_config_section("config:document_flows", "Document Flow Config");
11545            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
11546            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
11547            builder.configured_by("phase:p2p", "config:document_flows");
11548            builder.configured_by("phase:o2c", "config:document_flows");
11549
11550            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
11551            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
11552            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
11553            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
11554            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
11555
11556            builder.produced_by("output:po", "phase:p2p");
11557            builder.produced_by("output:gr", "phase:p2p");
11558            builder.produced_by("output:vi", "phase:p2p");
11559            builder.produced_by("output:so", "phase:o2c");
11560            builder.produced_by("output:ci", "phase:o2c");
11561        }
11562
11563        if self.phase_config.inject_anomalies {
11564            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
11565            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
11566            builder.configured_by("phase:anomaly", "config:fraud");
11567            builder.add_output_file(
11568                "output:labels",
11569                "Anomaly Labels",
11570                "labels/anomaly_labels.csv",
11571            );
11572            builder.produced_by("output:labels", "phase:anomaly");
11573        }
11574
11575        if self.phase_config.generate_audit {
11576            builder.add_config_section("config:audit", "Audit Config");
11577            builder.add_generator_phase("phase:audit", "Audit Data Generation");
11578            builder.configured_by("phase:audit", "config:audit");
11579        }
11580
11581        if self.phase_config.generate_banking {
11582            builder.add_config_section("config:banking", "Banking Config");
11583            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
11584            builder.configured_by("phase:banking", "config:banking");
11585        }
11586
11587        if self.config.llm.enabled {
11588            builder.add_config_section("config:llm", "LLM Enrichment Config");
11589            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
11590            builder.configured_by("phase:llm_enrichment", "config:llm");
11591        }
11592
11593        if self.config.diffusion.enabled {
11594            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
11595            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
11596            builder.configured_by("phase:diffusion", "config:diffusion");
11597        }
11598
11599        if self.config.causal.enabled {
11600            builder.add_config_section("config:causal", "Causal Generation Config");
11601            builder.add_generator_phase("phase:causal", "Causal Overlay");
11602            builder.configured_by("phase:causal", "config:causal");
11603        }
11604
11605        builder.build()
11606    }
11607
11608    // -----------------------------------------------------------------------
11609    // Trial-balance helpers used to replace hardcoded proxy values
11610    // -----------------------------------------------------------------------
11611
11612    /// Compute total revenue for a company from its journal entries.
11613    ///
11614    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
11615    /// net credits on all revenue-account lines filtered to `company_code`.
11616    fn compute_company_revenue(
11617        entries: &[JournalEntry],
11618        company_code: &str,
11619    ) -> rust_decimal::Decimal {
11620        use rust_decimal::Decimal;
11621        let mut revenue = Decimal::ZERO;
11622        for je in entries {
11623            if je.header.company_code != company_code {
11624                continue;
11625            }
11626            for line in &je.lines {
11627                if line.gl_account.starts_with('4') {
11628                    // Revenue is credit-normal
11629                    revenue += line.credit_amount - line.debit_amount;
11630                }
11631            }
11632        }
11633        revenue.max(Decimal::ZERO)
11634    }
11635
11636    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
11637    ///
11638    /// Asset accounts start with "1"; liability accounts start with "2".
11639    fn compute_entity_net_assets(
11640        entries: &[JournalEntry],
11641        entity_code: &str,
11642    ) -> rust_decimal::Decimal {
11643        use rust_decimal::Decimal;
11644        let mut asset_net = Decimal::ZERO;
11645        let mut liability_net = Decimal::ZERO;
11646        for je in entries {
11647            if je.header.company_code != entity_code {
11648                continue;
11649            }
11650            for line in &je.lines {
11651                if line.gl_account.starts_with('1') {
11652                    asset_net += line.debit_amount - line.credit_amount;
11653                } else if line.gl_account.starts_with('2') {
11654                    liability_net += line.credit_amount - line.debit_amount;
11655                }
11656            }
11657        }
11658        asset_net - liability_net
11659    }
11660}
11661
11662/// Get the directory name for a graph export format.
11663fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
11664    match format {
11665        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
11666        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
11667        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
11668        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
11669        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
11670    }
11671}
11672
11673#[cfg(test)]
11674#[allow(clippy::unwrap_used)]
11675mod tests {
11676    use super::*;
11677    use datasynth_config::schema::*;
11678
11679    fn create_test_config() -> GeneratorConfig {
11680        GeneratorConfig {
11681            global: GlobalConfig {
11682                industry: IndustrySector::Manufacturing,
11683                start_date: "2024-01-01".to_string(),
11684                period_months: 1,
11685                seed: Some(42),
11686                parallel: false,
11687                group_currency: "USD".to_string(),
11688                presentation_currency: None,
11689                worker_threads: 0,
11690                memory_limit_mb: 0,
11691                fiscal_year_months: None,
11692            },
11693            companies: vec![CompanyConfig {
11694                code: "1000".to_string(),
11695                name: "Test Company".to_string(),
11696                currency: "USD".to_string(),
11697                functional_currency: None,
11698                country: "US".to_string(),
11699                annual_transaction_volume: TransactionVolume::TenK,
11700                volume_weight: 1.0,
11701                fiscal_year_variant: "K4".to_string(),
11702            }],
11703            chart_of_accounts: ChartOfAccountsConfig {
11704                complexity: CoAComplexity::Small,
11705                industry_specific: true,
11706                custom_accounts: None,
11707                min_hierarchy_depth: 2,
11708                max_hierarchy_depth: 4,
11709            },
11710            transactions: TransactionConfig::default(),
11711            output: OutputConfig::default(),
11712            fraud: FraudConfig::default(),
11713            internal_controls: InternalControlsConfig::default(),
11714            business_processes: BusinessProcessConfig::default(),
11715            user_personas: UserPersonaConfig::default(),
11716            templates: TemplateConfig::default(),
11717            approval: ApprovalConfig::default(),
11718            departments: DepartmentConfig::default(),
11719            master_data: MasterDataConfig::default(),
11720            document_flows: DocumentFlowConfig::default(),
11721            intercompany: IntercompanyConfig::default(),
11722            balance: BalanceConfig::default(),
11723            ocpm: OcpmConfig::default(),
11724            audit: AuditGenerationConfig::default(),
11725            banking: datasynth_banking::BankingConfig::default(),
11726            data_quality: DataQualitySchemaConfig::default(),
11727            scenario: ScenarioConfig::default(),
11728            temporal: TemporalDriftConfig::default(),
11729            graph_export: GraphExportConfig::default(),
11730            streaming: StreamingSchemaConfig::default(),
11731            rate_limit: RateLimitSchemaConfig::default(),
11732            temporal_attributes: TemporalAttributeSchemaConfig::default(),
11733            relationships: RelationshipSchemaConfig::default(),
11734            accounting_standards: AccountingStandardsConfig::default(),
11735            audit_standards: AuditStandardsConfig::default(),
11736            distributions: Default::default(),
11737            temporal_patterns: Default::default(),
11738            vendor_network: VendorNetworkSchemaConfig::default(),
11739            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
11740            relationship_strength: RelationshipStrengthSchemaConfig::default(),
11741            cross_process_links: CrossProcessLinksSchemaConfig::default(),
11742            organizational_events: OrganizationalEventsSchemaConfig::default(),
11743            behavioral_drift: BehavioralDriftSchemaConfig::default(),
11744            market_drift: MarketDriftSchemaConfig::default(),
11745            drift_labeling: DriftLabelingSchemaConfig::default(),
11746            anomaly_injection: Default::default(),
11747            industry_specific: Default::default(),
11748            fingerprint_privacy: Default::default(),
11749            quality_gates: Default::default(),
11750            compliance: Default::default(),
11751            webhooks: Default::default(),
11752            llm: Default::default(),
11753            diffusion: Default::default(),
11754            causal: Default::default(),
11755            source_to_pay: Default::default(),
11756            financial_reporting: Default::default(),
11757            hr: Default::default(),
11758            manufacturing: Default::default(),
11759            sales_quotes: Default::default(),
11760            tax: Default::default(),
11761            treasury: Default::default(),
11762            project_accounting: Default::default(),
11763            esg: Default::default(),
11764            country_packs: None,
11765            scenarios: Default::default(),
11766            session: Default::default(),
11767            compliance_regulations: Default::default(),
11768        }
11769    }
11770
11771    #[test]
11772    fn test_enhanced_orchestrator_creation() {
11773        let config = create_test_config();
11774        let orchestrator = EnhancedOrchestrator::with_defaults(config);
11775        assert!(orchestrator.is_ok());
11776    }
11777
11778    #[test]
11779    fn test_minimal_generation() {
11780        let config = create_test_config();
11781        let phase_config = PhaseConfig {
11782            generate_master_data: false,
11783            generate_document_flows: false,
11784            generate_journal_entries: true,
11785            inject_anomalies: false,
11786            show_progress: false,
11787            ..Default::default()
11788        };
11789
11790        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
11791        let result = orchestrator.generate();
11792
11793        assert!(result.is_ok());
11794        let result = result.unwrap();
11795        assert!(!result.journal_entries.is_empty());
11796    }
11797
11798    #[test]
11799    fn test_master_data_generation() {
11800        let config = create_test_config();
11801        let phase_config = PhaseConfig {
11802            generate_master_data: true,
11803            generate_document_flows: false,
11804            generate_journal_entries: false,
11805            inject_anomalies: false,
11806            show_progress: false,
11807            vendors_per_company: 5,
11808            customers_per_company: 5,
11809            materials_per_company: 10,
11810            assets_per_company: 5,
11811            employees_per_company: 10,
11812            ..Default::default()
11813        };
11814
11815        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
11816        let result = orchestrator.generate().unwrap();
11817
11818        assert!(!result.master_data.vendors.is_empty());
11819        assert!(!result.master_data.customers.is_empty());
11820        assert!(!result.master_data.materials.is_empty());
11821    }
11822
11823    #[test]
11824    fn test_document_flow_generation() {
11825        let config = create_test_config();
11826        let phase_config = PhaseConfig {
11827            generate_master_data: true,
11828            generate_document_flows: true,
11829            generate_journal_entries: false,
11830            inject_anomalies: false,
11831            inject_data_quality: false,
11832            validate_balances: false,
11833            generate_ocpm_events: false,
11834            show_progress: false,
11835            vendors_per_company: 5,
11836            customers_per_company: 5,
11837            materials_per_company: 10,
11838            assets_per_company: 5,
11839            employees_per_company: 10,
11840            p2p_chains: 5,
11841            o2c_chains: 5,
11842            ..Default::default()
11843        };
11844
11845        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
11846        let result = orchestrator.generate().unwrap();
11847
11848        // Should have generated P2P and O2C chains
11849        assert!(!result.document_flows.p2p_chains.is_empty());
11850        assert!(!result.document_flows.o2c_chains.is_empty());
11851
11852        // Flattened documents should be populated
11853        assert!(!result.document_flows.purchase_orders.is_empty());
11854        assert!(!result.document_flows.sales_orders.is_empty());
11855    }
11856
11857    #[test]
11858    fn test_anomaly_injection() {
11859        let config = create_test_config();
11860        let phase_config = PhaseConfig {
11861            generate_master_data: false,
11862            generate_document_flows: false,
11863            generate_journal_entries: true,
11864            inject_anomalies: true,
11865            show_progress: false,
11866            ..Default::default()
11867        };
11868
11869        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
11870        let result = orchestrator.generate().unwrap();
11871
11872        // Should have journal entries
11873        assert!(!result.journal_entries.is_empty());
11874
11875        // With ~833 entries and 2% rate, expect some anomalies
11876        // Note: This is probabilistic, so we just verify the structure exists
11877        assert!(result.anomaly_labels.summary.is_some());
11878    }
11879
11880    #[test]
11881    fn test_full_generation_pipeline() {
11882        let config = create_test_config();
11883        let phase_config = PhaseConfig {
11884            generate_master_data: true,
11885            generate_document_flows: true,
11886            generate_journal_entries: true,
11887            inject_anomalies: false,
11888            inject_data_quality: false,
11889            validate_balances: true,
11890            generate_ocpm_events: false,
11891            show_progress: false,
11892            vendors_per_company: 3,
11893            customers_per_company: 3,
11894            materials_per_company: 5,
11895            assets_per_company: 3,
11896            employees_per_company: 5,
11897            p2p_chains: 3,
11898            o2c_chains: 3,
11899            ..Default::default()
11900        };
11901
11902        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
11903        let result = orchestrator.generate().unwrap();
11904
11905        // All phases should have results
11906        assert!(!result.master_data.vendors.is_empty());
11907        assert!(!result.master_data.customers.is_empty());
11908        assert!(!result.document_flows.p2p_chains.is_empty());
11909        assert!(!result.document_flows.o2c_chains.is_empty());
11910        assert!(!result.journal_entries.is_empty());
11911        assert!(result.statistics.accounts_count > 0);
11912
11913        // Subledger linking should have run
11914        assert!(!result.subledger.ap_invoices.is_empty());
11915        assert!(!result.subledger.ar_invoices.is_empty());
11916
11917        // Balance validation should have run
11918        assert!(result.balance_validation.validated);
11919        assert!(result.balance_validation.entries_processed > 0);
11920    }
11921
11922    #[test]
11923    fn test_subledger_linking() {
11924        let config = create_test_config();
11925        let phase_config = PhaseConfig {
11926            generate_master_data: true,
11927            generate_document_flows: true,
11928            generate_journal_entries: false,
11929            inject_anomalies: false,
11930            inject_data_quality: false,
11931            validate_balances: false,
11932            generate_ocpm_events: false,
11933            show_progress: false,
11934            vendors_per_company: 5,
11935            customers_per_company: 5,
11936            materials_per_company: 10,
11937            assets_per_company: 3,
11938            employees_per_company: 5,
11939            p2p_chains: 5,
11940            o2c_chains: 5,
11941            ..Default::default()
11942        };
11943
11944        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
11945        let result = orchestrator.generate().unwrap();
11946
11947        // Should have document flows
11948        assert!(!result.document_flows.vendor_invoices.is_empty());
11949        assert!(!result.document_flows.customer_invoices.is_empty());
11950
11951        // Subledger should be linked from document flows
11952        assert!(!result.subledger.ap_invoices.is_empty());
11953        assert!(!result.subledger.ar_invoices.is_empty());
11954
11955        // AP invoices count should match vendor invoices count
11956        assert_eq!(
11957            result.subledger.ap_invoices.len(),
11958            result.document_flows.vendor_invoices.len()
11959        );
11960
11961        // AR invoices count should match customer invoices count
11962        assert_eq!(
11963            result.subledger.ar_invoices.len(),
11964            result.document_flows.customer_invoices.len()
11965        );
11966
11967        // Statistics should reflect subledger counts
11968        assert_eq!(
11969            result.statistics.ap_invoice_count,
11970            result.subledger.ap_invoices.len()
11971        );
11972        assert_eq!(
11973            result.statistics.ar_invoice_count,
11974            result.subledger.ar_invoices.len()
11975        );
11976    }
11977
11978    #[test]
11979    fn test_balance_validation() {
11980        let config = create_test_config();
11981        let phase_config = PhaseConfig {
11982            generate_master_data: false,
11983            generate_document_flows: false,
11984            generate_journal_entries: true,
11985            inject_anomalies: false,
11986            validate_balances: true,
11987            show_progress: false,
11988            ..Default::default()
11989        };
11990
11991        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
11992        let result = orchestrator.generate().unwrap();
11993
11994        // Balance validation should run
11995        assert!(result.balance_validation.validated);
11996        assert!(result.balance_validation.entries_processed > 0);
11997
11998        // Generated JEs should be balanced (no unbalanced entries)
11999        assert!(!result.balance_validation.has_unbalanced_entries);
12000
12001        // Total debits should equal total credits
12002        assert_eq!(
12003            result.balance_validation.total_debits,
12004            result.balance_validation.total_credits
12005        );
12006    }
12007
12008    #[test]
12009    fn test_statistics_accuracy() {
12010        let config = create_test_config();
12011        let phase_config = PhaseConfig {
12012            generate_master_data: true,
12013            generate_document_flows: false,
12014            generate_journal_entries: true,
12015            inject_anomalies: false,
12016            show_progress: false,
12017            vendors_per_company: 10,
12018            customers_per_company: 20,
12019            materials_per_company: 15,
12020            assets_per_company: 5,
12021            employees_per_company: 8,
12022            ..Default::default()
12023        };
12024
12025        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12026        let result = orchestrator.generate().unwrap();
12027
12028        // Statistics should match actual data
12029        assert_eq!(
12030            result.statistics.vendor_count,
12031            result.master_data.vendors.len()
12032        );
12033        assert_eq!(
12034            result.statistics.customer_count,
12035            result.master_data.customers.len()
12036        );
12037        assert_eq!(
12038            result.statistics.material_count,
12039            result.master_data.materials.len()
12040        );
12041        assert_eq!(
12042            result.statistics.total_entries as usize,
12043            result.journal_entries.len()
12044        );
12045    }
12046
12047    #[test]
12048    fn test_phase_config_defaults() {
12049        let config = PhaseConfig::default();
12050        assert!(config.generate_master_data);
12051        assert!(config.generate_document_flows);
12052        assert!(config.generate_journal_entries);
12053        assert!(!config.inject_anomalies);
12054        assert!(config.validate_balances);
12055        assert!(config.show_progress);
12056        assert!(config.vendors_per_company > 0);
12057        assert!(config.customers_per_company > 0);
12058    }
12059
12060    #[test]
12061    fn test_get_coa_before_generation() {
12062        let config = create_test_config();
12063        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
12064
12065        // Before generation, CoA should be None
12066        assert!(orchestrator.get_coa().is_none());
12067    }
12068
12069    #[test]
12070    fn test_get_coa_after_generation() {
12071        let config = create_test_config();
12072        let phase_config = PhaseConfig {
12073            generate_master_data: false,
12074            generate_document_flows: false,
12075            generate_journal_entries: true,
12076            inject_anomalies: false,
12077            show_progress: false,
12078            ..Default::default()
12079        };
12080
12081        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12082        let _ = orchestrator.generate().unwrap();
12083
12084        // After generation, CoA should be available
12085        assert!(orchestrator.get_coa().is_some());
12086    }
12087
12088    #[test]
12089    fn test_get_master_data() {
12090        let config = create_test_config();
12091        let phase_config = PhaseConfig {
12092            generate_master_data: true,
12093            generate_document_flows: false,
12094            generate_journal_entries: false,
12095            inject_anomalies: false,
12096            show_progress: false,
12097            vendors_per_company: 5,
12098            customers_per_company: 5,
12099            materials_per_company: 5,
12100            assets_per_company: 5,
12101            employees_per_company: 5,
12102            ..Default::default()
12103        };
12104
12105        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12106        let result = orchestrator.generate().unwrap();
12107
12108        // After generate(), master_data is moved into the result
12109        assert!(!result.master_data.vendors.is_empty());
12110    }
12111
12112    #[test]
12113    fn test_with_progress_builder() {
12114        let config = create_test_config();
12115        let orchestrator = EnhancedOrchestrator::with_defaults(config)
12116            .unwrap()
12117            .with_progress(false);
12118
12119        // Should still work without progress
12120        assert!(!orchestrator.phase_config.show_progress);
12121    }
12122
12123    #[test]
12124    fn test_multi_company_generation() {
12125        let mut config = create_test_config();
12126        config.companies.push(CompanyConfig {
12127            code: "2000".to_string(),
12128            name: "Subsidiary".to_string(),
12129            currency: "EUR".to_string(),
12130            functional_currency: None,
12131            country: "DE".to_string(),
12132            annual_transaction_volume: TransactionVolume::TenK,
12133            volume_weight: 0.5,
12134            fiscal_year_variant: "K4".to_string(),
12135        });
12136
12137        let phase_config = PhaseConfig {
12138            generate_master_data: true,
12139            generate_document_flows: false,
12140            generate_journal_entries: true,
12141            inject_anomalies: false,
12142            show_progress: false,
12143            vendors_per_company: 5,
12144            customers_per_company: 5,
12145            materials_per_company: 5,
12146            assets_per_company: 5,
12147            employees_per_company: 5,
12148            ..Default::default()
12149        };
12150
12151        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12152        let result = orchestrator.generate().unwrap();
12153
12154        // Should have master data for both companies
12155        assert!(result.statistics.vendor_count >= 10); // 5 per company
12156        assert!(result.statistics.customer_count >= 10);
12157        assert!(result.statistics.companies_count == 2);
12158    }
12159
12160    #[test]
12161    fn test_empty_master_data_skips_document_flows() {
12162        let config = create_test_config();
12163        let phase_config = PhaseConfig {
12164            generate_master_data: false,   // Skip master data
12165            generate_document_flows: true, // Try to generate flows
12166            generate_journal_entries: false,
12167            inject_anomalies: false,
12168            show_progress: false,
12169            ..Default::default()
12170        };
12171
12172        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12173        let result = orchestrator.generate().unwrap();
12174
12175        // Without master data, document flows should be empty
12176        assert!(result.document_flows.p2p_chains.is_empty());
12177        assert!(result.document_flows.o2c_chains.is_empty());
12178    }
12179
12180    #[test]
12181    fn test_journal_entry_line_item_count() {
12182        let config = create_test_config();
12183        let phase_config = PhaseConfig {
12184            generate_master_data: false,
12185            generate_document_flows: false,
12186            generate_journal_entries: true,
12187            inject_anomalies: false,
12188            show_progress: false,
12189            ..Default::default()
12190        };
12191
12192        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12193        let result = orchestrator.generate().unwrap();
12194
12195        // Total line items should match sum of all entry line counts
12196        let calculated_line_items: u64 = result
12197            .journal_entries
12198            .iter()
12199            .map(|e| e.line_count() as u64)
12200            .sum();
12201        assert_eq!(result.statistics.total_line_items, calculated_line_items);
12202    }
12203
12204    #[test]
12205    fn test_audit_generation() {
12206        let config = create_test_config();
12207        let phase_config = PhaseConfig {
12208            generate_master_data: false,
12209            generate_document_flows: false,
12210            generate_journal_entries: true,
12211            inject_anomalies: false,
12212            show_progress: false,
12213            generate_audit: true,
12214            audit_engagements: 2,
12215            workpapers_per_engagement: 5,
12216            evidence_per_workpaper: 2,
12217            risks_per_engagement: 3,
12218            findings_per_engagement: 2,
12219            judgments_per_engagement: 2,
12220            ..Default::default()
12221        };
12222
12223        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12224        let result = orchestrator.generate().unwrap();
12225
12226        // Should have generated audit data
12227        assert_eq!(result.audit.engagements.len(), 2);
12228        assert!(!result.audit.workpapers.is_empty());
12229        assert!(!result.audit.evidence.is_empty());
12230        assert!(!result.audit.risk_assessments.is_empty());
12231        assert!(!result.audit.findings.is_empty());
12232        assert!(!result.audit.judgments.is_empty());
12233
12234        // New ISA entity collections should also be populated
12235        assert!(
12236            !result.audit.confirmations.is_empty(),
12237            "ISA 505 confirmations should be generated"
12238        );
12239        assert!(
12240            !result.audit.confirmation_responses.is_empty(),
12241            "ISA 505 confirmation responses should be generated"
12242        );
12243        assert!(
12244            !result.audit.procedure_steps.is_empty(),
12245            "ISA 330 procedure steps should be generated"
12246        );
12247        // Samples may or may not be generated depending on workpaper sampling methods
12248        assert!(
12249            !result.audit.analytical_results.is_empty(),
12250            "ISA 520 analytical procedures should be generated"
12251        );
12252        assert!(
12253            !result.audit.ia_functions.is_empty(),
12254            "ISA 610 IA functions should be generated (one per engagement)"
12255        );
12256        assert!(
12257            !result.audit.related_parties.is_empty(),
12258            "ISA 550 related parties should be generated"
12259        );
12260
12261        // Statistics should match
12262        assert_eq!(
12263            result.statistics.audit_engagement_count,
12264            result.audit.engagements.len()
12265        );
12266        assert_eq!(
12267            result.statistics.audit_workpaper_count,
12268            result.audit.workpapers.len()
12269        );
12270        assert_eq!(
12271            result.statistics.audit_evidence_count,
12272            result.audit.evidence.len()
12273        );
12274        assert_eq!(
12275            result.statistics.audit_risk_count,
12276            result.audit.risk_assessments.len()
12277        );
12278        assert_eq!(
12279            result.statistics.audit_finding_count,
12280            result.audit.findings.len()
12281        );
12282        assert_eq!(
12283            result.statistics.audit_judgment_count,
12284            result.audit.judgments.len()
12285        );
12286        assert_eq!(
12287            result.statistics.audit_confirmation_count,
12288            result.audit.confirmations.len()
12289        );
12290        assert_eq!(
12291            result.statistics.audit_confirmation_response_count,
12292            result.audit.confirmation_responses.len()
12293        );
12294        assert_eq!(
12295            result.statistics.audit_procedure_step_count,
12296            result.audit.procedure_steps.len()
12297        );
12298        assert_eq!(
12299            result.statistics.audit_sample_count,
12300            result.audit.samples.len()
12301        );
12302        assert_eq!(
12303            result.statistics.audit_analytical_result_count,
12304            result.audit.analytical_results.len()
12305        );
12306        assert_eq!(
12307            result.statistics.audit_ia_function_count,
12308            result.audit.ia_functions.len()
12309        );
12310        assert_eq!(
12311            result.statistics.audit_ia_report_count,
12312            result.audit.ia_reports.len()
12313        );
12314        assert_eq!(
12315            result.statistics.audit_related_party_count,
12316            result.audit.related_parties.len()
12317        );
12318        assert_eq!(
12319            result.statistics.audit_related_party_transaction_count,
12320            result.audit.related_party_transactions.len()
12321        );
12322    }
12323
12324    #[test]
12325    fn test_new_phases_disabled_by_default() {
12326        let config = create_test_config();
12327        // Verify new config fields default to disabled
12328        assert!(!config.llm.enabled);
12329        assert!(!config.diffusion.enabled);
12330        assert!(!config.causal.enabled);
12331
12332        let phase_config = PhaseConfig {
12333            generate_master_data: false,
12334            generate_document_flows: false,
12335            generate_journal_entries: true,
12336            inject_anomalies: false,
12337            show_progress: false,
12338            ..Default::default()
12339        };
12340
12341        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12342        let result = orchestrator.generate().unwrap();
12343
12344        // All new phase statistics should be zero when disabled
12345        assert_eq!(result.statistics.llm_enrichment_ms, 0);
12346        assert_eq!(result.statistics.llm_vendors_enriched, 0);
12347        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
12348        assert_eq!(result.statistics.diffusion_samples_generated, 0);
12349        assert_eq!(result.statistics.causal_generation_ms, 0);
12350        assert_eq!(result.statistics.causal_samples_generated, 0);
12351        assert!(result.statistics.causal_validation_passed.is_none());
12352        assert_eq!(result.statistics.counterfactual_pair_count, 0);
12353        assert!(result.counterfactual_pairs.is_empty());
12354    }
12355
12356    #[test]
12357    fn test_counterfactual_generation_enabled() {
12358        let config = create_test_config();
12359        let phase_config = PhaseConfig {
12360            generate_master_data: false,
12361            generate_document_flows: false,
12362            generate_journal_entries: true,
12363            inject_anomalies: false,
12364            show_progress: false,
12365            generate_counterfactuals: true,
12366            generate_period_close: false, // Disable so entry count matches counterfactual pairs
12367            ..Default::default()
12368        };
12369
12370        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12371        let result = orchestrator.generate().unwrap();
12372
12373        // With JE generation enabled, counterfactual pairs should be generated
12374        if !result.journal_entries.is_empty() {
12375            assert_eq!(
12376                result.counterfactual_pairs.len(),
12377                result.journal_entries.len()
12378            );
12379            assert_eq!(
12380                result.statistics.counterfactual_pair_count,
12381                result.journal_entries.len()
12382            );
12383            // Each pair should have a distinct pair_id
12384            let ids: std::collections::HashSet<_> = result
12385                .counterfactual_pairs
12386                .iter()
12387                .map(|p| p.pair_id.clone())
12388                .collect();
12389            assert_eq!(ids.len(), result.counterfactual_pairs.len());
12390        }
12391    }
12392
12393    #[test]
12394    fn test_llm_enrichment_enabled() {
12395        let mut config = create_test_config();
12396        config.llm.enabled = true;
12397        config.llm.max_vendor_enrichments = 3;
12398
12399        let phase_config = PhaseConfig {
12400            generate_master_data: true,
12401            generate_document_flows: false,
12402            generate_journal_entries: false,
12403            inject_anomalies: false,
12404            show_progress: false,
12405            vendors_per_company: 5,
12406            customers_per_company: 3,
12407            materials_per_company: 3,
12408            assets_per_company: 3,
12409            employees_per_company: 3,
12410            ..Default::default()
12411        };
12412
12413        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12414        let result = orchestrator.generate().unwrap();
12415
12416        // LLM enrichment should have run
12417        assert!(result.statistics.llm_vendors_enriched > 0);
12418        assert!(result.statistics.llm_vendors_enriched <= 3);
12419    }
12420
12421    #[test]
12422    fn test_diffusion_enhancement_enabled() {
12423        let mut config = create_test_config();
12424        config.diffusion.enabled = true;
12425        config.diffusion.n_steps = 50;
12426        config.diffusion.sample_size = 20;
12427
12428        let phase_config = PhaseConfig {
12429            generate_master_data: false,
12430            generate_document_flows: false,
12431            generate_journal_entries: true,
12432            inject_anomalies: false,
12433            show_progress: false,
12434            ..Default::default()
12435        };
12436
12437        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12438        let result = orchestrator.generate().unwrap();
12439
12440        // Diffusion phase should have generated samples
12441        assert_eq!(result.statistics.diffusion_samples_generated, 20);
12442    }
12443
12444    #[test]
12445    fn test_causal_overlay_enabled() {
12446        let mut config = create_test_config();
12447        config.causal.enabled = true;
12448        config.causal.template = "fraud_detection".to_string();
12449        config.causal.sample_size = 100;
12450        config.causal.validate = true;
12451
12452        let phase_config = PhaseConfig {
12453            generate_master_data: false,
12454            generate_document_flows: false,
12455            generate_journal_entries: true,
12456            inject_anomalies: false,
12457            show_progress: false,
12458            ..Default::default()
12459        };
12460
12461        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12462        let result = orchestrator.generate().unwrap();
12463
12464        // Causal phase should have generated samples
12465        assert_eq!(result.statistics.causal_samples_generated, 100);
12466        // Validation should have run
12467        assert!(result.statistics.causal_validation_passed.is_some());
12468    }
12469
12470    #[test]
12471    fn test_causal_overlay_revenue_cycle_template() {
12472        let mut config = create_test_config();
12473        config.causal.enabled = true;
12474        config.causal.template = "revenue_cycle".to_string();
12475        config.causal.sample_size = 50;
12476        config.causal.validate = false;
12477
12478        let phase_config = PhaseConfig {
12479            generate_master_data: false,
12480            generate_document_flows: false,
12481            generate_journal_entries: true,
12482            inject_anomalies: false,
12483            show_progress: false,
12484            ..Default::default()
12485        };
12486
12487        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12488        let result = orchestrator.generate().unwrap();
12489
12490        // Causal phase should have generated samples
12491        assert_eq!(result.statistics.causal_samples_generated, 50);
12492        // Validation was disabled
12493        assert!(result.statistics.causal_validation_passed.is_none());
12494    }
12495
12496    #[test]
12497    fn test_all_new_phases_enabled_together() {
12498        let mut config = create_test_config();
12499        config.llm.enabled = true;
12500        config.llm.max_vendor_enrichments = 2;
12501        config.diffusion.enabled = true;
12502        config.diffusion.n_steps = 20;
12503        config.diffusion.sample_size = 10;
12504        config.causal.enabled = true;
12505        config.causal.sample_size = 50;
12506        config.causal.validate = true;
12507
12508        let phase_config = PhaseConfig {
12509            generate_master_data: true,
12510            generate_document_flows: false,
12511            generate_journal_entries: true,
12512            inject_anomalies: false,
12513            show_progress: false,
12514            vendors_per_company: 5,
12515            customers_per_company: 3,
12516            materials_per_company: 3,
12517            assets_per_company: 3,
12518            employees_per_company: 3,
12519            ..Default::default()
12520        };
12521
12522        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
12523        let result = orchestrator.generate().unwrap();
12524
12525        // All three phases should have run
12526        assert!(result.statistics.llm_vendors_enriched > 0);
12527        assert_eq!(result.statistics.diffusion_samples_generated, 10);
12528        assert_eq!(result.statistics.causal_samples_generated, 50);
12529        assert!(result.statistics.causal_validation_passed.is_some());
12530    }
12531
/// Round-trips an `EnhancedGenerationStatistics` value through JSON
/// (`serde_json::to_string` followed by `serde_json::from_str`) and checks
/// that every field populated in the fixture comes back with the same value.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let stats = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let json = serde_json::to_string(&stats).unwrap();
    let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();

    // The two entry counters are set in the fixture too, so verify them as
    // well; otherwise they could be lost in the round trip unnoticed.
    assert_eq!(deserialized.total_entries, 100);
    assert_eq!(deserialized.total_line_items, 500);

    // LLM enrichment, diffusion and causal statistics.
    assert_eq!(deserialized.llm_enrichment_ms, 42);
    assert_eq!(deserialized.llm_vendors_enriched, 10);
    assert_eq!(deserialized.diffusion_enhancement_ms, 100);
    assert_eq!(deserialized.diffusion_samples_generated, 50);
    assert_eq!(deserialized.causal_generation_ms, 200);
    assert_eq!(deserialized.causal_samples_generated, 100);
    assert_eq!(deserialized.causal_validation_passed, Some(true));
}
12558
/// Feeds a statistics JSON document that has no LLM, diffusion or causal keys
/// into the deserializer and checks that parsing succeeds, with the absent
/// fields coming back as `0` / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload in the older shape: none of the newer keys are present.
    let legacy_payload = r#"{
            "total_entries": 100,
            "total_line_items": 500,
            "accounts_count": 50,
            "companies_count": 1,
            "period_months": 12,
            "vendor_count": 10,
            "customer_count": 20,
            "material_count": 15,
            "asset_count": 5,
            "employee_count": 8,
            "p2p_chain_count": 5,
            "o2c_chain_count": 5,
            "ap_invoice_count": 5,
            "ar_invoice_count": 5,
            "ocpm_event_count": 0,
            "ocpm_object_count": 0,
            "ocpm_case_count": 0,
            "audit_engagement_count": 0,
            "audit_workpaper_count": 0,
            "audit_evidence_count": 0,
            "audit_risk_count": 0,
            "audit_finding_count": 0,
            "audit_judgment_count": 0,
            "anomalies_injected": 0,
            "data_quality_issues": 0,
            "banking_customer_count": 0,
            "banking_account_count": 0,
            "banking_transaction_count": 0,
            "banking_suspicious_count": 0,
            "graph_export_count": 0,
            "graph_node_count": 0,
            "graph_edge_count": 0
        }"#;

    let parsed = serde_json::from_str::<EnhancedGenerationStatistics>(legacy_payload).unwrap();

    // LLM enrichment fields fall back to zero.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);

    // Diffusion fields fall back to zero.
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);

    // Causal fields fall back to zero, and the validation flag is unset.
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
12608}