Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180// ============================================================================
181// Configuration Conversion Functions
182// ============================================================================
183
184/// Convert P2P flow config from schema to generator config.
185fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186    let payment_behavior = &schema_config.payment_behavior;
187    let late_dist = &payment_behavior.late_payment_days_distribution;
188
189    P2PGeneratorConfig {
190        three_way_match_rate: schema_config.three_way_match_rate,
191        partial_delivery_rate: schema_config.partial_delivery_rate,
192        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193        price_variance_rate: schema_config.price_variance_rate,
194        max_price_variance_percent: schema_config.max_price_variance_percent,
195        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198        payment_method_distribution: vec![
199            (PaymentMethod::BankTransfer, 0.60),
200            (PaymentMethod::Check, 0.25),
201            (PaymentMethod::Wire, 0.10),
202            (PaymentMethod::CreditCard, 0.05),
203        ],
204        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205        payment_behavior: P2PPaymentBehavior {
206            late_payment_rate: payment_behavior.late_payment_rate,
207            late_payment_distribution: LatePaymentDistribution {
208                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209                late_8_to_14: late_dist.late_8_to_14,
210                very_late_15_to_30: late_dist.very_late_15_to_30,
211                severely_late_31_to_60: late_dist.severely_late_31_to_60,
212                extremely_late_over_60: late_dist.extremely_late_over_60,
213            },
214            partial_payment_rate: payment_behavior.partial_payment_rate,
215            payment_correction_rate: payment_behavior.payment_correction_rate,
216            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217        },
218    }
219}
220
221/// Convert O2C flow config from schema to generator config.
222fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223    let payment_behavior = &schema_config.payment_behavior;
224
225    O2CGeneratorConfig {
226        credit_check_failure_rate: schema_config.credit_check_failure_rate,
227        partial_shipment_rate: schema_config.partial_shipment_rate,
228        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232        bad_debt_rate: schema_config.bad_debt_rate,
233        returns_rate: schema_config.return_rate,
234        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235        payment_method_distribution: vec![
236            (PaymentMethod::BankTransfer, 0.50),
237            (PaymentMethod::Check, 0.30),
238            (PaymentMethod::Wire, 0.15),
239            (PaymentMethod::CreditCard, 0.05),
240        ],
241        payment_behavior: O2CPaymentBehavior {
242            partial_payment_rate: payment_behavior.partial_payments.rate,
243            short_payment_rate: payment_behavior.short_payments.rate,
244            max_short_percent: payment_behavior.short_payments.max_short_percent,
245            on_account_rate: payment_behavior.on_account_payments.rate,
246            payment_correction_rate: payment_behavior.payment_corrections.rate,
247            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248        },
249    }
250}
251
/// Switches and volume settings that decide which generation phases run.
///
/// Boolean fields turn individual phases on or off; `usize` fields set how many
/// records the corresponding generators are asked to produce.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Produce master data: vendors, customers, materials, assets and employees.
    pub generate_master_data: bool,
    /// Produce the P2P and O2C document flows.
    pub generate_document_flows: bool,
    /// Derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Produce journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies into the generated data.
    pub inject_anomalies: bool,
    /// Inject data quality variations such as typos, missing values and format variations.
    pub inject_data_quality: bool,
    /// Check the balance sheet equation once generation has finished.
    pub validate_balances: bool,
    /// Display progress bars while generating.
    pub show_progress: bool,
    /// How many vendors to generate for each company.
    pub vendors_per_company: usize,
    /// How many customers to generate for each company.
    pub customers_per_company: usize,
    /// How many materials to generate for each company.
    pub materials_per_company: usize,
    /// How many assets to generate for each company.
    pub assets_per_company: usize,
    /// How many employees to generate for each company.
    pub employees_per_company: usize,
    /// How many P2P chains to generate.
    pub p2p_chains: usize,
    /// How many O2C chains to generate.
    pub o2c_chains: usize,
    /// Produce audit data: engagements, workpapers, evidence, risks, findings and judgments.
    pub generate_audit: bool,
    /// How many audit engagements to generate.
    pub audit_engagements: usize,
    /// How many workpapers to generate for each engagement.
    pub workpapers_per_engagement: usize,
    /// How many evidence items to generate for each workpaper.
    pub evidence_per_workpaper: usize,
    /// How many risk assessments to generate for each engagement.
    pub risks_per_engagement: usize,
    /// How many findings to generate for each engagement.
    pub findings_per_engagement: usize,
    /// How many professional judgments to generate for each engagement.
    pub judgments_per_engagement: usize,
    /// Produce banking KYC/AML data: customers, accounts, transactions and typologies.
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Produce S2C sourcing data: spend analysis, RFx, bids, contracts, catalogs and scorecards.
    pub generate_sourcing: bool,
    /// Produce bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Produce financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Produce accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Produce manufacturing data: production orders, quality inspections and cycle counts.
    pub generate_manufacturing: bool,
    /// Produce sales quotes, management KPIs and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Produce ESG data: emissions, energy, water, waste, social and governance.
    pub generate_esg: bool,
    /// Produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Produce process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Produce counterfactual (original, mutated) journal entry pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Produce compliance regulations data: standards registry, procedures, findings, filings.
    pub generate_compliance_regulations: bool,
    /// Produce period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Produce HR data: payroll, time entries, expenses, pensions and stock compensation.
    pub generate_hr: bool,
    /// Produce treasury data: cash management, hedging, debt and pooling.
    pub generate_treasury: bool,
    /// Produce project accounting data: projects, costs, revenue, EVM and milestones.
    pub generate_project_accounting: bool,
}

impl Default for PhaseConfig {
    /// Returns the baseline configuration: a small set of phases switched on,
    /// every other phase switched off, and fixed record volumes.
    fn default() -> Self {
        Self {
            // Phases that run unless the caller turns them off.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            generate_evolution_events: true,
            generate_period_close: true,
            show_progress: true,

            // Phases that stay off until explicitly requested.
            generate_ocpm_events: false,
            inject_anomalies: false,
            inject_data_quality: false, // off so that test data stays clean
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            generate_counterfactuals: false, // opt-in for ML workloads
            generate_compliance_regulations: false,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,

            // Master data volumes per company.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,

            // Document flow volumes.
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes.
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
383
384impl PhaseConfig {
385    /// Derive phase flags from [`GeneratorConfig`].
386    ///
387    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
388    /// CLI flags can override individual fields after calling this method.
389    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390        Self {
391            // Always-on phases
392            generate_master_data: true,
393            generate_document_flows: true,
394            generate_journal_entries: true,
395            validate_balances: true,
396            generate_period_close: true,
397            generate_evolution_events: true,
398            show_progress: true,
399
400            // Feature-gated phases — derived from config sections
401            generate_audit: cfg.audit.enabled,
402            generate_banking: cfg.banking.enabled,
403            generate_graph_export: cfg.graph_export.enabled,
404            generate_sourcing: cfg.source_to_pay.enabled,
405            generate_intercompany: cfg.intercompany.enabled,
406            generate_financial_statements: cfg.financial_reporting.enabled,
407            generate_bank_reconciliation: cfg.financial_reporting.enabled,
408            generate_accounting_standards: cfg.accounting_standards.enabled,
409            generate_manufacturing: cfg.manufacturing.enabled,
410            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411            generate_tax: cfg.tax.enabled,
412            generate_esg: cfg.esg.enabled,
413            generate_ocpm_events: cfg.ocpm.enabled,
414            generate_compliance_regulations: cfg.compliance_regulations.enabled,
415            generate_hr: cfg.hr.enabled,
416            generate_treasury: cfg.treasury.enabled,
417            generate_project_accounting: cfg.project_accounting.enabled,
418
419            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
420            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423            inject_data_quality: cfg.data_quality.enabled,
424
425            // Count defaults (CLI can override after calling this method)
426            vendors_per_company: 50,
427            customers_per_company: 100,
428            materials_per_company: 200,
429            assets_per_company: 50,
430            employees_per_company: 100,
431            p2p_chains: 100,
432            o2c_chains: 100,
433            audit_engagements: 5,
434            workpapers_per_engagement: 20,
435            evidence_per_workpaper: 5,
436            risks_per_engagement: 15,
437            findings_per_engagement: 8,
438            judgments_per_engagement: 10,
439        }
440    }
441}
442
443/// Master data snapshot containing all generated entities.
444#[derive(Debug, Clone, Default)]
445pub struct MasterDataSnapshot {
446    /// Generated vendors.
447    pub vendors: Vec<Vendor>,
448    /// Generated customers.
449    pub customers: Vec<Customer>,
450    /// Generated materials.
451    pub materials: Vec<Material>,
452    /// Generated fixed assets.
453    pub assets: Vec<FixedAsset>,
454    /// Generated employees.
455    pub employees: Vec<Employee>,
456    /// Generated cost center hierarchy (two-level: departments + sub-departments).
457    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
458    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
459    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
460}
461
/// Summary of a hypergraph export that has completed.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Path of the output directory.
    pub output_path: PathBuf,
}
474
475/// Document flow snapshot containing all generated document chains.
476#[derive(Debug, Clone, Default)]
477pub struct DocumentFlowSnapshot {
478    /// P2P document chains.
479    pub p2p_chains: Vec<P2PDocumentChain>,
480    /// O2C document chains.
481    pub o2c_chains: Vec<O2CDocumentChain>,
482    /// All purchase orders (flattened).
483    pub purchase_orders: Vec<documents::PurchaseOrder>,
484    /// All goods receipts (flattened).
485    pub goods_receipts: Vec<documents::GoodsReceipt>,
486    /// All vendor invoices (flattened).
487    pub vendor_invoices: Vec<documents::VendorInvoice>,
488    /// All sales orders (flattened).
489    pub sales_orders: Vec<documents::SalesOrder>,
490    /// All deliveries (flattened).
491    pub deliveries: Vec<documents::Delivery>,
492    /// All customer invoices (flattened).
493    pub customer_invoices: Vec<documents::CustomerInvoice>,
494    /// All payments (flattened).
495    pub payments: Vec<documents::Payment>,
496    /// Cross-document references collected from all document headers
497    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
498    pub document_references: Vec<documents::DocumentReference>,
499}
500
501/// Subledger snapshot containing generated subledger records.
502#[derive(Debug, Clone, Default)]
503pub struct SubledgerSnapshot {
504    /// AP invoices linked from document flow vendor invoices.
505    pub ap_invoices: Vec<APInvoice>,
506    /// AR invoices linked from document flow customer invoices.
507    pub ar_invoices: Vec<ARInvoice>,
508    /// FA subledger records (asset acquisitions from FA generator).
509    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
510    /// Inventory positions from inventory generator.
511    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
512    /// Inventory movements from inventory generator.
513    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
514    /// AR aging reports, one per company, computed after payment settlement.
515    pub ar_aging_reports: Vec<ARAgingReport>,
516    /// AP aging reports, one per company, computed after payment settlement.
517    pub ap_aging_reports: Vec<APAgingReport>,
518    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
519    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
520    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
521    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
522    /// Dunning runs executed after AR aging (one per company per dunning cycle).
523    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
524    /// Dunning letters generated across all dunning runs.
525    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
526}
527
528/// OCPM snapshot containing generated OCPM event log data.
529#[derive(Debug, Clone, Default)]
530pub struct OcpmSnapshot {
531    /// OCPM event log (if generated)
532    pub event_log: Option<OcpmEventLog>,
533    /// Number of events generated
534    pub event_count: usize,
535    /// Number of objects generated
536    pub object_count: usize,
537    /// Number of cases generated
538    pub case_count: usize,
539}
540
541/// Audit data snapshot containing all generated audit-related entities.
542#[derive(Debug, Clone, Default)]
543pub struct AuditSnapshot {
544    /// Audit engagements per ISA 210/220.
545    pub engagements: Vec<AuditEngagement>,
546    /// Workpapers per ISA 230.
547    pub workpapers: Vec<Workpaper>,
548    /// Audit evidence per ISA 500.
549    pub evidence: Vec<AuditEvidence>,
550    /// Risk assessments per ISA 315/330.
551    pub risk_assessments: Vec<RiskAssessment>,
552    /// Audit findings per ISA 265.
553    pub findings: Vec<AuditFinding>,
554    /// Professional judgments per ISA 200.
555    pub judgments: Vec<ProfessionalJudgment>,
556    /// External confirmations per ISA 505.
557    pub confirmations: Vec<ExternalConfirmation>,
558    /// Confirmation responses per ISA 505.
559    pub confirmation_responses: Vec<ConfirmationResponse>,
560    /// Audit procedure steps per ISA 330/530.
561    pub procedure_steps: Vec<AuditProcedureStep>,
562    /// Audit samples per ISA 530.
563    pub samples: Vec<AuditSample>,
564    /// Analytical procedure results per ISA 520.
565    pub analytical_results: Vec<AnalyticalProcedureResult>,
566    /// Internal audit functions per ISA 610.
567    pub ia_functions: Vec<InternalAuditFunction>,
568    /// Internal audit reports per ISA 610.
569    pub ia_reports: Vec<InternalAuditReport>,
570    /// Related parties per ISA 550.
571    pub related_parties: Vec<RelatedParty>,
572    /// Related party transactions per ISA 550.
573    pub related_party_transactions: Vec<RelatedPartyTransaction>,
574    // ---- ISA 600: Group Audits ----
575    /// Component auditors assigned by jurisdiction (ISA 600).
576    pub component_auditors: Vec<ComponentAuditor>,
577    /// Group audit plan with materiality allocations (ISA 600).
578    pub group_audit_plan: Option<GroupAuditPlan>,
579    /// Component instructions issued to component auditors (ISA 600).
580    pub component_instructions: Vec<ComponentInstruction>,
581    /// Reports received from component auditors (ISA 600).
582    pub component_reports: Vec<ComponentAuditorReport>,
583    // ---- ISA 210: Engagement Letters ----
584    /// Engagement letters per ISA 210.
585    pub engagement_letters: Vec<EngagementLetter>,
586    // ---- ISA 560 / IAS 10: Subsequent Events ----
587    /// Subsequent events per ISA 560 / IAS 10.
588    pub subsequent_events: Vec<SubsequentEvent>,
589    // ---- ISA 402: Service Organization Controls ----
590    /// Service organizations identified per ISA 402.
591    pub service_organizations: Vec<ServiceOrganization>,
592    /// SOC reports obtained per ISA 402.
593    pub soc_reports: Vec<SocReport>,
594    /// User entity controls documented per ISA 402.
595    pub user_entity_controls: Vec<UserEntityControl>,
596    // ---- ISA 570: Going Concern ----
597    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
598    pub going_concern_assessments:
599        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
600    // ---- ISA 540: Accounting Estimates ----
601    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
602    pub accounting_estimates:
603        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
604    // ---- ISA 700/701/705/706: Audit Opinions ----
605    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
606    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
607    /// Key Audit Matters per ISA 701 (flattened across all opinions).
608    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
609    // ---- SOX 302 / 404 ----
610    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
611    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
612    /// SOX Section 404 ICFR assessments (one per entity per year).
613    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
614    // ---- ISA 320: Materiality ----
615    /// Materiality calculations per entity per period (ISA 320).
616    pub materiality_calculations:
617        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
618    // ---- ISA 315: Combined Risk Assessments ----
619    /// Combined Risk Assessments per account area / assertion (ISA 315).
620    pub combined_risk_assessments:
621        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
622    // ---- ISA 530: Sampling Plans ----
623    /// Sampling plans per CRA at Moderate or higher (ISA 530).
624    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
625    /// Individual sampled items (key items + representative items) per ISA 530.
626    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
627    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
628    /// Significant classes of transactions per ISA 315 (one set per entity).
629    pub significant_transaction_classes:
630        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
631    // ---- ISA 520: Unusual Item Markers ----
632    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
633    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
634    // ---- ISA 520: Analytical Relationships ----
635    /// Analytical relationships (ratios, trends, correlations) per entity.
636    pub analytical_relationships:
637        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
638    // ---- PCAOB-ISA Cross-Reference ----
639    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
640    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
641    // ---- ISA Standard Reference ----
642    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
643    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
644    // ---- ISA 220 / ISA 300: Audit Scopes ----
645    /// Audit scope records (one per engagement) describing the audit boundary.
646    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
647    // ---- FSM Event Trail ----
648    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
649    /// Contains the ordered sequence of state-transition and procedure-step events
650    /// generated by the audit FSM engine.
651    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
652}
653
654/// Banking KYC/AML data snapshot containing all generated banking entities.
655#[derive(Debug, Clone, Default)]
656pub struct BankingSnapshot {
657    /// Banking customers (retail, business, trust).
658    pub customers: Vec<BankingCustomer>,
659    /// Bank accounts.
660    pub accounts: Vec<BankAccount>,
661    /// Bank transactions with AML labels.
662    pub transactions: Vec<BankTransaction>,
663    /// Transaction-level AML labels with features.
664    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
665    /// Customer-level AML labels.
666    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
667    /// Account-level AML labels.
668    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
669    /// Relationship-level AML labels.
670    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
671    /// Case narratives for AML scenarios.
672    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
673    /// Number of suspicious transactions.
674    pub suspicious_count: usize,
675    /// Number of AML scenarios generated.
676    pub scenario_count: usize,
677}
678
679/// Graph export snapshot containing exported graph metadata.
680#[derive(Debug, Clone, Default, Serialize)]
681pub struct GraphExportSnapshot {
682    /// Whether graph export was performed.
683    pub exported: bool,
684    /// Number of graphs exported.
685    pub graph_count: usize,
686    /// Exported graph metadata (by format name).
687    pub exports: HashMap<String, GraphExportInfo>,
688}
689
690/// Information about an exported graph.
691#[derive(Debug, Clone, Serialize)]
692pub struct GraphExportInfo {
693    /// Graph name.
694    pub name: String,
695    /// Export format (pytorch_geometric, neo4j, dgl).
696    pub format: String,
697    /// Output directory path.
698    pub output_path: PathBuf,
699    /// Number of nodes.
700    pub node_count: usize,
701    /// Number of edges.
702    pub edge_count: usize,
703}
704
705/// S2C sourcing data snapshot.
706#[derive(Debug, Clone, Default)]
707pub struct SourcingSnapshot {
708    /// Spend analyses.
709    pub spend_analyses: Vec<SpendAnalysis>,
710    /// Sourcing projects.
711    pub sourcing_projects: Vec<SourcingProject>,
712    /// Supplier qualifications.
713    pub qualifications: Vec<SupplierQualification>,
714    /// RFx events (RFI, RFP, RFQ).
715    pub rfx_events: Vec<RfxEvent>,
716    /// Supplier bids.
717    pub bids: Vec<SupplierBid>,
718    /// Bid evaluations.
719    pub bid_evaluations: Vec<BidEvaluation>,
720    /// Procurement contracts.
721    pub contracts: Vec<ProcurementContract>,
722    /// Catalog items.
723    pub catalog_items: Vec<CatalogItem>,
724    /// Supplier scorecards.
725    pub scorecards: Vec<SupplierScorecard>,
726}
727
728/// A single period's trial balance with metadata.
729#[derive(Debug, Clone, Serialize, Deserialize)]
730pub struct PeriodTrialBalance {
731    /// Fiscal year.
732    pub fiscal_year: u16,
733    /// Fiscal period (1-12).
734    pub fiscal_period: u8,
735    /// Period start date.
736    pub period_start: NaiveDate,
737    /// Period end date.
738    pub period_end: NaiveDate,
739    /// Trial balance entries for this period.
740    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
741}
742
743/// Financial reporting snapshot (financial statements + bank reconciliations).
744#[derive(Debug, Clone, Default)]
745pub struct FinancialReportingSnapshot {
746    /// Financial statements (balance sheet, income statement, cash flow).
747    /// For multi-entity configs this includes all standalone statements.
748    pub financial_statements: Vec<FinancialStatement>,
749    /// Standalone financial statements keyed by entity code.
750    /// Each entity has its own slice of statements.
751    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
752    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
753    pub consolidated_statements: Vec<FinancialStatement>,
754    /// Consolidation schedules (one per period) showing pre/post elimination detail.
755    pub consolidation_schedules: Vec<ConsolidationSchedule>,
756    /// Bank reconciliations.
757    pub bank_reconciliations: Vec<BankReconciliation>,
758    /// Period-close trial balances (one per period).
759    pub trial_balances: Vec<PeriodTrialBalance>,
760    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
761    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
762    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
763    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
764    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
765    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
766}
767
768/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
769#[derive(Debug, Clone, Default)]
770pub struct HrSnapshot {
771    /// Payroll runs (actual data).
772    pub payroll_runs: Vec<PayrollRun>,
773    /// Payroll line items (actual data).
774    pub payroll_line_items: Vec<PayrollLineItem>,
775    /// Time entries (actual data).
776    pub time_entries: Vec<TimeEntry>,
777    /// Expense reports (actual data).
778    pub expense_reports: Vec<ExpenseReport>,
779    /// Benefit enrollments (actual data).
780    pub benefit_enrollments: Vec<BenefitEnrollment>,
781    /// Defined benefit pension plans (IAS 19 / ASC 715).
782    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
783    /// Pension obligation (DBO) roll-forwards.
784    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
785    /// Plan asset roll-forwards.
786    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
787    /// Pension disclosures.
788    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
789    /// Journal entries generated from pension expense and OCI remeasurements.
790    pub pension_journal_entries: Vec<JournalEntry>,
791    /// Stock grants (ASC 718 / IFRS 2).
792    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
793    /// Stock-based compensation period expense records.
794    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
795    /// Journal entries generated from stock-based compensation expense.
796    pub stock_comp_journal_entries: Vec<JournalEntry>,
797    /// Payroll runs.
798    pub payroll_run_count: usize,
799    /// Payroll line item count.
800    pub payroll_line_item_count: usize,
801    /// Time entry count.
802    pub time_entry_count: usize,
803    /// Expense report count.
804    pub expense_report_count: usize,
805    /// Benefit enrollment count.
806    pub benefit_enrollment_count: usize,
807    /// Pension plan count.
808    pub pension_plan_count: usize,
809    /// Stock grant count.
810    pub stock_grant_count: usize,
811}
812
813/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
814#[derive(Debug, Clone, Default)]
815pub struct AccountingStandardsSnapshot {
816    /// Revenue recognition contracts (actual data).
817    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
818    /// Impairment tests (actual data).
819    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
820    /// Business combinations (IFRS 3 / ASC 805).
821    pub business_combinations:
822        Vec<datasynth_core::models::business_combination::BusinessCombination>,
823    /// Journal entries generated from business combinations (Day 1 + amortization).
824    pub business_combination_journal_entries: Vec<JournalEntry>,
825    /// ECL models (IFRS 9 / ASC 326).
826    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
827    /// ECL provision movements.
828    pub ecl_provision_movements:
829        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
830    /// Journal entries from ECL provision.
831    pub ecl_journal_entries: Vec<JournalEntry>,
832    /// Provisions (IAS 37 / ASC 450).
833    pub provisions: Vec<datasynth_core::models::provision::Provision>,
834    /// Provision movement roll-forwards (IAS 37 / ASC 450).
835    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
836    /// Contingent liabilities (IAS 37 / ASC 450).
837    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
838    /// Journal entries from provisions.
839    pub provision_journal_entries: Vec<JournalEntry>,
840    /// IAS 21 functional currency translation results (one per entity per period).
841    pub currency_translation_results:
842        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
843    /// Revenue recognition contract count.
844    pub revenue_contract_count: usize,
845    /// Impairment test count.
846    pub impairment_test_count: usize,
847    /// Business combination count.
848    pub business_combination_count: usize,
849    /// ECL model count.
850    pub ecl_model_count: usize,
851    /// Provision count.
852    pub provision_count: usize,
853    /// Currency translation result count (IAS 21).
854    pub currency_translation_count: usize,
855}
856
857/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
858#[derive(Debug, Clone, Default)]
859pub struct ComplianceRegulationsSnapshot {
860    /// Flattened standard records for output.
861    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
862    /// Cross-reference records.
863    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
864    /// Jurisdiction profile records.
865    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
866    /// Generated audit procedures.
867    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
868    /// Generated compliance findings.
869    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
870    /// Generated regulatory filings.
871    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
872    /// Compliance graph (if graph integration enabled).
873    pub compliance_graph: Option<datasynth_graph::Graph>,
874}
875
876/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
877#[derive(Debug, Clone, Default)]
878pub struct ManufacturingSnapshot {
879    /// Production orders (actual data).
880    pub production_orders: Vec<ProductionOrder>,
881    /// Quality inspections (actual data).
882    pub quality_inspections: Vec<QualityInspection>,
883    /// Cycle counts (actual data).
884    pub cycle_counts: Vec<CycleCount>,
885    /// BOM components (actual data).
886    pub bom_components: Vec<BomComponent>,
887    /// Inventory movements (actual data).
888    pub inventory_movements: Vec<InventoryMovement>,
889    /// Production order count.
890    pub production_order_count: usize,
891    /// Quality inspection count.
892    pub quality_inspection_count: usize,
893    /// Cycle count count.
894    pub cycle_count_count: usize,
895    /// BOM component count.
896    pub bom_component_count: usize,
897    /// Inventory movement count.
898    pub inventory_movement_count: usize,
899}
900
901/// Sales, KPI, and budget data snapshot.
902#[derive(Debug, Clone, Default)]
903pub struct SalesKpiBudgetsSnapshot {
904    /// Sales quotes (actual data).
905    pub sales_quotes: Vec<SalesQuote>,
906    /// Management KPIs (actual data).
907    pub kpis: Vec<ManagementKpi>,
908    /// Budgets (actual data).
909    pub budgets: Vec<Budget>,
910    /// Sales quote count.
911    pub sales_quote_count: usize,
912    /// Management KPI count.
913    pub kpi_count: usize,
914    /// Budget line count.
915    pub budget_line_count: usize,
916}
917
918/// Anomaly labels generated during injection.
919#[derive(Debug, Clone, Default)]
920pub struct AnomalyLabels {
921    /// All anomaly labels.
922    pub labels: Vec<LabeledAnomaly>,
923    /// Summary statistics.
924    pub summary: Option<AnomalySummary>,
925    /// Count by anomaly type.
926    pub by_type: HashMap<String, usize>,
927}
928
929/// Balance validation results from running balance tracker.
930#[derive(Debug, Clone, Default)]
931pub struct BalanceValidationResult {
932    /// Whether validation was performed.
933    pub validated: bool,
934    /// Whether balance sheet equation is satisfied.
935    pub is_balanced: bool,
936    /// Number of entries processed.
937    pub entries_processed: u64,
938    /// Total debits across all entries.
939    pub total_debits: rust_decimal::Decimal,
940    /// Total credits across all entries.
941    pub total_credits: rust_decimal::Decimal,
942    /// Number of accounts tracked.
943    pub accounts_tracked: usize,
944    /// Number of companies tracked.
945    pub companies_tracked: usize,
946    /// Validation errors encountered.
947    pub validation_errors: Vec<ValidationError>,
948    /// Whether any unbalanced entries were found.
949    pub has_unbalanced_entries: bool,
950}
951
952/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
953#[derive(Debug, Clone, Default)]
954pub struct TaxSnapshot {
955    /// Tax jurisdictions.
956    pub jurisdictions: Vec<TaxJurisdiction>,
957    /// Tax codes.
958    pub codes: Vec<TaxCode>,
959    /// Tax lines computed on documents.
960    pub tax_lines: Vec<TaxLine>,
961    /// Tax returns filed per period.
962    pub tax_returns: Vec<TaxReturn>,
963    /// Tax provisions.
964    pub tax_provisions: Vec<TaxProvision>,
965    /// Withholding tax records.
966    pub withholding_records: Vec<WithholdingTaxRecord>,
967    /// Tax anomaly labels.
968    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
969    /// Jurisdiction count.
970    pub jurisdiction_count: usize,
971    /// Code count.
972    pub code_count: usize,
973    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
974    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
975    /// Journal entries posting tax payable/receivable from computed tax lines.
976    pub tax_posting_journal_entries: Vec<JournalEntry>,
977}
978
979/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
980#[derive(Debug, Clone, Default, Serialize, Deserialize)]
981pub struct IntercompanySnapshot {
982    /// Group ownership structure (parent/subsidiary/associate relationships).
983    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
984    /// IC matched pairs (transaction pairs between related entities).
985    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
986    /// IC journal entries generated from matched pairs (seller side).
987    pub seller_journal_entries: Vec<JournalEntry>,
988    /// IC journal entries generated from matched pairs (buyer side).
989    pub buyer_journal_entries: Vec<JournalEntry>,
990    /// Elimination entries for consolidation.
991    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
992    /// NCI measurements derived from group structure ownership percentages.
993    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
994    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
995    #[serde(skip)]
996    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
997    /// IC matched pair count.
998    pub matched_pair_count: usize,
999    /// IC elimination entry count.
1000    pub elimination_entry_count: usize,
1001    /// IC matching rate (0.0 to 1.0).
1002    pub match_rate: f64,
1003}
1004
1005/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
1006#[derive(Debug, Clone, Default)]
1007pub struct EsgSnapshot {
1008    /// Emission records (scope 1, 2, 3).
1009    pub emissions: Vec<EmissionRecord>,
1010    /// Energy consumption records.
1011    pub energy: Vec<EnergyConsumption>,
1012    /// Water usage records.
1013    pub water: Vec<WaterUsage>,
1014    /// Waste records.
1015    pub waste: Vec<WasteRecord>,
1016    /// Workforce diversity metrics.
1017    pub diversity: Vec<WorkforceDiversityMetric>,
1018    /// Pay equity metrics.
1019    pub pay_equity: Vec<PayEquityMetric>,
1020    /// Safety incidents.
1021    pub safety_incidents: Vec<SafetyIncident>,
1022    /// Safety metrics.
1023    pub safety_metrics: Vec<SafetyMetric>,
1024    /// Governance metrics.
1025    pub governance: Vec<GovernanceMetric>,
1026    /// Supplier ESG assessments.
1027    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1028    /// Materiality assessments.
1029    pub materiality: Vec<MaterialityAssessment>,
1030    /// ESG disclosures.
1031    pub disclosures: Vec<EsgDisclosure>,
1032    /// Climate scenarios.
1033    pub climate_scenarios: Vec<ClimateScenario>,
1034    /// ESG anomaly labels.
1035    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1036    /// Total emission record count.
1037    pub emission_count: usize,
1038    /// Total disclosure count.
1039    pub disclosure_count: usize,
1040}
1041
1042/// Treasury data snapshot (cash management, hedging, debt, pooling).
1043#[derive(Debug, Clone, Default)]
1044pub struct TreasurySnapshot {
1045    /// Cash positions (daily balances per account).
1046    pub cash_positions: Vec<CashPosition>,
1047    /// Cash forecasts.
1048    pub cash_forecasts: Vec<CashForecast>,
1049    /// Cash pools.
1050    pub cash_pools: Vec<CashPool>,
1051    /// Cash pool sweep transactions.
1052    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1053    /// Hedging instruments.
1054    pub hedging_instruments: Vec<HedgingInstrument>,
1055    /// Hedge relationships (ASC 815/IFRS 9 designations).
1056    pub hedge_relationships: Vec<HedgeRelationship>,
1057    /// Debt instruments.
1058    pub debt_instruments: Vec<DebtInstrument>,
1059    /// Bank guarantees and letters of credit.
1060    pub bank_guarantees: Vec<BankGuarantee>,
1061    /// Intercompany netting runs.
1062    pub netting_runs: Vec<NettingRun>,
1063    /// Treasury anomaly labels.
1064    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1065    /// Journal entries generated from treasury instruments (debt interest accruals,
1066    /// hedge MTM, cash pool sweeps).
1067    pub journal_entries: Vec<JournalEntry>,
1068}
1069
1070/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
1071#[derive(Debug, Clone, Default)]
1072pub struct ProjectAccountingSnapshot {
1073    /// Projects with WBS hierarchies.
1074    pub projects: Vec<Project>,
1075    /// Project cost lines (linked from source documents).
1076    pub cost_lines: Vec<ProjectCostLine>,
1077    /// Revenue recognition records.
1078    pub revenue_records: Vec<ProjectRevenue>,
1079    /// Earned value metrics.
1080    pub earned_value_metrics: Vec<EarnedValueMetric>,
1081    /// Change orders.
1082    pub change_orders: Vec<ChangeOrder>,
1083    /// Project milestones.
1084    pub milestones: Vec<ProjectMilestone>,
1085}
1086
1087/// Complete result of enhanced generation run.
1088#[derive(Debug, Default)]
1089pub struct EnhancedGenerationResult {
1090    /// Generated chart of accounts.
1091    pub chart_of_accounts: ChartOfAccounts,
1092    /// Master data snapshot.
1093    pub master_data: MasterDataSnapshot,
1094    /// Document flow snapshot.
1095    pub document_flows: DocumentFlowSnapshot,
1096    /// Subledger snapshot (linked from document flows).
1097    pub subledger: SubledgerSnapshot,
1098    /// OCPM event log snapshot (if OCPM generation enabled).
1099    pub ocpm: OcpmSnapshot,
1100    /// Audit data snapshot (if audit generation enabled).
1101    pub audit: AuditSnapshot,
1102    /// Banking KYC/AML data snapshot (if banking generation enabled).
1103    pub banking: BankingSnapshot,
1104    /// Graph export snapshot (if graph export enabled).
1105    pub graph_export: GraphExportSnapshot,
1106    /// S2C sourcing data snapshot (if sourcing generation enabled).
1107    pub sourcing: SourcingSnapshot,
1108    /// Financial reporting snapshot (financial statements + bank reconciliations).
1109    pub financial_reporting: FinancialReportingSnapshot,
1110    /// HR data snapshot (payroll, time entries, expenses).
1111    pub hr: HrSnapshot,
1112    /// Accounting standards snapshot (revenue recognition, impairment).
1113    pub accounting_standards: AccountingStandardsSnapshot,
1114    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1115    pub manufacturing: ManufacturingSnapshot,
1116    /// Sales, KPI, and budget snapshot.
1117    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1118    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1119    pub tax: TaxSnapshot,
1120    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1121    pub esg: EsgSnapshot,
1122    /// Treasury data snapshot (cash management, hedging, debt).
1123    pub treasury: TreasurySnapshot,
1124    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1125    pub project_accounting: ProjectAccountingSnapshot,
1126    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1127    pub process_evolution: Vec<ProcessEvolutionEvent>,
1128    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1129    pub organizational_events: Vec<OrganizationalEvent>,
1130    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1131    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1132    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1133    pub intercompany: IntercompanySnapshot,
1134    /// Generated journal entries.
1135    pub journal_entries: Vec<JournalEntry>,
1136    /// Anomaly labels (if injection enabled).
1137    pub anomaly_labels: AnomalyLabels,
1138    /// Balance validation results (if validation enabled).
1139    pub balance_validation: BalanceValidationResult,
1140    /// Data quality statistics (if injection enabled).
1141    pub data_quality_stats: DataQualityStats,
1142    /// Data quality issue records (if injection enabled).
1143    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1144    /// Generation statistics.
1145    pub statistics: EnhancedGenerationStatistics,
1146    /// Data lineage graph (if tracking enabled).
1147    pub lineage: Option<super::lineage::LineageGraph>,
1148    /// Quality gate evaluation result.
1149    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1150    /// Internal controls (if controls generation enabled).
1151    pub internal_controls: Vec<InternalControl>,
1152    /// SoD (Segregation of Duties) violations identified during control application.
1153    ///
1154    /// Each record corresponds to a journal entry where `sod_violation == true`.
1155    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1156    /// Opening balances (if opening balance generation enabled).
1157    pub opening_balances: Vec<GeneratedOpeningBalance>,
1158    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1159    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1160    /// Counterfactual (original, mutated) JE pairs for ML training.
1161    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1162    /// Fraud red-flag indicators on P2P/O2C documents.
1163    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1164    /// Collusion rings (coordinated fraud networks).
1165    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1166    /// Bi-temporal version chains for vendor entities.
1167    pub temporal_vendor_chains:
1168        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1169    /// Entity relationship graph (nodes + edges with strength scores).
1170    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1171    /// Cross-process links (P2P ↔ O2C via inventory movements).
1172    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1173    /// Industry-specific GL accounts and metadata.
1174    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1175    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1176    pub compliance_regulations: ComplianceRegulationsSnapshot,
1177}
1178
1179/// Enhanced statistics about a generation run.
1180#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1181pub struct EnhancedGenerationStatistics {
1182    /// Total journal entries generated.
1183    pub total_entries: u64,
1184    /// Total line items generated.
1185    pub total_line_items: u64,
1186    /// Number of accounts in CoA.
1187    pub accounts_count: usize,
1188    /// Number of companies.
1189    pub companies_count: usize,
1190    /// Period in months.
1191    pub period_months: u32,
1192    /// Master data counts.
1193    pub vendor_count: usize,
1194    pub customer_count: usize,
1195    pub material_count: usize,
1196    pub asset_count: usize,
1197    pub employee_count: usize,
1198    /// Document flow counts.
1199    pub p2p_chain_count: usize,
1200    pub o2c_chain_count: usize,
1201    /// Subledger counts.
1202    pub ap_invoice_count: usize,
1203    pub ar_invoice_count: usize,
1204    /// OCPM counts.
1205    pub ocpm_event_count: usize,
1206    pub ocpm_object_count: usize,
1207    pub ocpm_case_count: usize,
1208    /// Audit counts.
1209    pub audit_engagement_count: usize,
1210    pub audit_workpaper_count: usize,
1211    pub audit_evidence_count: usize,
1212    pub audit_risk_count: usize,
1213    pub audit_finding_count: usize,
1214    pub audit_judgment_count: usize,
1215    /// ISA 505 confirmation counts.
1216    #[serde(default)]
1217    pub audit_confirmation_count: usize,
1218    #[serde(default)]
1219    pub audit_confirmation_response_count: usize,
1220    /// ISA 330/530 procedure step and sample counts.
1221    #[serde(default)]
1222    pub audit_procedure_step_count: usize,
1223    #[serde(default)]
1224    pub audit_sample_count: usize,
1225    /// ISA 520 analytical procedure counts.
1226    #[serde(default)]
1227    pub audit_analytical_result_count: usize,
1228    /// ISA 610 internal audit counts.
1229    #[serde(default)]
1230    pub audit_ia_function_count: usize,
1231    #[serde(default)]
1232    pub audit_ia_report_count: usize,
1233    /// ISA 550 related party counts.
1234    #[serde(default)]
1235    pub audit_related_party_count: usize,
1236    #[serde(default)]
1237    pub audit_related_party_transaction_count: usize,
1238    /// Anomaly counts.
1239    pub anomalies_injected: usize,
1240    /// Data quality issue counts.
1241    pub data_quality_issues: usize,
1242    /// Banking counts.
1243    pub banking_customer_count: usize,
1244    pub banking_account_count: usize,
1245    pub banking_transaction_count: usize,
1246    pub banking_suspicious_count: usize,
1247    /// Graph export counts.
1248    pub graph_export_count: usize,
1249    pub graph_node_count: usize,
1250    pub graph_edge_count: usize,
1251    /// LLM enrichment timing (milliseconds).
1252    #[serde(default)]
1253    pub llm_enrichment_ms: u64,
1254    /// Number of vendor names enriched by LLM.
1255    #[serde(default)]
1256    pub llm_vendors_enriched: usize,
1257    /// Diffusion enhancement timing (milliseconds).
1258    #[serde(default)]
1259    pub diffusion_enhancement_ms: u64,
1260    /// Number of diffusion samples generated.
1261    #[serde(default)]
1262    pub diffusion_samples_generated: usize,
1263    /// Hybrid-diffusion blend weight actually applied (after clamp to [0,1]).
1264    /// `None` when the neural/hybrid backend is not active.
1265    #[serde(default, skip_serializing_if = "Option::is_none")]
1266    pub neural_hybrid_weight: Option<f64>,
1267    /// Hybrid-diffusion strategy applied (weighted_average / column_select / threshold).
1268    #[serde(default, skip_serializing_if = "Option::is_none")]
1269    pub neural_hybrid_strategy: Option<String>,
1270    /// How many columns were routed through the neural backend.
1271    #[serde(default, skip_serializing_if = "Option::is_none")]
1272    pub neural_routed_column_count: Option<usize>,
1273    /// Causal generation timing (milliseconds).
1274    #[serde(default)]
1275    pub causal_generation_ms: u64,
1276    /// Number of causal samples generated.
1277    #[serde(default)]
1278    pub causal_samples_generated: usize,
1279    /// Whether causal validation passed.
1280    #[serde(default)]
1281    pub causal_validation_passed: Option<bool>,
1282    /// S2C sourcing counts.
1283    #[serde(default)]
1284    pub sourcing_project_count: usize,
1285    #[serde(default)]
1286    pub rfx_event_count: usize,
1287    #[serde(default)]
1288    pub bid_count: usize,
1289    #[serde(default)]
1290    pub contract_count: usize,
1291    #[serde(default)]
1292    pub catalog_item_count: usize,
1293    #[serde(default)]
1294    pub scorecard_count: usize,
1295    /// Financial reporting counts.
1296    #[serde(default)]
1297    pub financial_statement_count: usize,
1298    #[serde(default)]
1299    pub bank_reconciliation_count: usize,
1300    /// HR counts.
1301    #[serde(default)]
1302    pub payroll_run_count: usize,
1303    #[serde(default)]
1304    pub time_entry_count: usize,
1305    #[serde(default)]
1306    pub expense_report_count: usize,
1307    #[serde(default)]
1308    pub benefit_enrollment_count: usize,
1309    #[serde(default)]
1310    pub pension_plan_count: usize,
1311    #[serde(default)]
1312    pub stock_grant_count: usize,
1313    /// Accounting standards counts.
1314    #[serde(default)]
1315    pub revenue_contract_count: usize,
1316    #[serde(default)]
1317    pub impairment_test_count: usize,
1318    #[serde(default)]
1319    pub business_combination_count: usize,
1320    #[serde(default)]
1321    pub ecl_model_count: usize,
1322    #[serde(default)]
1323    pub provision_count: usize,
1324    /// Manufacturing counts.
1325    #[serde(default)]
1326    pub production_order_count: usize,
1327    #[serde(default)]
1328    pub quality_inspection_count: usize,
1329    #[serde(default)]
1330    pub cycle_count_count: usize,
1331    #[serde(default)]
1332    pub bom_component_count: usize,
1333    #[serde(default)]
1334    pub inventory_movement_count: usize,
1335    /// Sales & reporting counts.
1336    #[serde(default)]
1337    pub sales_quote_count: usize,
1338    #[serde(default)]
1339    pub kpi_count: usize,
1340    #[serde(default)]
1341    pub budget_line_count: usize,
1342    /// Tax counts.
1343    #[serde(default)]
1344    pub tax_jurisdiction_count: usize,
1345    #[serde(default)]
1346    pub tax_code_count: usize,
1347    /// ESG counts.
1348    #[serde(default)]
1349    pub esg_emission_count: usize,
1350    #[serde(default)]
1351    pub esg_disclosure_count: usize,
1352    /// Intercompany counts.
1353    #[serde(default)]
1354    pub ic_matched_pair_count: usize,
1355    #[serde(default)]
1356    pub ic_elimination_count: usize,
1357    /// Number of intercompany journal entries (seller + buyer side).
1358    #[serde(default)]
1359    pub ic_transaction_count: usize,
1360    /// Number of fixed asset subledger records.
1361    #[serde(default)]
1362    pub fa_subledger_count: usize,
1363    /// Number of inventory subledger records.
1364    #[serde(default)]
1365    pub inventory_subledger_count: usize,
1366    /// Treasury debt instrument count.
1367    #[serde(default)]
1368    pub treasury_debt_instrument_count: usize,
1369    /// Treasury hedging instrument count.
1370    #[serde(default)]
1371    pub treasury_hedging_instrument_count: usize,
1372    /// Project accounting project count.
1373    #[serde(default)]
1374    pub project_count: usize,
1375    /// Project accounting change order count.
1376    #[serde(default)]
1377    pub project_change_order_count: usize,
1378    /// Tax provision count.
1379    #[serde(default)]
1380    pub tax_provision_count: usize,
1381    /// Opening balance count.
1382    #[serde(default)]
1383    pub opening_balance_count: usize,
1384    /// Subledger reconciliation count.
1385    #[serde(default)]
1386    pub subledger_reconciliation_count: usize,
1387    /// Tax line count.
1388    #[serde(default)]
1389    pub tax_line_count: usize,
1390    /// Project cost line count.
1391    #[serde(default)]
1392    pub project_cost_line_count: usize,
1393    /// Cash position count.
1394    #[serde(default)]
1395    pub cash_position_count: usize,
1396    /// Cash forecast count.
1397    #[serde(default)]
1398    pub cash_forecast_count: usize,
1399    /// Cash pool count.
1400    #[serde(default)]
1401    pub cash_pool_count: usize,
1402    /// Process evolution event count.
1403    #[serde(default)]
1404    pub process_evolution_event_count: usize,
1405    /// Organizational event count.
1406    #[serde(default)]
1407    pub organizational_event_count: usize,
1408    /// Counterfactual pair count.
1409    #[serde(default)]
1410    pub counterfactual_pair_count: usize,
1411    /// Number of fraud red-flag indicators generated.
1412    #[serde(default)]
1413    pub red_flag_count: usize,
1414    /// Number of collusion rings generated.
1415    #[serde(default)]
1416    pub collusion_ring_count: usize,
1417    /// Number of bi-temporal vendor version chains generated.
1418    #[serde(default)]
1419    pub temporal_version_chain_count: usize,
1420    /// Number of nodes in the entity relationship graph.
1421    #[serde(default)]
1422    pub entity_relationship_node_count: usize,
1423    /// Number of edges in the entity relationship graph.
1424    #[serde(default)]
1425    pub entity_relationship_edge_count: usize,
1426    /// Number of cross-process links generated.
1427    #[serde(default)]
1428    pub cross_process_link_count: usize,
1429    /// Number of disruption events generated.
1430    #[serde(default)]
1431    pub disruption_event_count: usize,
1432    /// Number of industry-specific GL accounts generated.
1433    #[serde(default)]
1434    pub industry_gl_account_count: usize,
1435    /// Number of period-close journal entries generated (tax provision + closing entries).
1436    #[serde(default)]
1437    pub period_close_je_count: usize,
1438}
1439
/// Enhanced orchestrator with full feature integration.
///
/// Bundles the generator configuration with the state that is shared between
/// generation phases (chart of accounts, master data, resource monitoring,
/// copula generators, country packs and an optional streaming sink).
pub struct EnhancedOrchestrator {
    /// Generator configuration driving the run (validated in `new`).
    config: GeneratorConfig,
    /// Phase configuration; carries the `show_progress` flag among others.
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `None` on a freshly constructed orchestrator.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts out as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or drawn at random when unset.
    seed: u64,
    /// Progress bar container; created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint)
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry for localized data generation
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink for phase-by-phase output
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1459
1460impl EnhancedOrchestrator {
1461    /// Create a new enhanced orchestrator.
1462    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1463        datasynth_config::validate_config(&config)?;
1464
1465        let seed = config.global.seed.unwrap_or_else(rand::random);
1466
1467        // Build resource guard from config
1468        let resource_guard = Self::build_resource_guard(&config, None);
1469
1470        // Build country pack registry from config
1471        let country_pack_registry = match &config.country_packs {
1472            Some(cp) => {
1473                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1474                    .map_err(|e| SynthError::config(e.to_string()))?
1475            }
1476            None => datasynth_core::CountryPackRegistry::builtin_only()
1477                .map_err(|e| SynthError::config(e.to_string()))?,
1478        };
1479
1480        Ok(Self {
1481            config,
1482            phase_config,
1483            coa: None,
1484            master_data: MasterDataSnapshot::default(),
1485            seed,
1486            multi_progress: None,
1487            resource_guard,
1488            output_path: None,
1489            copula_generators: Vec::new(),
1490            country_pack_registry,
1491            phase_sink: None,
1492        })
1493    }
1494
1495    /// Create with default phase config.
1496    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1497        Self::new(config, PhaseConfig::default())
1498    }
1499
1500    /// Set a streaming phase sink for real-time output (builder pattern).
1501    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1502        self.phase_sink = Some(sink);
1503        self
1504    }
1505
1506    /// Set a streaming phase sink on an existing orchestrator.
1507    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1508        self.phase_sink = Some(sink);
1509    }
1510
1511    /// Emit a batch of items to the phase sink (if configured).
1512    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1513        if let Some(ref sink) = self.phase_sink {
1514            for item in items {
1515                if let Ok(value) = serde_json::to_value(item) {
1516                    if let Err(e) = sink.emit(phase, type_name, &value) {
1517                        warn!(
1518                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1519                        );
1520                    }
1521                }
1522            }
1523            if let Err(e) = sink.phase_complete(phase) {
1524                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1525            }
1526        }
1527    }
1528
1529    /// Enable/disable progress bars.
1530    pub fn with_progress(mut self, show: bool) -> Self {
1531        self.phase_config.show_progress = show;
1532        if show {
1533            self.multi_progress = Some(MultiProgress::new());
1534        }
1535        self
1536    }
1537
1538    /// Set the output path for disk space monitoring.
1539    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1540        let path = path.into();
1541        self.output_path = Some(path.clone());
1542        // Rebuild resource guard with the output path
1543        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1544        self
1545    }
1546
1547    /// Access the country pack registry.
1548    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1549        &self.country_pack_registry
1550    }
1551
1552    /// Look up a country pack by country code string.
1553    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1554        self.country_pack_registry.get_by_str(country)
1555    }
1556
1557    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1558    /// company, defaulting to `"US"` if no companies are configured.
1559    fn primary_country_code(&self) -> &str {
1560        self.config
1561            .companies
1562            .first()
1563            .map(|c| c.country.as_str())
1564            .unwrap_or("US")
1565    }
1566
1567    /// Resolve the country pack for the primary (first) company.
1568    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1569        self.country_pack_for(self.primary_country_code())
1570    }
1571
1572    /// Resolve the CoA framework from config/country-pack.
1573    fn resolve_coa_framework(&self) -> CoAFramework {
1574        if self.config.accounting_standards.enabled {
1575            match self.config.accounting_standards.framework {
1576                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1577                    return CoAFramework::FrenchPcg;
1578                }
1579                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1580                    return CoAFramework::GermanSkr04;
1581                }
1582                _ => {}
1583            }
1584        }
1585        // Fallback: derive from country pack
1586        let pack = self.primary_pack();
1587        match pack.accounting.framework.as_str() {
1588            "french_gaap" => CoAFramework::FrenchPcg,
1589            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1590            _ => CoAFramework::UsGaap,
1591        }
1592    }
1593
1594    /// Check if copula generators are available.
1595    ///
1596    /// Returns true if the orchestrator has copula generators for preserving
1597    /// correlations (typically from fingerprint-based generation).
1598    pub fn has_copulas(&self) -> bool {
1599        !self.copula_generators.is_empty()
1600    }
1601
1602    /// Get the copula generators.
1603    ///
1604    /// Returns a reference to the copula generators for use during generation.
1605    /// These can be used to generate correlated samples that preserve the
1606    /// statistical relationships from the source data.
1607    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1608        &self.copula_generators
1609    }
1610
1611    /// Get a mutable reference to the copula generators.
1612    ///
1613    /// Allows generators to sample from copulas during data generation.
1614    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1615        &mut self.copula_generators
1616    }
1617
1618    /// Sample correlated values from a named copula.
1619    ///
1620    /// Returns None if the copula doesn't exist.
1621    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1622        self.copula_generators
1623            .iter_mut()
1624            .find(|c| c.name == copula_name)
1625            .map(|c| c.generator.sample())
1626    }
1627
1628    /// Create an orchestrator from a fingerprint file.
1629    ///
1630    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1631    /// and creates an orchestrator configured to generate data matching
1632    /// the statistical properties of the original data.
1633    ///
1634    /// # Arguments
1635    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1636    /// * `phase_config` - Phase configuration for generation
1637    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1638    ///
1639    /// # Example
1640    /// ```no_run
1641    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1642    /// use std::path::Path;
1643    ///
1644    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1645    ///     Path::new("fingerprint.dsf"),
1646    ///     PhaseConfig::default(),
1647    ///     1.0,
1648    /// ).unwrap();
1649    /// ```
1650    pub fn from_fingerprint(
1651        fingerprint_path: &std::path::Path,
1652        phase_config: PhaseConfig,
1653        scale: f64,
1654    ) -> SynthResult<Self> {
1655        info!("Loading fingerprint from: {}", fingerprint_path.display());
1656
1657        // Read the fingerprint
1658        let reader = FingerprintReader::new();
1659        let fingerprint = reader
1660            .read_from_file(fingerprint_path)
1661            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1662
1663        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1664    }
1665
1666    /// Create an orchestrator from a loaded fingerprint.
1667    ///
1668    /// # Arguments
1669    /// * `fingerprint` - The loaded fingerprint
1670    /// * `phase_config` - Phase configuration for generation
1671    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1672    pub fn from_fingerprint_data(
1673        fingerprint: Fingerprint,
1674        phase_config: PhaseConfig,
1675        scale: f64,
1676    ) -> SynthResult<Self> {
1677        info!(
1678            "Synthesizing config from fingerprint (version: {}, tables: {})",
1679            fingerprint.manifest.version,
1680            fingerprint.schema.tables.len()
1681        );
1682
1683        // Generate a seed for the synthesis
1684        let seed: u64 = rand::random();
1685        info!("Fingerprint synthesis seed: {}", seed);
1686
1687        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1688        let options = SynthesisOptions {
1689            scale,
1690            seed: Some(seed),
1691            preserve_correlations: true,
1692            inject_anomalies: true,
1693        };
1694        let synthesizer = ConfigSynthesizer::with_options(options);
1695
1696        // Synthesize full result including copula generators
1697        let synthesis_result = synthesizer
1698            .synthesize_full(&fingerprint, seed)
1699            .map_err(|e| {
1700                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1701            })?;
1702
1703        // Start with a base config from the fingerprint's industry if available
1704        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1705            Self::base_config_for_industry(industry)
1706        } else {
1707            Self::base_config_for_industry("manufacturing")
1708        };
1709
1710        // Apply the synthesized patches
1711        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1712
1713        // Log synthesis results
1714        info!(
1715            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1716            fingerprint.schema.tables.len(),
1717            scale,
1718            synthesis_result.copula_generators.len()
1719        );
1720
1721        if !synthesis_result.copula_generators.is_empty() {
1722            for spec in &synthesis_result.copula_generators {
1723                info!(
1724                    "  Copula '{}' for table '{}': {} columns",
1725                    spec.name,
1726                    spec.table,
1727                    spec.columns.len()
1728                );
1729            }
1730        }
1731
1732        // Create the orchestrator with the synthesized config
1733        let mut orchestrator = Self::new(config, phase_config)?;
1734
1735        // Store copula generators for use during generation
1736        orchestrator.copula_generators = synthesis_result.copula_generators;
1737
1738        Ok(orchestrator)
1739    }
1740
1741    /// Create a base config for a given industry.
1742    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1743        use datasynth_config::presets::create_preset;
1744        use datasynth_config::TransactionVolume;
1745        use datasynth_core::models::{CoAComplexity, IndustrySector};
1746
1747        let sector = match industry.to_lowercase().as_str() {
1748            "manufacturing" => IndustrySector::Manufacturing,
1749            "retail" => IndustrySector::Retail,
1750            "financial" | "financial_services" => IndustrySector::FinancialServices,
1751            "healthcare" => IndustrySector::Healthcare,
1752            "technology" | "tech" => IndustrySector::Technology,
1753            _ => IndustrySector::Manufacturing,
1754        };
1755
1756        // Create a preset with reasonable defaults
1757        create_preset(
1758            sector,
1759            1,  // company count
1760            12, // period months
1761            CoAComplexity::Medium,
1762            TransactionVolume::TenK,
1763        )
1764    }
1765
1766    /// Apply a config patch to a GeneratorConfig.
1767    fn apply_config_patch(
1768        mut config: GeneratorConfig,
1769        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1770    ) -> GeneratorConfig {
1771        use datasynth_fingerprint::synthesis::ConfigValue;
1772
1773        for (key, value) in patch.values() {
1774            match (key.as_str(), value) {
1775                // Transaction count is handled via TransactionVolume enum on companies
1776                // Log it but cannot directly set it (would need to modify company volumes)
1777                ("transactions.count", ConfigValue::Integer(n)) => {
1778                    info!(
1779                        "Fingerprint suggests {} transactions (apply via company volumes)",
1780                        n
1781                    );
1782                }
1783                ("global.period_months", ConfigValue::Integer(n)) => {
1784                    config.global.period_months = (*n).clamp(1, 120) as u32;
1785                }
1786                ("global.start_date", ConfigValue::String(s)) => {
1787                    config.global.start_date = s.clone();
1788                }
1789                ("global.seed", ConfigValue::Integer(n)) => {
1790                    config.global.seed = Some(*n as u64);
1791                }
1792                ("fraud.enabled", ConfigValue::Bool(b)) => {
1793                    config.fraud.enabled = *b;
1794                }
1795                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1796                    config.fraud.fraud_rate = *f;
1797                }
1798                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1799                    config.data_quality.enabled = *b;
1800                }
1801                // Handle anomaly injection paths (mapped to fraud config)
1802                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1803                    config.fraud.enabled = *b;
1804                }
1805                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1806                    config.fraud.fraud_rate = *f;
1807                }
1808                _ => {
1809                    debug!("Ignoring unknown config patch key: {}", key);
1810                }
1811            }
1812        }
1813
1814        config
1815    }
1816
1817    /// Build a resource guard from the configuration.
1818    fn build_resource_guard(
1819        config: &GeneratorConfig,
1820        output_path: Option<PathBuf>,
1821    ) -> ResourceGuard {
1822        let mut builder = ResourceGuardBuilder::new();
1823
1824        // Configure memory limit if set
1825        if config.global.memory_limit_mb > 0 {
1826            builder = builder.memory_limit(config.global.memory_limit_mb);
1827        }
1828
1829        // Configure disk monitoring for output path
1830        if let Some(path) = output_path {
1831            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1832        }
1833
1834        // Use conservative degradation settings for production safety
1835        builder = builder.conservative();
1836
1837        builder.build()
1838    }
1839
1840    /// Check resources (memory, disk, CPU) and return degradation level.
1841    ///
1842    /// Returns an error if hard limits are exceeded.
1843    /// Returns Ok(DegradationLevel) indicating current resource state.
1844    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1845        self.resource_guard.check()
1846    }
1847
1848    /// Check resources with logging.
1849    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1850        let level = self.resource_guard.check()?;
1851
1852        if level != DegradationLevel::Normal {
1853            warn!(
1854                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1855                phase,
1856                level,
1857                self.resource_guard.current_memory_mb(),
1858                self.resource_guard.available_disk_mb()
1859            );
1860        }
1861
1862        Ok(level)
1863    }
1864
1865    /// Get current degradation actions based on resource state.
1866    fn get_degradation_actions(&self) -> DegradationActions {
1867        self.resource_guard.get_actions()
1868    }
1869
1870    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1871    fn check_memory_limit(&self) -> SynthResult<()> {
1872        self.check_resources()?;
1873        Ok(())
1874    }
1875
1876    /// Run the complete generation workflow.
1877    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1878        info!("Starting enhanced generation workflow");
1879        info!(
1880            "Config: industry={:?}, period_months={}, companies={}",
1881            self.config.global.industry,
1882            self.config.global.period_months,
1883            self.config.companies.len()
1884        );
1885
1886        // Set decimal serialization mode (thread-local, affects JSON output).
1887        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
1888        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
1889        datasynth_core::serde_decimal::set_numeric_native(is_native);
1890        struct NumericModeGuard;
1891        impl Drop for NumericModeGuard {
1892            fn drop(&mut self) {
1893                datasynth_core::serde_decimal::set_numeric_native(false);
1894            }
1895        }
1896        let _numeric_guard = if is_native {
1897            Some(NumericModeGuard)
1898        } else {
1899            None
1900        };
1901
1902        // Initial resource check before starting
1903        let initial_level = self.check_resources_with_log("initial")?;
1904        if initial_level == DegradationLevel::Emergency {
1905            return Err(SynthError::resource(
1906                "Insufficient resources to start generation",
1907            ));
1908        }
1909
1910        let mut stats = EnhancedGenerationStatistics {
1911            companies_count: self.config.companies.len(),
1912            period_months: self.config.global.period_months,
1913            ..Default::default()
1914        };
1915
1916        // Phase 1: Chart of Accounts
1917        let coa = self.phase_chart_of_accounts(&mut stats)?;
1918
1919        // Phase 2: Master Data
1920        self.phase_master_data(&mut stats)?;
1921
1922        // Emit master data to stream sink
1923        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1924        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1925        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1926
1927        // Phase 3: Document Flows + Subledger Linking
1928        let (mut document_flows, mut subledger, fa_journal_entries) =
1929            self.phase_document_flows(&mut stats)?;
1930
1931        // Emit document flows to stream sink
1932        self.emit_phase_items(
1933            "document_flows",
1934            "PurchaseOrder",
1935            &document_flows.purchase_orders,
1936        );
1937        self.emit_phase_items(
1938            "document_flows",
1939            "GoodsReceipt",
1940            &document_flows.goods_receipts,
1941        );
1942        self.emit_phase_items(
1943            "document_flows",
1944            "VendorInvoice",
1945            &document_flows.vendor_invoices,
1946        );
1947        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1948        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1949
1950        // Phase 3b: Opening Balances (before JE generation)
1951        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1952
1953        // Phase 3c: Convert opening balances to journal entries and prepend them.
1954        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
1955        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
1956        // balance map type.
1957        let opening_balance_jes: Vec<JournalEntry> = opening_balances
1958            .iter()
1959            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1960            .collect();
1961        if !opening_balance_jes.is_empty() {
1962            debug!(
1963                "Prepending {} opening balance JEs to entries",
1964                opening_balance_jes.len()
1965            );
1966        }
1967
1968        // Phase 4: Journal Entries
1969        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1970
1971        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
1972        // starts from the correct initial state.
1973        if !opening_balance_jes.is_empty() {
1974            let mut combined = opening_balance_jes;
1975            combined.extend(entries);
1976            entries = combined;
1977        }
1978
1979        // Phase 4c: Append FA acquisition journal entries to main entries
1980        if !fa_journal_entries.is_empty() {
1981            debug!(
1982                "Appending {} FA acquisition JEs to main entries",
1983                fa_journal_entries.len()
1984            );
1985            entries.extend(fa_journal_entries);
1986        }
1987
1988        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
1989        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1990
1991        // Get current degradation actions for optional phases
1992        let actions = self.get_degradation_actions();
1993
1994        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1995        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1996
1997        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
1998        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
1999        if !sourcing.contracts.is_empty() {
2000            let mut linked_count = 0usize;
2001            // Collect (vendor_id, po_id) pairs from P2P chains
2002            let po_vendor_pairs: Vec<(String, String)> = document_flows
2003                .p2p_chains
2004                .iter()
2005                .map(|chain| {
2006                    (
2007                        chain.purchase_order.vendor_id.clone(),
2008                        chain.purchase_order.header.document_id.clone(),
2009                    )
2010                })
2011                .collect();
2012
2013            for chain in &mut document_flows.p2p_chains {
2014                if chain.purchase_order.contract_id.is_none() {
2015                    if let Some(contract) = sourcing
2016                        .contracts
2017                        .iter()
2018                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2019                    {
2020                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2021                        linked_count += 1;
2022                    }
2023                }
2024            }
2025
2026            // Populate reverse FK: purchase_order_ids on each contract
2027            for contract in &mut sourcing.contracts {
2028                let po_ids: Vec<String> = po_vendor_pairs
2029                    .iter()
2030                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2031                    .map(|(_, po_id)| po_id.clone())
2032                    .collect();
2033                if !po_ids.is_empty() {
2034                    contract.purchase_order_ids = po_ids;
2035                }
2036            }
2037
2038            if linked_count > 0 {
2039                debug!(
2040                    "Linked {} purchase orders to S2C contracts by vendor match",
2041                    linked_count
2042                );
2043            }
2044        }
2045
2046        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2047        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2048
2049        // Phase 5c: Append IC journal entries to main entries
2050        if !intercompany.seller_journal_entries.is_empty()
2051            || !intercompany.buyer_journal_entries.is_empty()
2052        {
2053            let ic_je_count = intercompany.seller_journal_entries.len()
2054                + intercompany.buyer_journal_entries.len();
2055            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2056            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2057            debug!(
2058                "Appended {} IC journal entries to main entries",
2059                ic_je_count
2060            );
2061        }
2062
2063        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2064        if !intercompany.elimination_entries.is_empty() {
2065            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2066                &intercompany.elimination_entries,
2067            );
2068            if !elim_jes.is_empty() {
2069                debug!(
2070                    "Appended {} elimination journal entries to main entries",
2071                    elim_jes.len()
2072                );
2073                // IC elimination net-zero assertion (v2.5 hardening)
2074                let elim_debit: rust_decimal::Decimal =
2075                    elim_jes.iter().map(|je| je.total_debit()).sum();
2076                let elim_credit: rust_decimal::Decimal =
2077                    elim_jes.iter().map(|je| je.total_credit()).sum();
2078                let elim_diff = (elim_debit - elim_credit).abs();
2079                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2080                if elim_diff > tolerance {
2081                    return Err(datasynth_core::error::SynthError::generation(format!(
2082                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2083                        elim_debit, elim_credit, elim_diff, tolerance
2084                    )));
2085                }
2086                debug!(
2087                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2088                    elim_debit, elim_credit, elim_diff
2089                );
2090                entries.extend(elim_jes);
2091            }
2092        }
2093
2094        // Phase 5e: Wire IC source documents into document flow snapshot
2095        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2096            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2097                document_flows
2098                    .customer_invoices
2099                    .extend(ic_docs.seller_invoices.iter().cloned());
2100                document_flows
2101                    .purchase_orders
2102                    .extend(ic_docs.buyer_orders.iter().cloned());
2103                document_flows
2104                    .goods_receipts
2105                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2106                document_flows
2107                    .vendor_invoices
2108                    .extend(ic_docs.buyer_invoices.iter().cloned());
2109                debug!(
2110                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2111                    ic_docs.seller_invoices.len(),
2112                    ic_docs.buyer_orders.len(),
2113                    ic_docs.buyer_goods_receipts.len(),
2114                    ic_docs.buyer_invoices.len(),
2115                );
2116            }
2117        }
2118
2119        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2120        let hr = self.phase_hr_data(&mut stats)?;
2121
2122        // Phase 6b: Generate JEs from payroll runs
2123        if !hr.payroll_runs.is_empty() {
2124            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2125            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2126            entries.extend(payroll_jes);
2127        }
2128
2129        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2130        if !hr.pension_journal_entries.is_empty() {
2131            debug!(
2132                "Generated {} JEs from pension plans",
2133                hr.pension_journal_entries.len()
2134            );
2135            entries.extend(hr.pension_journal_entries.iter().cloned());
2136        }
2137
2138        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2139        if !hr.stock_comp_journal_entries.is_empty() {
2140            debug!(
2141                "Generated {} JEs from stock-based compensation",
2142                hr.stock_comp_journal_entries.len()
2143            );
2144            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2145        }
2146
2147        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2148        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2149
2150        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2151        if !manufacturing_snap.production_orders.is_empty() {
2152            let currency = self
2153                .config
2154                .companies
2155                .first()
2156                .map(|c| c.currency.as_str())
2157                .unwrap_or("USD");
2158            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2159                &manufacturing_snap.production_orders,
2160                &manufacturing_snap.quality_inspections,
2161                currency,
2162            );
2163            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2164            entries.extend(mfg_jes);
2165        }
2166
2167        // Phase 7a-warranty: Generate warranty provisions per company
2168        if !manufacturing_snap.quality_inspections.is_empty() {
2169            let framework = match self.config.accounting_standards.framework {
2170                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2171                _ => "US_GAAP",
2172            };
2173            for company in &self.config.companies {
2174                let company_orders: Vec<_> = manufacturing_snap
2175                    .production_orders
2176                    .iter()
2177                    .filter(|o| o.company_code == company.code)
2178                    .cloned()
2179                    .collect();
2180                let company_inspections: Vec<_> = manufacturing_snap
2181                    .quality_inspections
2182                    .iter()
2183                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2184                    .cloned()
2185                    .collect();
2186                if company_inspections.is_empty() {
2187                    continue;
2188                }
2189                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2190                let warranty_result = warranty_gen.generate(
2191                    &company.code,
2192                    &company_orders,
2193                    &company_inspections,
2194                    &company.currency,
2195                    framework,
2196                );
2197                if !warranty_result.journal_entries.is_empty() {
2198                    debug!(
2199                        "Generated {} warranty provision JEs for {}",
2200                        warranty_result.journal_entries.len(),
2201                        company.code
2202                    );
2203                    entries.extend(warranty_result.journal_entries);
2204                }
2205            }
2206        }
2207
2208        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2209        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2210        {
2211            let cogs_currency = self
2212                .config
2213                .companies
2214                .first()
2215                .map(|c| c.currency.as_str())
2216                .unwrap_or("USD");
2217            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2218                &document_flows.deliveries,
2219                &manufacturing_snap.production_orders,
2220                cogs_currency,
2221            );
2222            if !cogs_jes.is_empty() {
2223                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2224                entries.extend(cogs_jes);
2225            }
2226        }
2227
2228        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2229        //
2230        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2231        // subledger inventory positions.  Here we reconcile them so that position balances
2232        // reflect the actual stock movements within the generation period.
2233        if !manufacturing_snap.inventory_movements.is_empty()
2234            && !subledger.inventory_positions.is_empty()
2235        {
2236            use datasynth_core::models::MovementType as MfgMovementType;
2237            let mut receipt_count = 0usize;
2238            let mut issue_count = 0usize;
2239            for movement in &manufacturing_snap.inventory_movements {
2240                // Find a matching position by material code and company
2241                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2242                    p.material_id == movement.material_code
2243                        && p.company_code == movement.entity_code
2244                }) {
2245                    match movement.movement_type {
2246                        MfgMovementType::GoodsReceipt => {
2247                            // Increase stock and update weighted-average cost
2248                            pos.add_quantity(
2249                                movement.quantity,
2250                                movement.value,
2251                                movement.movement_date,
2252                            );
2253                            receipt_count += 1;
2254                        }
2255                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2256                            // Decrease stock (best-effort; silently skip if insufficient)
2257                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2258                            issue_count += 1;
2259                        }
2260                        _ => {}
2261                    }
2262                }
2263            }
2264            debug!(
2265                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2266                manufacturing_snap.inventory_movements.len(),
2267                receipt_count,
2268                issue_count,
2269            );
2270        }
2271
2272        // Update final entry/line-item stats after all JE-generating phases
2273        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2274        if !entries.is_empty() {
2275            stats.total_entries = entries.len() as u64;
2276            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2277            debug!(
2278                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2279                stats.total_entries, stats.total_line_items
2280            );
2281        }
2282
2283        // Phase 7b: Apply internal controls to journal entries
2284        if self.config.internal_controls.enabled && !entries.is_empty() {
2285            info!("Phase 7b: Applying internal controls to journal entries");
2286            let control_config = ControlGeneratorConfig {
2287                exception_rate: self.config.internal_controls.exception_rate,
2288                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2289                enable_sox_marking: true,
2290                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2291                    self.config.internal_controls.sox_materiality_threshold,
2292                )
2293                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2294                ..Default::default()
2295            };
2296            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2297            for entry in &mut entries {
2298                control_gen.apply_controls(entry, &coa);
2299            }
2300            let with_controls = entries
2301                .iter()
2302                .filter(|e| !e.header.control_ids.is_empty())
2303                .count();
2304            info!(
2305                "Applied controls to {} entries ({} with control IDs assigned)",
2306                entries.len(),
2307                with_controls
2308            );
2309        }
2310
2311        // Phase 7c: Extract SoD violations from annotated journal entries.
2312        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2313        // Here we materialise those flags into standalone SodViolation records.
2314        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2315            .iter()
2316            .filter(|e| e.header.sod_violation)
2317            .filter_map(|e| {
2318                e.header.sod_conflict_type.map(|ct| {
2319                    use datasynth_core::models::{RiskLevel, SodViolation};
2320                    let severity = match ct {
2321                        datasynth_core::models::SodConflictType::PaymentReleaser
2322                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2323                            RiskLevel::Critical
2324                        }
2325                        datasynth_core::models::SodConflictType::PreparerApprover
2326                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2327                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2328                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2329                            RiskLevel::High
2330                        }
2331                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2332                            RiskLevel::Medium
2333                        }
2334                    };
2335                    let action = format!(
2336                        "SoD conflict {:?} on entry {} ({})",
2337                        ct, e.header.document_id, e.header.company_code
2338                    );
2339                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2340                })
2341            })
2342            .collect();
2343        if !sod_violations.is_empty() {
2344            info!(
2345                "Phase 7c: Extracted {} SoD violations from {} entries",
2346                sod_violations.len(),
2347                entries.len()
2348            );
2349        }
2350
2351        // Emit journal entries to stream sink (before the later phases that take `&mut entries`: fraud/anomaly injection, data quality, period close)
2352        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2353
2354        // Phase 7d: Document-level fraud injection + propagation to derived JEs.
2355        //
2356        // This runs BEFORE line-level anomaly injection so that JEs tagged by
2357        // document-level fraud are exempt from subsequent line-level flag
2358        // overwrites, and so downstream consumers see a coherent picture.
2359        //
2360        // Gated by `fraud.document_fraud_rate` — `None` or `0.0` is a no-op.
2361        {
2362            let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2363            if self.config.fraud.enabled && doc_rate > 0.0 {
2364                use datasynth_core::fraud_propagation::{
2365                    inject_document_fraud, propagate_documents_to_entries,
2366                };
2367                use datasynth_core::utils::weighted_select;
2368                use datasynth_core::FraudType;
2369                use rand_chacha::rand_core::SeedableRng;
2370
2371                let dist = &self.config.fraud.fraud_type_distribution;
2372                let fraud_type_weights: [(FraudType, f64); 8] = [
2373                    (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2374                    (FraudType::FictitiousEntry, dist.fictitious_transaction),
2375                    (FraudType::RevenueManipulation, dist.revenue_manipulation),
2376                    (
2377                        FraudType::ImproperCapitalization,
2378                        dist.expense_capitalization,
2379                    ),
2380                    (FraudType::SplitTransaction, dist.split_transaction),
2381                    (FraudType::TimingAnomaly, dist.timing_anomaly),
2382                    (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2383                    (FraudType::DuplicatePayment, dist.duplicate_payment),
2384                ];
2385                let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2386                let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2387                    if weights_sum <= 0.0 {
2388                        FraudType::FictitiousEntry
2389                    } else {
2390                        *weighted_select(rng, &fraud_type_weights)
2391                    }
2392                };
2393
2394                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2395                let mut doc_tagged = 0usize;
2396                macro_rules! inject_into {
2397                    ($collection:expr) => {{
2398                        let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2399                            $collection.iter_mut().map(|d| &mut d.header).collect();
2400                        doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2401                    }};
2402                }
2403                inject_into!(document_flows.purchase_orders);
2404                inject_into!(document_flows.goods_receipts);
2405                inject_into!(document_flows.vendor_invoices);
2406                inject_into!(document_flows.payments);
2407                inject_into!(document_flows.sales_orders);
2408                inject_into!(document_flows.deliveries);
2409                inject_into!(document_flows.customer_invoices);
2410                if doc_tagged > 0 {
2411                    info!(
2412                        "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2413                    );
2414                }
2415
2416                if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2417                    let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2418                        Vec::new();
2419                    headers.extend(
2420                        document_flows
2421                            .purchase_orders
2422                            .iter()
2423                            .map(|d| d.header.clone()),
2424                    );
2425                    headers.extend(
2426                        document_flows
2427                            .goods_receipts
2428                            .iter()
2429                            .map(|d| d.header.clone()),
2430                    );
2431                    headers.extend(
2432                        document_flows
2433                            .vendor_invoices
2434                            .iter()
2435                            .map(|d| d.header.clone()),
2436                    );
2437                    headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2438                    headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2439                    headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2440                    headers.extend(
2441                        document_flows
2442                            .customer_invoices
2443                            .iter()
2444                            .map(|d| d.header.clone()),
2445                    );
2446                    let propagated = propagate_documents_to_entries(&headers, &mut entries);
2447                    if propagated > 0 {
2448                        info!(
2449                            "Propagated document-level fraud to {propagated} derived journal entries"
2450                        );
2451                    }
2452                }
2453            }
2454        }
2455
2456        // Phase 8: Anomaly Injection (after all JE-generating phases)
2457        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2458
2459        // Emit anomaly labels to stream sink
2460        self.emit_phase_items(
2461            "anomaly_injection",
2462            "LabeledAnomaly",
2463            &anomaly_labels.labels,
2464        );
2465
2466        // Propagate fraud labels from journal entries to source documents.
2467        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2468        // instead of tracing through document_references.json.
2469        //
2470        // Gated by `fraud.propagate_to_document` (default true) — disable when
2471        // downstream consumers want document fraud flags to reflect only
2472        // document-level injection, not line-level.
2473        if self.config.fraud.propagate_to_document {
2474            use std::collections::HashMap;
2475            // Build a map from document_id -> (is_fraud, fraud_type) from fraudulent JEs.
2476            //
2477            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2478            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2479            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2480            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2481            // we register BOTH the prefixed form (raw reference) AND the bare form
2482            // (post-colon portion) in the map. Also register the JE's document_id
2483            // UUID so documents that set `journal_entry_id` match via that path.
2484            //
2485            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2486            // looked up "foo", silently producing 0 propagations.
2487            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2488            for je in &entries {
2489                if je.header.is_fraud {
2490                    if let Some(ref fraud_type) = je.header.fraud_type {
2491                        if let Some(ref reference) = je.header.reference {
2492                            // Register the full reference ("GR:PO-2024-000001")
2493                            fraud_map.insert(reference.clone(), *fraud_type);
2494                            // Also register the bare document ID ("PO-2024-000001")
2495                            // by stripping the "PREFIX:" if present.
2496                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2497                                if !bare.is_empty() {
2498                                    fraud_map.insert(bare.to_string(), *fraud_type);
2499                                }
2500                            }
2501                        }
2502                        // Also tag via journal_entry_id on document headers
2503                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2504                    }
2505                }
2506            }
2507            if !fraud_map.is_empty() {
2508                let mut propagated = 0usize;
2509                // Use DocumentHeader::propagate_fraud method for each doc type
2510                macro_rules! propagate_to {
2511                    ($collection:expr) => {
2512                        for doc in &mut $collection {
2513                            if doc.header.propagate_fraud(&fraud_map) {
2514                                propagated += 1;
2515                            }
2516                        }
2517                    };
2518                }
2519                propagate_to!(document_flows.purchase_orders);
2520                propagate_to!(document_flows.goods_receipts);
2521                propagate_to!(document_flows.vendor_invoices);
2522                propagate_to!(document_flows.payments);
2523                propagate_to!(document_flows.sales_orders);
2524                propagate_to!(document_flows.deliveries);
2525                propagate_to!(document_flows.customer_invoices);
2526                if propagated > 0 {
2527                    info!(
2528                        "Propagated fraud labels to {} document flow records",
2529                        propagated
2530                    );
2531                }
2532            }
2533        }
2534
2535        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2536        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2537
2538        // Emit red flags to stream sink
2539        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2540
2541        // Phase 26b: Collusion Ring Generation (after red flags)
2542        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2543
2544        // Emit collusion rings to stream sink
2545        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2546
2547        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2548        let balance_validation = self.phase_balance_validation(&entries)?;
2549
2550        // Phase 9b: GL-to-Subledger Reconciliation
2551        let subledger_reconciliation =
2552            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2553
2554        // Phase 10: Data Quality Injection
2555        let (data_quality_stats, quality_issues) =
2556            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2557
2558        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2559        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2560
2561        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
2562        {
2563            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2564
2565            // Assert 1: Every non-anomaly JE must individually balance (debits = credits).
2566            // Anomaly-injected JEs are excluded since they are intentionally unbalanced (fraud).
2567            let mut unbalanced_clean = 0usize;
2568            for je in &entries {
2569                if je.header.is_fraud || je.header.is_anomaly {
2570                    continue;
2571                }
2572                let diff = (je.total_debit() - je.total_credit()).abs();
2573                if diff > tolerance {
2574                    unbalanced_clean += 1;
2575                    if unbalanced_clean <= 3 {
2576                        warn!(
2577                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2578                            je.header.document_id,
2579                            je.total_debit(),
2580                            je.total_credit(),
2581                            diff
2582                        );
2583                    }
2584                }
2585            }
2586            if unbalanced_clean > 0 {
2587                return Err(datasynth_core::error::SynthError::generation(format!(
2588                    "{} non-anomaly JEs are unbalanced (debits != credits). \
2589                     First few logged above. Tolerance={}",
2590                    unbalanced_clean, tolerance
2591                )));
2592            }
2593            debug!(
2594                "Phase 10c: All {} non-anomaly JEs individually balanced",
2595                entries
2596                    .iter()
2597                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2598                    .count()
2599            );
2600
2601            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
2602            let company_codes: Vec<String> = self
2603                .config
2604                .companies
2605                .iter()
2606                .map(|c| c.code.clone())
2607                .collect();
2608            for company_code in &company_codes {
2609                let mut assets = rust_decimal::Decimal::ZERO;
2610                let mut liab_equity = rust_decimal::Decimal::ZERO;
2611
2612                for entry in &entries {
2613                    if entry.header.company_code != *company_code {
2614                        continue;
2615                    }
2616                    for line in &entry.lines {
2617                        let acct = &line.gl_account;
2618                        let net = line.debit_amount - line.credit_amount;
2619                        // Asset accounts (1xxx): normal debit balance
2620                        if acct.starts_with('1') {
2621                            assets += net;
2622                        }
2623                        // Liability (2xxx) + Equity (3xxx): normal credit balance
2624                        else if acct.starts_with('2') || acct.starts_with('3') {
2625                            liab_equity -= net; // credit-normal, so negate debit-net
2626                        }
2627                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
2628                        // so they net to zero after closing entries
2629                    }
2630                }
2631
2632                let bs_diff = (assets - liab_equity).abs();
2633                if bs_diff > tolerance {
2634                    warn!(
2635                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
2636                         revenue/expense closing entries may not fully offset",
2637                        company_code, assets, liab_equity, bs_diff
2638                    );
2639                    // Warn rather than error: multi-period datasets may have timing
2640                    // differences from accruals/deferrals that resolve in later periods.
2641                    // The per-JE debit/credit balance check (Assert 1) is the hard gate.
2642                } else {
2643                    debug!(
2644                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
2645                        company_code, assets, liab_equity, bs_diff
2646                    );
2647                }
2648            }
2649
2650            info!("Phase 10c: All generation-time accounting assertions passed");
2651        }
2652
2653        // Phase 11: Audit Data
2654        let audit = self.phase_audit_data(&entries, &mut stats)?;
2655
2656        // Phase 12: Banking KYC/AML Data
2657        let mut banking = self.phase_banking_data(&mut stats)?;
2658
2659        // Phase 12.5: Bridge document-flow Payments → BankTransactions
2660        // Creates coherence between the accounting layer (payments, JEs) and the
2661        // banking layer (bank transactions). A vendor invoice payment now appears
2662        // on both sides with cross-references and fraud labels propagated.
2663        if self.phase_config.generate_banking
2664            && !document_flows.payments.is_empty()
2665            && !banking.accounts.is_empty()
2666        {
2667            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2668            if bridge_rate > 0.0 {
2669                let mut bridge =
2670                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2671                        self.seed,
2672                    );
2673                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2674                    &document_flows.payments,
2675                    &banking.customers,
2676                    &banking.accounts,
2677                    bridge_rate,
2678                );
2679                info!(
2680                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2681                    bridge_stats.bridged_count,
2682                    bridge_stats.transactions_emitted,
2683                    bridge_stats.fraud_propagated,
2684                );
2685                let bridged_count = bridged_txns.len();
2686                banking.transactions.extend(bridged_txns);
2687
2688                // Re-run velocity computation so bridged txns also get features
2689                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
2690                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2691                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
2692                        &mut banking.transactions,
2693                    );
2694                }
2695
2696                // Recompute suspicious count after bridging
2697                banking.suspicious_count = banking
2698                    .transactions
2699                    .iter()
2700                    .filter(|t| t.is_suspicious)
2701                    .count();
2702                stats.banking_transaction_count = banking.transactions.len();
2703                stats.banking_suspicious_count = banking.suspicious_count;
2704            }
2705        }
2706
2707        // Phase 13: Graph Export
2708        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2709
2710        // Phase 14: LLM Enrichment
2711        self.phase_llm_enrichment(&mut stats);
2712
2713        // Phase 15: Diffusion Enhancement
2714        self.phase_diffusion_enhancement(&mut stats);
2715
2716        // Phase 16: Causal Overlay
2717        self.phase_causal_overlay(&mut stats);
2718
2719        // Phase 17: Bank Reconciliation + Financial Statements
2720        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
2721        // provision data (from accounting_standards / tax snapshots) can be wired in.
2722        let mut financial_reporting = self.phase_financial_reporting(
2723            &document_flows,
2724            &entries,
2725            &coa,
2726            &hr,
2727            &audit,
2728            &mut stats,
2729        )?;
2730
2731        // BS coherence check: assets = liabilities + equity
2732        {
2733            use datasynth_core::models::StatementType;
2734            for stmt in &financial_reporting.consolidated_statements {
2735                if stmt.statement_type == StatementType::BalanceSheet {
2736                    let total_assets: rust_decimal::Decimal = stmt
2737                        .line_items
2738                        .iter()
2739                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
2740                        .map(|li| li.amount)
2741                        .sum();
2742                    let total_le: rust_decimal::Decimal = stmt
2743                        .line_items
2744                        .iter()
2745                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2746                        .map(|li| li.amount)
2747                        .sum();
2748                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2749                        warn!(
2750                            "BS equation imbalance: assets={}, L+E={}",
2751                            total_assets, total_le
2752                        );
2753                    }
2754                }
2755            }
2756        }
2757
2758        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
2759        let accounting_standards =
2760            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2761
2762        // Phase 18a: Merge ECL journal entries into main GL
2763        if !accounting_standards.ecl_journal_entries.is_empty() {
2764            debug!(
2765                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2766                accounting_standards.ecl_journal_entries.len()
2767            );
2768            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2769        }
2770
2771        // Phase 18a: Merge provision journal entries into main GL
2772        if !accounting_standards.provision_journal_entries.is_empty() {
2773            debug!(
2774                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2775                accounting_standards.provision_journal_entries.len()
2776            );
2777            entries.extend(
2778                accounting_standards
2779                    .provision_journal_entries
2780                    .iter()
2781                    .cloned(),
2782            );
2783        }
2784
2785        // Phase 18b: OCPM Events (after all process data is available)
2786        let mut ocpm = self.phase_ocpm_events(
2787            &document_flows,
2788            &sourcing,
2789            &hr,
2790            &manufacturing_snap,
2791            &banking,
2792            &audit,
2793            &financial_reporting,
2794            &mut stats,
2795        )?;
2796
2797        // Emit OCPM events to stream sink
2798        if let Some(ref event_log) = ocpm.event_log {
2799            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2800        }
2801
2802        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
2803        if let Some(ref event_log) = ocpm.event_log {
2804            // Build reverse index: document_ref → (event_id, case_id, object_ids)
2805            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
2806                std::collections::HashMap::new();
2807            for (idx, event) in event_log.events.iter().enumerate() {
2808                if let Some(ref doc_ref) = event.document_ref {
2809                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
2810                }
2811            }
2812
2813            if !doc_index.is_empty() {
2814                let mut annotated = 0usize;
2815                for entry in &mut entries {
2816                    let doc_id_str = entry.header.document_id.to_string();
2817                    // Collect matching event indices from document_id and reference
2818                    let mut matched_indices: Vec<usize> = Vec::new();
2819                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
2820                        matched_indices.extend(indices);
2821                    }
2822                    if let Some(ref reference) = entry.header.reference {
2823                        let bare_ref = reference
2824                            .find(':')
2825                            .map(|i| &reference[i + 1..])
2826                            .unwrap_or(reference.as_str());
2827                        if let Some(indices) = doc_index.get(bare_ref) {
2828                            for &idx in indices {
2829                                if !matched_indices.contains(&idx) {
2830                                    matched_indices.push(idx);
2831                                }
2832                            }
2833                        }
2834                    }
2835                    // Apply matches to JE header
2836                    if !matched_indices.is_empty() {
2837                        for &idx in &matched_indices {
2838                            let event = &event_log.events[idx];
2839                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
2840                                entry.header.ocpm_event_ids.push(event.event_id);
2841                            }
2842                            for obj_ref in &event.object_refs {
2843                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
2844                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
2845                                }
2846                            }
2847                            if entry.header.ocpm_case_id.is_none() {
2848                                entry.header.ocpm_case_id = event.case_id;
2849                            }
2850                        }
2851                        annotated += 1;
2852                    }
2853                }
2854                debug!(
2855                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
2856                    annotated
2857                );
2858            }
2859        }
2860
2861        // Phase 18d: Synthesize OCPM events for orphan JEs (period-close,
2862        // IC eliminations, opening balances, standards-driven entries) so
2863        // every JournalEntry carries at least one `ocpm_event_ids` link.
2864        if let Some(ref mut event_log) = ocpm.event_log {
2865            let synthesized =
2866                datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
2867            if synthesized > 0 {
2868                info!(
2869                    "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
2870                );
2871            }
2872
2873            // Phase 18e: Mirror JE anomaly / fraud flags onto the linked OCEL
2874            // events and their owning CaseTrace. Without this, every exported
2875            // OCEL event has `is_anomaly = false` even when the underlying JE
2876            // was flagged.
2877            let anomaly_events =
2878                datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
2879            if anomaly_events > 0 {
2880                info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
2881            }
2882
2883            // Phase 18f: Inject process-variant imperfections (rework, skipped
2884            // steps, out-of-order events) so conformance checkers see
2885            // realistic variant counts and fitness < 1.0. Uses the P2P
2886            // process rates as the single source of truth.
2887            let p2p_cfg = &self.config.ocpm.p2p_process;
2888            let any_imperfection = p2p_cfg.rework_probability > 0.0
2889                || p2p_cfg.skip_step_probability > 0.0
2890                || p2p_cfg.out_of_order_probability > 0.0;
2891            if any_imperfection {
2892                use rand_chacha::rand_core::SeedableRng;
2893                let imp_cfg = datasynth_ocpm::ImperfectionConfig {
2894                    rework_rate: p2p_cfg.rework_probability,
2895                    skip_rate: p2p_cfg.skip_step_probability,
2896                    out_of_order_rate: p2p_cfg.out_of_order_probability,
2897                };
2898                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
2899                let stats =
2900                    datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
2901                if stats.rework + stats.skipped + stats.out_of_order > 0 {
2902                    info!(
2903                        "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
2904                        stats.rework, stats.skipped, stats.out_of_order
2905                    );
2906                }
2907            }
2908        }
2909
2910        // Phase 19: Sales Quotes, Management KPIs, Budgets
2911        let sales_kpi_budgets =
2912            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2913
2914        // Phase 22: Treasury Data Generation
2915        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
2916        // are included in the pre-tax income used by phase_tax_generation.
2917        let treasury =
2918            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2919
2920        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
2921        if !treasury.journal_entries.is_empty() {
2922            debug!(
2923                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2924                treasury.journal_entries.len()
2925            );
2926            entries.extend(treasury.journal_entries.iter().cloned());
2927        }
2928
2929        // Phase 20: Tax Generation
2930        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2931
2932        // Phase 20 JEs: Merge tax posting journal entries into main GL
2933        if !tax.tax_posting_journal_entries.is_empty() {
2934            debug!(
2935                "Merging {} tax posting JEs into GL",
2936                tax.tax_posting_journal_entries.len()
2937            );
2938            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2939        }
2940
2941        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
2942        // Build supplementary cash flow items from upstream JE data (depreciation,
2943        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
2944        {
2945            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2946
2947            let framework_str = {
2948                use datasynth_config::schema::AccountingFrameworkConfig;
2949                match self
2950                    .config
2951                    .accounting_standards
2952                    .framework
2953                    .unwrap_or_default()
2954                {
2955                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2956                        "IFRS"
2957                    }
2958                    _ => "US_GAAP",
2959                }
2960            };
2961
2962            // Sum depreciation debits (account 6000) from close JEs
2963            let depreciation_total: rust_decimal::Decimal = entries
2964                .iter()
2965                .filter(|je| je.header.document_type == "CL")
2966                .flat_map(|je| je.lines.iter())
2967                .filter(|l| l.gl_account.starts_with("6000"))
2968                .map(|l| l.debit_amount)
2969                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2970
2971            // Sum interest expense debits (account 7100)
2972            let interest_paid: rust_decimal::Decimal = entries
2973                .iter()
2974                .flat_map(|je| je.lines.iter())
2975                .filter(|l| l.gl_account.starts_with("7100"))
2976                .map(|l| l.debit_amount)
2977                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2978
2979            // Sum tax expense debits (account 8000)
2980            let tax_paid: rust_decimal::Decimal = entries
2981                .iter()
2982                .flat_map(|je| je.lines.iter())
2983                .filter(|l| l.gl_account.starts_with("8000"))
2984                .map(|l| l.debit_amount)
2985                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2986
2987            // Sum capex debits on fixed assets (account 1500)
2988            let capex: rust_decimal::Decimal = entries
2989                .iter()
2990                .flat_map(|je| je.lines.iter())
2991                .filter(|l| l.gl_account.starts_with("1500"))
2992                .map(|l| l.debit_amount)
2993                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2994
2995            // Dividends paid: sum debits on dividends payable (account 2170) from payment JEs
2996            let dividends_paid: rust_decimal::Decimal = entries
2997                .iter()
2998                .flat_map(|je| je.lines.iter())
2999                .filter(|l| l.gl_account == "2170")
3000                .map(|l| l.debit_amount)
3001                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3002
3003            let cf_data = CashFlowSourceData {
3004                depreciation_total,
3005                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
3006                delta_ar: rust_decimal::Decimal::ZERO,
3007                delta_ap: rust_decimal::Decimal::ZERO,
3008                delta_inventory: rust_decimal::Decimal::ZERO,
3009                capex,
3010                debt_issuance: rust_decimal::Decimal::ZERO,
3011                debt_repayment: rust_decimal::Decimal::ZERO,
3012                interest_paid,
3013                tax_paid,
3014                dividends_paid,
3015                framework: framework_str.to_string(),
3016            };
3017
3018            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3019            if !enhanced_cf_items.is_empty() {
3020                // Merge into ALL cash flow statements (standalone + consolidated)
3021                use datasynth_core::models::StatementType;
3022                let merge_count = enhanced_cf_items.len();
3023                for stmt in financial_reporting
3024                    .financial_statements
3025                    .iter_mut()
3026                    .chain(financial_reporting.consolidated_statements.iter_mut())
3027                    .chain(
3028                        financial_reporting
3029                            .standalone_statements
3030                            .values_mut()
3031                            .flat_map(|v| v.iter_mut()),
3032                    )
3033                {
3034                    if stmt.statement_type == StatementType::CashFlowStatement {
3035                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3036                    }
3037                }
3038                info!(
3039                    "Enhanced cash flow: {} supplementary items merged into CF statements",
3040                    merge_count
3041                );
3042            }
3043        }
3044
3045        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
3046        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
3047        self.generate_notes_to_financial_statements(
3048            &mut financial_reporting,
3049            &accounting_standards,
3050            &tax,
3051            &hr,
3052            &audit,
3053            &treasury,
3054        );
3055
3056        // Phase 20b: Supplement segment reports from real JEs (v2.4)
3057        // When we have 2+ companies, derive segment data from actual journal entries
3058        // to complement or replace the FS-generator-based segments.
3059        if self.config.companies.len() >= 2 && !entries.is_empty() {
3060            let companies: Vec<(String, String)> = self
3061                .config
3062                .companies
3063                .iter()
3064                .map(|c| (c.code.clone(), c.name.clone()))
3065                .collect();
3066            let ic_elim: rust_decimal::Decimal =
3067                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3068            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3069                .unwrap_or(NaiveDate::MIN);
3070            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3071            let period_label = format!(
3072                "{}-{:02}",
3073                end_date.year(),
3074                (end_date - chrono::Days::new(1)).month()
3075            );
3076
3077            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3078            let (je_segments, je_recon) =
3079                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3080            if !je_segments.is_empty() {
3081                info!(
3082                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3083                    je_segments.len(),
3084                    ic_elim,
3085                );
3086                // Replace if existing segment_reports were empty; otherwise supplement
3087                if financial_reporting.segment_reports.is_empty() {
3088                    financial_reporting.segment_reports = je_segments;
3089                    financial_reporting.segment_reconciliations = vec![je_recon];
3090                } else {
3091                    financial_reporting.segment_reports.extend(je_segments);
3092                    financial_reporting.segment_reconciliations.push(je_recon);
3093                }
3094            }
3095        }
3096
3097        // Phase 21: ESG Data Generation
3098        let esg_snap =
3099            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3100
3101        // Phase 23: Project Accounting Data Generation
3102        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3103
3104        // Phase 24: Process Evolution + Organizational Events
3105        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3106
3107        // Phase 24b: Disruption Events
3108        let disruption_events = self.phase_disruption_events(&mut stats)?;
3109
3110        // Phase 27: Bi-Temporal Vendor Version Chains
3111        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3112
3113        // Phase 28: Entity Relationship Graph + Cross-Process Links
3114        let (entity_relationship_graph, cross_process_links) =
3115            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3116
3117        // Phase 29: Industry-specific GL accounts
3118        let industry_output = self.phase_industry_data(&mut stats);
3119
3120        // Phase: Compliance regulations (must run before hypergraph so it can be included)
3121        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3122
3123        // Phase: Neural enhancement (optional — requires neural feature + config)
3124        if self.config.diffusion.enabled
3125            && (self.config.diffusion.backend == "neural"
3126                || self.config.diffusion.backend == "hybrid")
3127        {
3128            let neural = &self.config.diffusion.neural;
3129            // Validate hybrid_strategy early so an unknown string doesn't
3130            // silently fall through to weighted_average semantics.
3131            const VALID_STRATEGIES: &[&str] = &["weighted_average", "column_select", "threshold"];
3132            if !VALID_STRATEGIES.contains(&neural.hybrid_strategy.as_str()) {
3133                warn!(
3134                    "Unknown diffusion.neural.hybrid_strategy='{}' — expected one of {:?}; \
3135                     falling back to 'weighted_average'.",
3136                    neural.hybrid_strategy, VALID_STRATEGIES
3137                );
3138            }
3139            let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3140            if (weight - neural.hybrid_weight).abs() > f64::EPSILON {
3141                warn!(
3142                    "diffusion.neural.hybrid_weight={} clamped to [0,1] → {}",
3143                    neural.hybrid_weight, weight
3144                );
3145            }
3146            info!(
3147                "Phase neural enhancement: backend={} strategy={} weight={:.2} columns={} \
3148                 (neural_columns: {:?})",
3149                self.config.diffusion.backend,
3150                neural.hybrid_strategy,
3151                weight,
3152                neural.neural_columns.len(),
3153                neural.neural_columns,
3154            );
3155            stats.neural_hybrid_weight = Some(weight);
3156            stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3157            stats.neural_routed_column_count = Some(neural.neural_columns.len());
3158            // Neural enhancement integrates via the DiffusionBackend trait:
3159            // 1. NeuralDiffusionTrainer::train() on generated amounts
3160            // 2. HybridGenerator blends rule-based + neural at configured weight
3161            // 3. TabularTransformer for conditional column prediction
3162            // 4. GnnGraphTrainer for entity relationship structure
3163            // Actual training requires the `neural` cargo feature on datasynth-core.
3164            // The orchestrator delegates to the diffusion module which is feature-gated.
3165        }
3166
3167        // Phase 19b: Hypergraph Export (after all data is available)
3168        self.phase_hypergraph_export(
3169            &coa,
3170            &entries,
3171            &document_flows,
3172            &sourcing,
3173            &hr,
3174            &manufacturing_snap,
3175            &banking,
3176            &audit,
3177            &financial_reporting,
3178            &ocpm,
3179            &compliance_regulations,
3180            &mut stats,
3181        )?;
3182
3183        // Phase 10c: Additional graph builders (approval, entity, banking)
3184        // These run after all data is available since they need banking/IC data.
3185        if self.phase_config.generate_graph_export {
3186            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3187        }
3188
3189        // Log informational messages for config sections not yet fully wired
3190        if self.config.streaming.enabled {
3191            info!("Note: streaming config is enabled but batch mode does not use it");
3192        }
3193        if self.config.vendor_network.enabled {
3194            debug!("Vendor network config available; relationship graph generation is partial");
3195        }
3196        if self.config.customer_segmentation.enabled {
3197            debug!("Customer segmentation config available; segment-aware generation is partial");
3198        }
3199
3200        // Log final resource statistics
3201        let resource_stats = self.resource_guard.stats();
3202        info!(
3203            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3204            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3205            resource_stats.disk.estimated_bytes_written,
3206            resource_stats.degradation_level
3207        );
3208
3209        // Flush any remaining stream sink data
3210        if let Some(ref sink) = self.phase_sink {
3211            if let Err(e) = sink.flush() {
3212                warn!("Stream sink flush failed: {e}");
3213            }
3214        }
3215
3216        // Build data lineage graph
3217        let lineage = self.build_lineage_graph();
3218
3219        // Evaluate quality gates if enabled in config
3220        let gate_result = if self.config.quality_gates.enabled {
3221            let profile_name = &self.config.quality_gates.profile;
3222            match datasynth_eval::gates::get_profile(profile_name) {
3223                Some(profile) => {
3224                    // Build an evaluation populated with actual generation metrics.
3225                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3226
3227                    // Populate balance sheet evaluation from balance validation results
3228                    if balance_validation.validated {
3229                        eval.coherence.balance =
3230                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3231                                equation_balanced: balance_validation.is_balanced,
3232                                max_imbalance: (balance_validation.total_debits
3233                                    - balance_validation.total_credits)
3234                                    .abs(),
3235                                periods_evaluated: 1,
3236                                periods_imbalanced: if balance_validation.is_balanced {
3237                                    0
3238                                } else {
3239                                    1
3240                                },
3241                                period_results: Vec::new(),
3242                                companies_evaluated: self.config.companies.len(),
3243                            });
3244                    }
3245
3246                    // Set coherence passes based on balance validation
3247                    eval.coherence.passes = balance_validation.is_balanced;
3248                    if !balance_validation.is_balanced {
3249                        eval.coherence
3250                            .failures
3251                            .push("Balance sheet equation not satisfied".to_string());
3252                    }
3253
3254                    // Set statistical score based on entry count (basic sanity)
3255                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3256                    eval.statistical.passes = !entries.is_empty();
3257
3258                    // Set quality score from data quality stats
3259                    eval.quality.overall_score = 0.9; // Default high for generated data
3260                    eval.quality.passes = true;
3261
3262                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3263                    info!(
3264                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3265                        profile_name, result.gates_passed, result.gates_total, result.summary
3266                    );
3267                    Some(result)
3268                }
3269                None => {
3270                    warn!(
3271                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3272                        profile_name
3273                    );
3274                    None
3275                }
3276            }
3277        } else {
3278            None
3279        };
3280
3281        // Generate internal controls if enabled
3282        let internal_controls = if self.config.internal_controls.enabled {
3283            InternalControl::standard_controls()
3284        } else {
3285            Vec::new()
3286        };
3287
3288        Ok(EnhancedGenerationResult {
3289            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3290            master_data: std::mem::take(&mut self.master_data),
3291            document_flows,
3292            subledger,
3293            ocpm,
3294            audit,
3295            banking,
3296            graph_export,
3297            sourcing,
3298            financial_reporting,
3299            hr,
3300            accounting_standards,
3301            manufacturing: manufacturing_snap,
3302            sales_kpi_budgets,
3303            tax,
3304            esg: esg_snap,
3305            treasury,
3306            project_accounting,
3307            process_evolution,
3308            organizational_events,
3309            disruption_events,
3310            intercompany,
3311            journal_entries: entries,
3312            anomaly_labels,
3313            balance_validation,
3314            data_quality_stats,
3315            quality_issues,
3316            statistics: stats,
3317            lineage: Some(lineage),
3318            gate_result,
3319            internal_controls,
3320            sod_violations,
3321            opening_balances,
3322            subledger_reconciliation,
3323            counterfactual_pairs,
3324            red_flags,
3325            collusion_rings,
3326            temporal_vendor_chains,
3327            entity_relationship_graph,
3328            cross_process_links,
3329            industry_output,
3330            compliance_regulations,
3331        })
3332    }
3333
3334    // ========================================================================
3335    // Generation Phase Methods
3336    // ========================================================================
3337
3338    /// Phase 1: Generate Chart of Accounts and update statistics.
3339    fn phase_chart_of_accounts(
3340        &mut self,
3341        stats: &mut EnhancedGenerationStatistics,
3342    ) -> SynthResult<Arc<ChartOfAccounts>> {
3343        info!("Phase 1: Generating Chart of Accounts");
3344        let coa = self.generate_coa()?;
3345        stats.accounts_count = coa.account_count();
3346        info!(
3347            "Chart of Accounts generated: {} accounts",
3348            stats.accounts_count
3349        );
3350        self.check_resources_with_log("post-coa")?;
3351        Ok(coa)
3352    }
3353
3354    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
3355    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3356        if self.phase_config.generate_master_data {
3357            info!("Phase 2: Generating Master Data");
3358            self.generate_master_data()?;
3359            stats.vendor_count = self.master_data.vendors.len();
3360            stats.customer_count = self.master_data.customers.len();
3361            stats.material_count = self.master_data.materials.len();
3362            stats.asset_count = self.master_data.assets.len();
3363            stats.employee_count = self.master_data.employees.len();
3364            info!(
3365                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3366                stats.vendor_count, stats.customer_count, stats.material_count,
3367                stats.asset_count, stats.employee_count
3368            );
3369            self.check_resources_with_log("post-master-data")?;
3370        } else {
3371            debug!("Phase 2: Skipped (master data generation disabled)");
3372        }
3373        Ok(())
3374    }
3375
3376    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
3377    fn phase_document_flows(
3378        &mut self,
3379        stats: &mut EnhancedGenerationStatistics,
3380    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3381        let mut document_flows = DocumentFlowSnapshot::default();
3382        let mut subledger = SubledgerSnapshot::default();
3383        // Dunning JEs (interest + charges) accumulated here and merged into the
3384        // main FA-JE list below so they appear in the GL.
3385        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3386
3387        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3388            info!("Phase 3: Generating Document Flows");
3389            self.generate_document_flows(&mut document_flows)?;
3390            stats.p2p_chain_count = document_flows.p2p_chains.len();
3391            stats.o2c_chain_count = document_flows.o2c_chains.len();
3392            info!(
3393                "Document flows generated: {} P2P chains, {} O2C chains",
3394                stats.p2p_chain_count, stats.o2c_chain_count
3395            );
3396
3397            // Phase 3b: Link document flows to subledgers (for data coherence)
3398            debug!("Phase 3b: Linking document flows to subledgers");
3399            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3400            stats.ap_invoice_count = subledger.ap_invoices.len();
3401            stats.ar_invoice_count = subledger.ar_invoices.len();
3402            debug!(
3403                "Subledgers linked: {} AP invoices, {} AR invoices",
3404                stats.ap_invoice_count, stats.ar_invoice_count
3405            );
3406
3407            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
3408            // Without this step the subledger is systematically overstated because
3409            // amount_remaining is set at invoice creation and never reduced by
3410            // the payments that were generated in the document-flow phase.
3411            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3412            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3413            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3414            debug!("Payment settlements applied to AP and AR subledgers");
3415
3416            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
3417            // The as-of date is the last day of the configured period.
3418            if let Ok(start_date) =
3419                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3420            {
3421                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3422                    - chrono::Days::new(1);
3423                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3424                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
3425                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
3426                // derived from JE-level aggregation and will typically differ. This is a known
3427                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
3428                // generated independently. A future reconciliation phase should align them by
3429                // using subledger totals as the authoritative source for BS Receivables.
3430                for company in &self.config.companies {
3431                    let ar_report = ARAgingReport::from_invoices(
3432                        company.code.clone(),
3433                        &subledger.ar_invoices,
3434                        as_of_date,
3435                    );
3436                    subledger.ar_aging_reports.push(ar_report);
3437
3438                    let ap_report = APAgingReport::from_invoices(
3439                        company.code.clone(),
3440                        &subledger.ap_invoices,
3441                        as_of_date,
3442                    );
3443                    subledger.ap_aging_reports.push(ap_report);
3444                }
3445                debug!(
3446                    "AR/AP aging reports built: {} AR, {} AP",
3447                    subledger.ar_aging_reports.len(),
3448                    subledger.ap_aging_reports.len()
3449                );
3450
3451                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
3452                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3453                {
3454                    use datasynth_generators::DunningGenerator;
3455                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3456                    for company in &self.config.companies {
3457                        let currency = company.currency.as_str();
3458                        // Collect mutable references to AR invoices for this company
3459                        // (dunning generator updates dunning_info on invoices in-place).
3460                        let mut company_invoices: Vec<
3461                            datasynth_core::models::subledger::ar::ARInvoice,
3462                        > = subledger
3463                            .ar_invoices
3464                            .iter()
3465                            .filter(|inv| inv.company_code == company.code)
3466                            .cloned()
3467                            .collect();
3468
3469                        if company_invoices.is_empty() {
3470                            continue;
3471                        }
3472
3473                        let result = dunning_gen.execute_dunning_run(
3474                            &company.code,
3475                            as_of_date,
3476                            &mut company_invoices,
3477                            currency,
3478                        );
3479
3480                        // Write back updated dunning info to the main AR invoice list
3481                        for updated in &company_invoices {
3482                            if let Some(orig) = subledger
3483                                .ar_invoices
3484                                .iter_mut()
3485                                .find(|i| i.invoice_number == updated.invoice_number)
3486                            {
3487                                orig.dunning_info = updated.dunning_info.clone();
3488                            }
3489                        }
3490
3491                        subledger.dunning_runs.push(result.dunning_run);
3492                        subledger.dunning_letters.extend(result.letters);
3493                        // Dunning JEs (interest + charges) collected into local buffer.
3494                        dunning_journal_entries.extend(result.journal_entries);
3495                    }
3496                    debug!(
3497                        "Dunning runs complete: {} runs, {} letters",
3498                        subledger.dunning_runs.len(),
3499                        subledger.dunning_letters.len()
3500                    );
3501                }
3502            }
3503
3504            self.check_resources_with_log("post-document-flows")?;
3505        } else {
3506            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3507        }
3508
3509        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
3510        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3511        if !self.master_data.assets.is_empty() {
3512            debug!("Generating FA subledger records");
3513            let company_code = self
3514                .config
3515                .companies
3516                .first()
3517                .map(|c| c.code.as_str())
3518                .unwrap_or("1000");
3519            let currency = self
3520                .config
3521                .companies
3522                .first()
3523                .map(|c| c.currency.as_str())
3524                .unwrap_or("USD");
3525
3526            let mut fa_gen = datasynth_generators::FAGenerator::new(
3527                datasynth_generators::FAGeneratorConfig::default(),
3528                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3529            );
3530
3531            for asset in &self.master_data.assets {
3532                let (record, je) = fa_gen.generate_asset_acquisition(
3533                    company_code,
3534                    &format!("{:?}", asset.asset_class),
3535                    &asset.description,
3536                    asset.acquisition_date,
3537                    currency,
3538                    asset.cost_center.as_deref(),
3539                );
3540                subledger.fa_records.push(record);
3541                fa_journal_entries.push(je);
3542            }
3543
3544            stats.fa_subledger_count = subledger.fa_records.len();
3545            debug!(
3546                "FA subledger records generated: {} (with {} acquisition JEs)",
3547                stats.fa_subledger_count,
3548                fa_journal_entries.len()
3549            );
3550        }
3551
3552        // Generate Inventory subledger records from master data materials
3553        if !self.master_data.materials.is_empty() {
3554            debug!("Generating Inventory subledger records");
3555            let first_company = self.config.companies.first();
3556            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3557            let inv_currency = first_company
3558                .map(|c| c.currency.clone())
3559                .unwrap_or_else(|| "USD".to_string());
3560
3561            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3562                datasynth_generators::InventoryGeneratorConfig::default(),
3563                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3564                inv_currency.clone(),
3565            );
3566
3567            for (i, material) in self.master_data.materials.iter().enumerate() {
3568                let plant = format!("PLANT{:02}", (i % 3) + 1);
3569                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3570                let initial_qty = rust_decimal::Decimal::from(
3571                    material
3572                        .safety_stock
3573                        .to_string()
3574                        .parse::<i64>()
3575                        .unwrap_or(100),
3576                );
3577
3578                let position = inv_gen.generate_position(
3579                    company_code,
3580                    &plant,
3581                    &storage_loc,
3582                    &material.material_id,
3583                    &material.description,
3584                    initial_qty,
3585                    Some(material.standard_cost),
3586                    &inv_currency,
3587                );
3588                subledger.inventory_positions.push(position);
3589            }
3590
3591            stats.inventory_subledger_count = subledger.inventory_positions.len();
3592            debug!(
3593                "Inventory subledger records generated: {}",
3594                stats.inventory_subledger_count
3595            );
3596        }
3597
3598        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
3599        if !subledger.fa_records.is_empty() {
3600            if let Ok(start_date) =
3601                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3602            {
3603                let company_code = self
3604                    .config
3605                    .companies
3606                    .first()
3607                    .map(|c| c.code.as_str())
3608                    .unwrap_or("1000");
3609                let fiscal_year = start_date.year();
3610                let start_period = start_date.month();
3611                let end_period =
3612                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3613
3614                let depr_cfg = FaDepreciationScheduleConfig {
3615                    fiscal_year,
3616                    start_period,
3617                    end_period,
3618                    seed_offset: 800,
3619                };
3620                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3621                let runs = depr_gen.generate(company_code, &subledger.fa_records);
3622                let run_count = runs.len();
3623                subledger.depreciation_runs = runs;
3624                debug!(
3625                    "Depreciation runs generated: {} runs for {} periods",
3626                    run_count, self.config.global.period_months
3627                );
3628            }
3629        }
3630
3631        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
3632        if !subledger.inventory_positions.is_empty() {
3633            if let Ok(start_date) =
3634                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3635            {
3636                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3637                    - chrono::Days::new(1);
3638
3639                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3640                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3641
3642                for company in &self.config.companies {
3643                    let result = inv_val_gen.generate(
3644                        &company.code,
3645                        &subledger.inventory_positions,
3646                        as_of_date,
3647                    );
3648                    subledger.inventory_valuations.push(result);
3649                }
3650                debug!(
3651                    "Inventory valuations generated: {} company reports",
3652                    subledger.inventory_valuations.len()
3653                );
3654            }
3655        }
3656
3657        Ok((document_flows, subledger, fa_journal_entries))
3658    }
3659
3660    /// Phase 3c: Generate OCPM events from document flows.
3661    #[allow(clippy::too_many_arguments)]
3662    fn phase_ocpm_events(
3663        &mut self,
3664        document_flows: &DocumentFlowSnapshot,
3665        sourcing: &SourcingSnapshot,
3666        hr: &HrSnapshot,
3667        manufacturing: &ManufacturingSnapshot,
3668        banking: &BankingSnapshot,
3669        audit: &AuditSnapshot,
3670        financial_reporting: &FinancialReportingSnapshot,
3671        stats: &mut EnhancedGenerationStatistics,
3672    ) -> SynthResult<OcpmSnapshot> {
3673        let degradation = self.check_resources()?;
3674        if degradation >= DegradationLevel::Reduced {
3675            debug!(
3676                "Phase skipped due to resource pressure (degradation: {:?})",
3677                degradation
3678            );
3679            return Ok(OcpmSnapshot::default());
3680        }
3681        if self.phase_config.generate_ocpm_events {
3682            info!("Phase 3c: Generating OCPM Events");
3683            let ocpm_snapshot = self.generate_ocpm_events(
3684                document_flows,
3685                sourcing,
3686                hr,
3687                manufacturing,
3688                banking,
3689                audit,
3690                financial_reporting,
3691            )?;
3692            stats.ocpm_event_count = ocpm_snapshot.event_count;
3693            stats.ocpm_object_count = ocpm_snapshot.object_count;
3694            stats.ocpm_case_count = ocpm_snapshot.case_count;
3695            info!(
3696                "OCPM events generated: {} events, {} objects, {} cases",
3697                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3698            );
3699            self.check_resources_with_log("post-ocpm")?;
3700            Ok(ocpm_snapshot)
3701        } else {
3702            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3703            Ok(OcpmSnapshot::default())
3704        }
3705    }
3706
3707    /// Phase 4: Generate journal entries from document flows and standalone generation.
3708    fn phase_journal_entries(
3709        &mut self,
3710        coa: &Arc<ChartOfAccounts>,
3711        document_flows: &DocumentFlowSnapshot,
3712        _stats: &mut EnhancedGenerationStatistics,
3713    ) -> SynthResult<Vec<JournalEntry>> {
3714        let mut entries = Vec::new();
3715
3716        // Phase 4a: Generate JEs from document flows (for data coherence)
3717        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3718            debug!("Phase 4a: Generating JEs from document flows");
3719            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3720            debug!("Generated {} JEs from document flows", flow_entries.len());
3721            entries.extend(flow_entries);
3722        }
3723
3724        // Phase 4b: Generate standalone journal entries
3725        if self.phase_config.generate_journal_entries {
3726            info!("Phase 4: Generating Journal Entries");
3727            let je_entries = self.generate_journal_entries(coa)?;
3728            info!("Generated {} standalone journal entries", je_entries.len());
3729            entries.extend(je_entries);
3730        } else {
3731            debug!("Phase 4: Skipped (journal entry generation disabled)");
3732        }
3733
3734        if !entries.is_empty() {
3735            // Note: stats.total_entries/total_line_items are set in generate()
3736            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
3737            self.check_resources_with_log("post-journal-entries")?;
3738        }
3739
3740        Ok(entries)
3741    }
3742
3743    /// Phase 5: Inject anomalies into journal entries.
3744    fn phase_anomaly_injection(
3745        &mut self,
3746        entries: &mut [JournalEntry],
3747        actions: &DegradationActions,
3748        stats: &mut EnhancedGenerationStatistics,
3749    ) -> SynthResult<AnomalyLabels> {
3750        if self.phase_config.inject_anomalies
3751            && !entries.is_empty()
3752            && !actions.skip_anomaly_injection
3753        {
3754            info!("Phase 5: Injecting Anomalies");
3755            let result = self.inject_anomalies(entries)?;
3756            stats.anomalies_injected = result.labels.len();
3757            info!("Injected {} anomalies", stats.anomalies_injected);
3758            self.check_resources_with_log("post-anomaly-injection")?;
3759            Ok(result)
3760        } else if actions.skip_anomaly_injection {
3761            warn!("Phase 5: Skipped due to resource degradation");
3762            Ok(AnomalyLabels::default())
3763        } else {
3764            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3765            Ok(AnomalyLabels::default())
3766        }
3767    }
3768
3769    /// Phase 6: Validate balance sheet equation on journal entries.
3770    fn phase_balance_validation(
3771        &mut self,
3772        entries: &[JournalEntry],
3773    ) -> SynthResult<BalanceValidationResult> {
3774        if self.phase_config.validate_balances && !entries.is_empty() {
3775            debug!("Phase 6: Validating Balances");
3776            let balance_validation = self.validate_journal_entries(entries)?;
3777            if balance_validation.is_balanced {
3778                debug!("Balance validation passed");
3779            } else {
3780                warn!(
3781                    "Balance validation found {} errors",
3782                    balance_validation.validation_errors.len()
3783                );
3784            }
3785            Ok(balance_validation)
3786        } else {
3787            Ok(BalanceValidationResult::default())
3788        }
3789    }
3790
3791    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
3792    fn phase_data_quality_injection(
3793        &mut self,
3794        entries: &mut [JournalEntry],
3795        actions: &DegradationActions,
3796        stats: &mut EnhancedGenerationStatistics,
3797    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3798        if self.phase_config.inject_data_quality
3799            && !entries.is_empty()
3800            && !actions.skip_data_quality
3801        {
3802            info!("Phase 7: Injecting Data Quality Variations");
3803            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3804            stats.data_quality_issues = dq_stats.records_with_issues;
3805            info!("Injected {} data quality issues", stats.data_quality_issues);
3806            self.check_resources_with_log("post-data-quality")?;
3807            Ok((dq_stats, quality_issues))
3808        } else if actions.skip_data_quality {
3809            warn!("Phase 7: Skipped due to resource degradation");
3810            Ok((DataQualityStats::default(), Vec::new()))
3811        } else {
3812            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3813            Ok((DataQualityStats::default(), Vec::new()))
3814        }
3815    }
3816
3817    /// Phase 10b: Generate period-close journal entries.
3818    ///
3819    /// Generates:
3820    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
3821    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
3822    ///    for the configured period.
    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Income Tax Payable (2130)
3824    /// 3. Income statement closing JE per company: transfer net income after tax to retained
3825    ///    earnings via the Income Summary (3600) clearing account.
3826    fn phase_period_close(
3827        &mut self,
3828        entries: &mut Vec<JournalEntry>,
3829        subledger: &SubledgerSnapshot,
3830        stats: &mut EnhancedGenerationStatistics,
3831    ) -> SynthResult<()> {
3832        if !self.phase_config.generate_period_close || entries.is_empty() {
3833            debug!("Phase 10b: Skipped (period close disabled or no entries)");
3834            return Ok(());
3835        }
3836
3837        info!("Phase 10b: Generating period-close journal entries");
3838
3839        use datasynth_core::accounts::{
3840            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3841        };
3842        use rust_decimal::Decimal;
3843
3844        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3845            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3846        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3847        // Posting date for close entries is the last day of the period
3848        let close_date = end_date - chrono::Days::new(1);
3849
3850        // Statutory tax rate (21% — configurable rates come in later tiers)
3851        let tax_rate = Decimal::new(21, 2); // 0.21
3852
3853        // Collect company codes from config
3854        let company_codes: Vec<String> = self
3855            .config
3856            .companies
3857            .iter()
3858            .map(|c| c.code.clone())
3859            .collect();
3860
3861        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
3862        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3863        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3864
3865        // --- Depreciation JEs (per asset) ---
3866        // Compute period depreciation for each active fixed asset using straight-line method.
3867        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
3868        let period_months = self.config.global.period_months;
3869        for asset in &subledger.fa_records {
3870            // Skip assets that are inactive / fully depreciated / non-depreciable
3871            use datasynth_core::models::subledger::fa::AssetStatus;
3872            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3873                continue;
3874            }
3875            let useful_life_months = asset.useful_life_months();
3876            if useful_life_months == 0 {
3877                // Land or CIP — not depreciated
3878                continue;
3879            }
3880            let salvage_value = asset.salvage_value();
3881            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3882            if depreciable_base == Decimal::ZERO {
3883                continue;
3884            }
3885            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3886                * Decimal::from(period_months))
3887            .round_dp(2);
3888            if period_depr <= Decimal::ZERO {
3889                continue;
3890            }
3891
3892            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3893            depr_header.document_type = "CL".to_string();
3894            depr_header.header_text = Some(format!(
3895                "Depreciation - {} {}",
3896                asset.asset_number, asset.description
3897            ));
3898            depr_header.created_by = "CLOSE_ENGINE".to_string();
3899            depr_header.source = TransactionSource::Automated;
3900            depr_header.business_process = Some(BusinessProcess::R2R);
3901
3902            let doc_id = depr_header.document_id;
3903            let mut depr_je = JournalEntry::new(depr_header);
3904
3905            // DR Depreciation Expense (6000)
3906            depr_je.add_line(JournalEntryLine::debit(
3907                doc_id,
3908                1,
3909                expense_accounts::DEPRECIATION.to_string(),
3910                period_depr,
3911            ));
3912            // CR Accumulated Depreciation (1510)
3913            depr_je.add_line(JournalEntryLine::credit(
3914                doc_id,
3915                2,
3916                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3917                period_depr,
3918            ));
3919
3920            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3921            close_jes.push(depr_je);
3922        }
3923
3924        if !subledger.fa_records.is_empty() {
3925            debug!(
3926                "Generated {} depreciation JEs from {} FA records",
3927                close_jes.len(),
3928                subledger.fa_records.len()
3929            );
3930        }
3931
3932        // --- Accrual entries (standard period-end accruals per company) ---
3933        // Generate standard accrued expense entries (utilities, rent, interest) using
3934        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
3935        {
3936            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3937            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3938
3939            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
3940            let accrual_items: &[(&str, &str, &str)] = &[
3941                ("Accrued Utilities", "6200", "2100"),
3942                ("Accrued Rent", "6300", "2100"),
3943                ("Accrued Interest", "6100", "2150"),
3944            ];
3945
3946            for company_code in &company_codes {
3947                // Estimate company revenue from existing JEs
3948                let company_revenue: Decimal = entries
3949                    .iter()
3950                    .filter(|e| e.header.company_code == *company_code)
3951                    .flat_map(|e| e.lines.iter())
3952                    .filter(|l| l.gl_account.starts_with('4'))
3953                    .map(|l| l.credit_amount - l.debit_amount)
3954                    .fold(Decimal::ZERO, |acc, v| acc + v);
3955
3956                if company_revenue <= Decimal::ZERO {
3957                    continue;
3958                }
3959
3960                // Use 0.5% of period revenue per accrual item as a proxy
3961                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3962                if accrual_base <= Decimal::ZERO {
3963                    continue;
3964                }
3965
3966                for (description, expense_acct, liability_acct) in accrual_items {
3967                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3968                        company_code,
3969                        description,
3970                        accrual_base,
3971                        expense_acct,
3972                        liability_acct,
3973                        close_date,
3974                        None,
3975                    );
3976                    close_jes.push(accrual_je);
3977                    if let Some(rev_je) = reversal_je {
3978                        close_jes.push(rev_je);
3979                    }
3980                }
3981            }
3982
3983            debug!(
3984                "Generated accrual entries for {} companies",
3985                company_codes.len()
3986            );
3987        }
3988
3989        for company_code in &company_codes {
3990            // Calculate net income for this company from existing JEs:
3991            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
3992            // Revenue (4xxx): credit-normal, so net = credits - debits
3993            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
3994            let mut total_revenue = Decimal::ZERO;
3995            let mut total_expenses = Decimal::ZERO;
3996
3997            for entry in entries.iter() {
3998                if entry.header.company_code != *company_code {
3999                    continue;
4000                }
4001                for line in &entry.lines {
4002                    let category = AccountCategory::from_account(&line.gl_account);
4003                    match category {
4004                        AccountCategory::Revenue => {
4005                            // Revenue is credit-normal: net revenue = credits - debits
4006                            total_revenue += line.credit_amount - line.debit_amount;
4007                        }
4008                        AccountCategory::Cogs
4009                        | AccountCategory::OperatingExpense
4010                        | AccountCategory::OtherIncomeExpense
4011                        | AccountCategory::Tax => {
4012                            // Expenses are debit-normal: net expense = debits - credits
4013                            total_expenses += line.debit_amount - line.credit_amount;
4014                        }
4015                        _ => {}
4016                    }
4017                }
4018            }
4019
4020            let pre_tax_income = total_revenue - total_expenses;
4021
4022            // Skip if no income statement activity
4023            if pre_tax_income == Decimal::ZERO {
4024                debug!(
4025                    "Company {}: no pre-tax income, skipping period close",
4026                    company_code
4027                );
4028                continue;
4029            }
4030
4031            // --- Tax provision / DTA JE ---
4032            if pre_tax_income > Decimal::ZERO {
4033                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4034                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4035
4036                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4037                tax_header.document_type = "CL".to_string();
4038                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4039                tax_header.created_by = "CLOSE_ENGINE".to_string();
4040                tax_header.source = TransactionSource::Automated;
4041                tax_header.business_process = Some(BusinessProcess::R2R);
4042
4043                let doc_id = tax_header.document_id;
4044                let mut tax_je = JournalEntry::new(tax_header);
4045
4046                // DR Tax Expense (8000)
4047                tax_je.add_line(JournalEntryLine::debit(
4048                    doc_id,
4049                    1,
4050                    tax_accounts::TAX_EXPENSE.to_string(),
4051                    tax_amount,
4052                ));
4053                // CR Income Tax Payable (2130)
4054                tax_je.add_line(JournalEntryLine::credit(
4055                    doc_id,
4056                    2,
4057                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4058                    tax_amount,
4059                ));
4060
4061                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4062                close_jes.push(tax_je);
4063            } else {
4064                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
4065                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
4066                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4067                if dta_amount > Decimal::ZERO {
4068                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4069                    dta_header.document_type = "CL".to_string();
4070                    dta_header.header_text =
4071                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
4072                    dta_header.created_by = "CLOSE_ENGINE".to_string();
4073                    dta_header.source = TransactionSource::Automated;
4074                    dta_header.business_process = Some(BusinessProcess::R2R);
4075
4076                    let doc_id = dta_header.document_id;
4077                    let mut dta_je = JournalEntry::new(dta_header);
4078
4079                    // DR Deferred Tax Asset (1600)
4080                    dta_je.add_line(JournalEntryLine::debit(
4081                        doc_id,
4082                        1,
4083                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4084                        dta_amount,
4085                    ));
4086                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
4087                    // reflecting the benefit of the future deductible temporary difference.
4088                    dta_je.add_line(JournalEntryLine::credit(
4089                        doc_id,
4090                        2,
4091                        tax_accounts::TAX_EXPENSE.to_string(),
4092                        dta_amount,
4093                    ));
4094
4095                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4096                    close_jes.push(dta_je);
4097                    debug!(
4098                        "Company {}: loss year — recognised DTA of {}",
4099                        company_code, dta_amount
4100                    );
4101                }
4102            }
4103
4104            // --- Dividend JEs (v2.4) ---
4105            // If the entity is profitable after tax, declare a 10% dividend payout.
4106            // This runs AFTER tax provision so the dividend is based on post-tax income
4107            // but BEFORE the retained earnings close so the RE transfer reflects the
4108            // reduced balance.
4109            let tax_provision = if pre_tax_income > Decimal::ZERO {
4110                (pre_tax_income * tax_rate).round_dp(2)
4111            } else {
4112                Decimal::ZERO
4113            };
4114            let net_income = pre_tax_income - tax_provision;
4115
4116            if net_income > Decimal::ZERO {
4117                use datasynth_generators::DividendGenerator;
4118                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
4119                let mut div_gen = DividendGenerator::new(self.seed + 460);
4120                let currency_str = self
4121                    .config
4122                    .companies
4123                    .iter()
4124                    .find(|c| c.code == *company_code)
4125                    .map(|c| c.currency.as_str())
4126                    .unwrap_or("USD");
4127                let div_result = div_gen.generate(
4128                    company_code,
4129                    close_date,
4130                    Decimal::new(1, 0), // $1 per share placeholder
4131                    dividend_amount,
4132                    currency_str,
4133                );
4134                let div_je_count = div_result.journal_entries.len();
4135                close_jes.extend(div_result.journal_entries);
4136                debug!(
4137                    "Company {}: declared dividend of {} ({} JEs)",
4138                    company_code, dividend_amount, div_je_count
4139                );
4140            }
4141
4142            // --- Income statement closing JE ---
4143            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
4144            // For a loss year the DTA JE above already recognises the deferred benefit; here we
4145            // close the pre-tax loss into Retained Earnings as-is.
4146            if net_income != Decimal::ZERO {
4147                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4148                close_header.document_type = "CL".to_string();
4149                close_header.header_text =
4150                    Some(format!("Income statement close - {}", company_code));
4151                close_header.created_by = "CLOSE_ENGINE".to_string();
4152                close_header.source = TransactionSource::Automated;
4153                close_header.business_process = Some(BusinessProcess::R2R);
4154
4155                let doc_id = close_header.document_id;
4156                let mut close_je = JournalEntry::new(close_header);
4157
4158                let abs_net_income = net_income.abs();
4159
4160                if net_income > Decimal::ZERO {
4161                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
4162                    close_je.add_line(JournalEntryLine::debit(
4163                        doc_id,
4164                        1,
4165                        equity_accounts::INCOME_SUMMARY.to_string(),
4166                        abs_net_income,
4167                    ));
4168                    close_je.add_line(JournalEntryLine::credit(
4169                        doc_id,
4170                        2,
4171                        equity_accounts::RETAINED_EARNINGS.to_string(),
4172                        abs_net_income,
4173                    ));
4174                } else {
4175                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
4176                    close_je.add_line(JournalEntryLine::debit(
4177                        doc_id,
4178                        1,
4179                        equity_accounts::RETAINED_EARNINGS.to_string(),
4180                        abs_net_income,
4181                    ));
4182                    close_je.add_line(JournalEntryLine::credit(
4183                        doc_id,
4184                        2,
4185                        equity_accounts::INCOME_SUMMARY.to_string(),
4186                        abs_net_income,
4187                    ));
4188                }
4189
4190                debug_assert!(
4191                    close_je.is_balanced(),
4192                    "Income statement closing JE must be balanced"
4193                );
4194                close_jes.push(close_je);
4195            }
4196        }
4197
4198        let close_count = close_jes.len();
4199        if close_count > 0 {
4200            info!("Generated {} period-close journal entries", close_count);
4201            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4202            entries.extend(close_jes);
4203            stats.period_close_je_count = close_count;
4204
4205            // Update total entry/line-item stats
4206            stats.total_entries = entries.len() as u64;
4207            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4208        } else {
4209            debug!("No period-close entries generated (no income statement activity)");
4210        }
4211
4212        Ok(())
4213    }
4214
4215    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
4216    fn phase_audit_data(
4217        &mut self,
4218        entries: &[JournalEntry],
4219        stats: &mut EnhancedGenerationStatistics,
4220    ) -> SynthResult<AuditSnapshot> {
4221        if self.phase_config.generate_audit {
4222            info!("Phase 8: Generating Audit Data");
4223            let audit_snapshot = self.generate_audit_data(entries)?;
4224            stats.audit_engagement_count = audit_snapshot.engagements.len();
4225            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4226            stats.audit_evidence_count = audit_snapshot.evidence.len();
4227            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4228            stats.audit_finding_count = audit_snapshot.findings.len();
4229            stats.audit_judgment_count = audit_snapshot.judgments.len();
4230            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4231            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4232            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4233            stats.audit_sample_count = audit_snapshot.samples.len();
4234            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4235            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4236            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4237            stats.audit_related_party_count = audit_snapshot.related_parties.len();
4238            stats.audit_related_party_transaction_count =
4239                audit_snapshot.related_party_transactions.len();
4240            info!(
4241                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4242                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4243                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4244                 {} RP transactions",
4245                stats.audit_engagement_count,
4246                stats.audit_workpaper_count,
4247                stats.audit_evidence_count,
4248                stats.audit_risk_count,
4249                stats.audit_finding_count,
4250                stats.audit_judgment_count,
4251                stats.audit_confirmation_count,
4252                stats.audit_procedure_step_count,
4253                stats.audit_sample_count,
4254                stats.audit_analytical_result_count,
4255                stats.audit_ia_function_count,
4256                stats.audit_ia_report_count,
4257                stats.audit_related_party_count,
4258                stats.audit_related_party_transaction_count,
4259            );
4260            self.check_resources_with_log("post-audit")?;
4261            Ok(audit_snapshot)
4262        } else {
4263            debug!("Phase 8: Skipped (audit generation disabled)");
4264            Ok(AuditSnapshot::default())
4265        }
4266    }
4267
4268    /// Phase 9: Generate banking KYC/AML data.
4269    fn phase_banking_data(
4270        &mut self,
4271        stats: &mut EnhancedGenerationStatistics,
4272    ) -> SynthResult<BankingSnapshot> {
4273        if self.phase_config.generate_banking {
4274            info!("Phase 9: Generating Banking KYC/AML Data");
4275            let banking_snapshot = self.generate_banking_data()?;
4276            stats.banking_customer_count = banking_snapshot.customers.len();
4277            stats.banking_account_count = banking_snapshot.accounts.len();
4278            stats.banking_transaction_count = banking_snapshot.transactions.len();
4279            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4280            info!(
4281                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4282                stats.banking_customer_count, stats.banking_account_count,
4283                stats.banking_transaction_count, stats.banking_suspicious_count
4284            );
4285            self.check_resources_with_log("post-banking")?;
4286            Ok(banking_snapshot)
4287        } else {
4288            debug!("Phase 9: Skipped (banking generation disabled)");
4289            Ok(BankingSnapshot::default())
4290        }
4291    }
4292
4293    /// Phase 10: Export accounting network graphs for ML training.
4294    fn phase_graph_export(
4295        &mut self,
4296        entries: &[JournalEntry],
4297        coa: &Arc<ChartOfAccounts>,
4298        stats: &mut EnhancedGenerationStatistics,
4299    ) -> SynthResult<GraphExportSnapshot> {
4300        if self.phase_config.generate_graph_export && !entries.is_empty() {
4301            info!("Phase 10: Exporting Accounting Network Graphs");
4302            match self.export_graphs(entries, coa, stats) {
4303                Ok(snapshot) => {
4304                    info!(
4305                        "Graph export complete: {} graphs ({} nodes, {} edges)",
4306                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4307                    );
4308                    Ok(snapshot)
4309                }
4310                Err(e) => {
4311                    warn!("Phase 10: Graph export failed: {}", e);
4312                    Ok(GraphExportSnapshot::default())
4313                }
4314            }
4315        } else {
4316            debug!("Phase 10: Skipped (graph export disabled or no entries)");
4317            Ok(GraphExportSnapshot::default())
4318        }
4319    }
4320
4321    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
4322    #[allow(clippy::too_many_arguments)]
4323    fn phase_hypergraph_export(
4324        &self,
4325        coa: &Arc<ChartOfAccounts>,
4326        entries: &[JournalEntry],
4327        document_flows: &DocumentFlowSnapshot,
4328        sourcing: &SourcingSnapshot,
4329        hr: &HrSnapshot,
4330        manufacturing: &ManufacturingSnapshot,
4331        banking: &BankingSnapshot,
4332        audit: &AuditSnapshot,
4333        financial_reporting: &FinancialReportingSnapshot,
4334        ocpm: &OcpmSnapshot,
4335        compliance: &ComplianceRegulationsSnapshot,
4336        stats: &mut EnhancedGenerationStatistics,
4337    ) -> SynthResult<()> {
4338        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4339            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4340            match self.export_hypergraph(
4341                coa,
4342                entries,
4343                document_flows,
4344                sourcing,
4345                hr,
4346                manufacturing,
4347                banking,
4348                audit,
4349                financial_reporting,
4350                ocpm,
4351                compliance,
4352                stats,
4353            ) {
4354                Ok(info) => {
4355                    info!(
4356                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4357                        info.node_count, info.edge_count, info.hyperedge_count
4358                    );
4359                }
4360                Err(e) => {
4361                    warn!("Phase 10b: Hypergraph export failed: {}", e);
4362                }
4363            }
4364        } else {
4365            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4366        }
4367        Ok(())
4368    }
4369
4370    /// Phase 11: LLM Enrichment.
4371    ///
4372    /// Uses an LLM provider (mock by default) to enrich vendor names with
4373    /// realistic, context-aware names. This phase is non-blocking: failures
4374    /// log a warning but do not stop the generation pipeline.
4375    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4376        if !self.config.llm.enabled {
4377            debug!("Phase 11: Skipped (LLM enrichment disabled)");
4378            return;
4379        }
4380
4381        info!("Phase 11: Starting LLM Enrichment");
4382        let start = std::time::Instant::now();
4383
4384        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4385            // Select provider: use HttpLlmProvider when a non-mock provider is configured
4386            // and the corresponding API key environment variable is present.
4387            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4388                let schema_provider = &self.config.llm.provider;
4389                let api_key_env = match schema_provider.as_str() {
4390                    "openai" => Some("OPENAI_API_KEY"),
4391                    "anthropic" => Some("ANTHROPIC_API_KEY"),
4392                    "custom" => Some("LLM_API_KEY"),
4393                    _ => None,
4394                };
4395                if let Some(key_env) = api_key_env {
4396                    if std::env::var(key_env).is_ok() {
4397                        let llm_config = datasynth_core::llm::LlmConfig {
4398                            model: self.config.llm.model.clone(),
4399                            api_key_env: key_env.to_string(),
4400                            ..datasynth_core::llm::LlmConfig::default()
4401                        };
4402                        match HttpLlmProvider::new(llm_config) {
4403                            Ok(p) => Arc::new(p),
4404                            Err(e) => {
4405                                warn!(
4406                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
4407                                    e
4408                                );
4409                                Arc::new(MockLlmProvider::new(self.seed))
4410                            }
4411                        }
4412                    } else {
4413                        Arc::new(MockLlmProvider::new(self.seed))
4414                    }
4415                } else {
4416                    Arc::new(MockLlmProvider::new(self.seed))
4417                }
4418            };
4419            let enricher = VendorLlmEnricher::new(provider);
4420
4421            let industry = format!("{:?}", self.config.global.industry);
4422            let max_enrichments = self
4423                .config
4424                .llm
4425                .max_vendor_enrichments
4426                .min(self.master_data.vendors.len());
4427
4428            let mut enriched_count = 0usize;
4429            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4430                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4431                    Ok(name) => {
4432                        vendor.name = name;
4433                        enriched_count += 1;
4434                    }
4435                    Err(e) => {
4436                        warn!(
4437                            "LLM vendor enrichment failed for {}: {}",
4438                            vendor.vendor_id, e
4439                        );
4440                    }
4441                }
4442            }
4443
4444            enriched_count
4445        }));
4446
4447        match result {
4448            Ok(enriched_count) => {
4449                stats.llm_vendors_enriched = enriched_count;
4450                let elapsed = start.elapsed();
4451                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4452                info!(
4453                    "Phase 11 complete: {} vendors enriched in {}ms",
4454                    enriched_count, stats.llm_enrichment_ms
4455                );
4456            }
4457            Err(_) => {
4458                let elapsed = start.elapsed();
4459                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4460                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4461            }
4462        }
4463    }
4464
4465    /// Phase 12: Diffusion Enhancement.
4466    ///
4467    /// Generates a sample set using the statistical diffusion backend to
4468    /// demonstrate distribution-matching data generation. This phase is
4469    /// non-blocking: failures log a warning but do not stop the pipeline.
4470    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4471        if !self.config.diffusion.enabled {
4472            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4473            return;
4474        }
4475
4476        info!("Phase 12: Starting Diffusion Enhancement");
4477        let start = std::time::Instant::now();
4478
4479        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4480            // Target distribution: transaction amounts (log-normal-like)
4481            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
4482            let stds = vec![2000.0, 1.5, 1.0];
4483
4484            let diffusion_config = DiffusionConfig {
4485                n_steps: self.config.diffusion.n_steps,
4486                seed: self.seed,
4487                ..Default::default()
4488            };
4489
4490            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4491
4492            let n_samples = self.config.diffusion.sample_size;
4493            let n_features = 3; // amount, line_items, approval_level
4494            let samples = backend.generate(n_samples, n_features, self.seed);
4495
4496            samples.len()
4497        }));
4498
4499        match result {
4500            Ok(sample_count) => {
4501                stats.diffusion_samples_generated = sample_count;
4502                let elapsed = start.elapsed();
4503                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4504                info!(
4505                    "Phase 12 complete: {} diffusion samples generated in {}ms",
4506                    sample_count, stats.diffusion_enhancement_ms
4507                );
4508            }
4509            Err(_) => {
4510                let elapsed = start.elapsed();
4511                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4512                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4513            }
4514        }
4515    }
4516
4517    /// Phase 13: Causal Overlay.
4518    ///
4519    /// Builds a structural causal model from a built-in template (e.g.,
4520    /// fraud_detection) and generates causal samples. Optionally validates
4521    /// that the output respects the causal structure. This phase is
4522    /// non-blocking: failures log a warning but do not stop the pipeline.
4523    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4524        if !self.config.causal.enabled {
4525            debug!("Phase 13: Skipped (causal generation disabled)");
4526            return;
4527        }
4528
4529        info!("Phase 13: Starting Causal Overlay");
4530        let start = std::time::Instant::now();
4531
4532        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4533            // Select template based on config
4534            let graph = match self.config.causal.template.as_str() {
4535                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4536                _ => CausalGraph::fraud_detection_template(),
4537            };
4538
4539            let scm = StructuralCausalModel::new(graph.clone())
4540                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4541
4542            let n_samples = self.config.causal.sample_size;
4543            let samples = scm
4544                .generate(n_samples, self.seed)
4545                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4546
4547            // Optionally validate causal structure
4548            let validation_passed = if self.config.causal.validate {
4549                let report = CausalValidator::validate_causal_structure(&samples, &graph);
4550                if report.valid {
4551                    info!(
4552                        "Causal validation passed: all {} checks OK",
4553                        report.checks.len()
4554                    );
4555                } else {
4556                    warn!(
4557                        "Causal validation: {} violations detected: {:?}",
4558                        report.violations.len(),
4559                        report.violations
4560                    );
4561                }
4562                Some(report.valid)
4563            } else {
4564                None
4565            };
4566
4567            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4568        }));
4569
4570        match result {
4571            Ok(Ok((sample_count, validation_passed))) => {
4572                stats.causal_samples_generated = sample_count;
4573                stats.causal_validation_passed = validation_passed;
4574                let elapsed = start.elapsed();
4575                stats.causal_generation_ms = elapsed.as_millis() as u64;
4576                info!(
4577                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4578                    sample_count, stats.causal_generation_ms, validation_passed,
4579                );
4580            }
4581            Ok(Err(e)) => {
4582                let elapsed = start.elapsed();
4583                stats.causal_generation_ms = elapsed.as_millis() as u64;
4584                warn!("Phase 13: Causal generation failed: {}", e);
4585            }
4586            Err(_) => {
4587                let elapsed = start.elapsed();
4588                stats.causal_generation_ms = elapsed.as_millis() as u64;
4589                warn!("Phase 13: Causal generation failed (panic caught), continuing");
4590            }
4591        }
4592    }
4593
4594    /// Phase 14: Generate S2C sourcing data.
4595    fn phase_sourcing_data(
4596        &mut self,
4597        stats: &mut EnhancedGenerationStatistics,
4598    ) -> SynthResult<SourcingSnapshot> {
4599        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4600            debug!("Phase 14: Skipped (sourcing generation disabled)");
4601            return Ok(SourcingSnapshot::default());
4602        }
4603        let degradation = self.check_resources()?;
4604        if degradation >= DegradationLevel::Reduced {
4605            debug!(
4606                "Phase skipped due to resource pressure (degradation: {:?})",
4607                degradation
4608            );
4609            return Ok(SourcingSnapshot::default());
4610        }
4611
4612        info!("Phase 14: Generating S2C Sourcing Data");
4613        let seed = self.seed;
4614
4615        // Gather vendor data from master data
4616        let vendor_ids: Vec<String> = self
4617            .master_data
4618            .vendors
4619            .iter()
4620            .map(|v| v.vendor_id.clone())
4621            .collect();
4622        if vendor_ids.is_empty() {
4623            debug!("Phase 14: Skipped (no vendors available)");
4624            return Ok(SourcingSnapshot::default());
4625        }
4626
4627        let categories: Vec<(String, String)> = vec![
4628            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4629            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4630            ("CAT-IT".to_string(), "IT Equipment".to_string()),
4631            ("CAT-SVC".to_string(), "Professional Services".to_string()),
4632            ("CAT-LOG".to_string(), "Logistics".to_string()),
4633        ];
4634        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4635            .iter()
4636            .map(|(id, name)| {
4637                (
4638                    id.clone(),
4639                    name.clone(),
4640                    rust_decimal::Decimal::from(100_000),
4641                )
4642            })
4643            .collect();
4644
4645        let company_code = self
4646            .config
4647            .companies
4648            .first()
4649            .map(|c| c.code.as_str())
4650            .unwrap_or("1000");
4651        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4652            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4653        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4654        let fiscal_year = start_date.year() as u16;
4655        let owner_ids: Vec<String> = self
4656            .master_data
4657            .employees
4658            .iter()
4659            .take(5)
4660            .map(|e| e.employee_id.clone())
4661            .collect();
4662        let owner_id = owner_ids
4663            .first()
4664            .map(std::string::String::as_str)
4665            .unwrap_or("BUYER-001");
4666
4667        // Step 1: Spend Analysis
4668        let mut spend_gen = SpendAnalysisGenerator::new(seed);
4669        let spend_analyses =
4670            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4671
4672        // Step 2: Sourcing Projects
4673        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4674        let sourcing_projects = if owner_ids.is_empty() {
4675            Vec::new()
4676        } else {
4677            project_gen.generate(
4678                company_code,
4679                &categories_with_spend,
4680                &owner_ids,
4681                start_date,
4682                self.config.global.period_months,
4683            )
4684        };
4685        stats.sourcing_project_count = sourcing_projects.len();
4686
4687        // Step 3: Qualifications
4688        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4689        let mut qual_gen = QualificationGenerator::new(seed + 2);
4690        let qualifications = qual_gen.generate(
4691            company_code,
4692            &qual_vendor_ids,
4693            sourcing_projects.first().map(|p| p.project_id.as_str()),
4694            owner_id,
4695            start_date,
4696        );
4697
4698        // Step 4: RFx Events
4699        let mut rfx_gen = RfxGenerator::new(seed + 3);
4700        let rfx_events: Vec<RfxEvent> = sourcing_projects
4701            .iter()
4702            .map(|proj| {
4703                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4704                rfx_gen.generate(
4705                    company_code,
4706                    &proj.project_id,
4707                    &proj.category_id,
4708                    &qualified_vids,
4709                    owner_id,
4710                    start_date,
4711                    50000.0,
4712                )
4713            })
4714            .collect();
4715        stats.rfx_event_count = rfx_events.len();
4716
4717        // Step 5: Bids
4718        let mut bid_gen = BidGenerator::new(seed + 4);
4719        let mut all_bids = Vec::new();
4720        for rfx in &rfx_events {
4721            let bidder_count = vendor_ids.len().clamp(2, 5);
4722            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4723            let bids = bid_gen.generate(rfx, &responding, start_date);
4724            all_bids.extend(bids);
4725        }
4726        stats.bid_count = all_bids.len();
4727
4728        // Step 6: Bid Evaluations
4729        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4730        let bid_evaluations: Vec<BidEvaluation> = rfx_events
4731            .iter()
4732            .map(|rfx| {
4733                let rfx_bids: Vec<SupplierBid> = all_bids
4734                    .iter()
4735                    .filter(|b| b.rfx_id == rfx.rfx_id)
4736                    .cloned()
4737                    .collect();
4738                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4739            })
4740            .collect();
4741
4742        // Step 7: Contracts from winning bids
4743        let mut contract_gen = ContractGenerator::new(seed + 6);
4744        let contracts: Vec<ProcurementContract> = bid_evaluations
4745            .iter()
4746            .zip(rfx_events.iter())
4747            .filter_map(|(eval, rfx)| {
4748                eval.ranked_bids.first().and_then(|winner| {
4749                    all_bids
4750                        .iter()
4751                        .find(|b| b.bid_id == winner.bid_id)
4752                        .map(|winning_bid| {
4753                            contract_gen.generate_from_bid(
4754                                winning_bid,
4755                                Some(&rfx.sourcing_project_id),
4756                                &rfx.category_id,
4757                                owner_id,
4758                                start_date,
4759                            )
4760                        })
4761                })
4762            })
4763            .collect();
4764        stats.contract_count = contracts.len();
4765
4766        // Step 8: Catalog Items
4767        let mut catalog_gen = CatalogGenerator::new(seed + 7);
4768        let catalog_items = catalog_gen.generate(&contracts);
4769        stats.catalog_item_count = catalog_items.len();
4770
4771        // Step 9: Scorecards
4772        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4773        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4774            .iter()
4775            .fold(
4776                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4777                |mut acc, c| {
4778                    acc.entry(c.vendor_id.clone()).or_default().push(c);
4779                    acc
4780                },
4781            )
4782            .into_iter()
4783            .collect();
4784        let scorecards = scorecard_gen.generate(
4785            company_code,
4786            &vendor_contracts,
4787            start_date,
4788            end_date,
4789            owner_id,
4790        );
4791        stats.scorecard_count = scorecards.len();
4792
4793        // Back-populate cross-references on sourcing projects (Task 35)
4794        // Link each project to its RFx events, contracts, and spend analyses
4795        let mut sourcing_projects = sourcing_projects;
4796        for project in &mut sourcing_projects {
4797            // Link RFx events generated for this project
4798            project.rfx_ids = rfx_events
4799                .iter()
4800                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4801                .map(|rfx| rfx.rfx_id.clone())
4802                .collect();
4803
4804            // Link contract awarded from this project's RFx
4805            project.contract_id = contracts
4806                .iter()
4807                .find(|c| {
4808                    c.sourcing_project_id
4809                        .as_deref()
4810                        .is_some_and(|sp| sp == project.project_id)
4811                })
4812                .map(|c| c.contract_id.clone());
4813
4814            // Link spend analysis for matching category (use category_id as the reference)
4815            project.spend_analysis_id = spend_analyses
4816                .iter()
4817                .find(|sa| sa.category_id == project.category_id)
4818                .map(|sa| sa.category_id.clone());
4819        }
4820
4821        info!(
4822            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4823            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4824            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4825        );
4826        self.check_resources_with_log("post-sourcing")?;
4827
4828        Ok(SourcingSnapshot {
4829            spend_analyses,
4830            sourcing_projects,
4831            qualifications,
4832            rfx_events,
4833            bids: all_bids,
4834            bid_evaluations,
4835            contracts,
4836            catalog_items,
4837            scorecards,
4838        })
4839    }
4840
4841    /// Build a [`GroupStructure`] from the current company configuration.
4842    ///
4843    /// The first company in the configuration is treated as the ultimate parent.
4844    /// All remaining companies become wholly-owned (100 %) subsidiaries with
4845    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
4846    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4847        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4848
4849        let parent_code = self
4850            .config
4851            .companies
4852            .first()
4853            .map(|c| c.code.clone())
4854            .unwrap_or_else(|| "PARENT".to_string());
4855
4856        let mut group = GroupStructure::new(parent_code);
4857
4858        for company in self.config.companies.iter().skip(1) {
4859            let sub =
4860                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4861            group.add_subsidiary(sub);
4862        }
4863
4864        group
4865    }
4866
4867    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
4868    fn phase_intercompany(
4869        &mut self,
4870        journal_entries: &[JournalEntry],
4871        stats: &mut EnhancedGenerationStatistics,
4872    ) -> SynthResult<IntercompanySnapshot> {
4873        // Skip if intercompany is disabled in config
4874        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4875            debug!("Phase 14b: Skipped (intercompany generation disabled)");
4876            return Ok(IntercompanySnapshot::default());
4877        }
4878
4879        // Intercompany requires at least 2 companies
4880        if self.config.companies.len() < 2 {
4881            debug!(
4882                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4883                self.config.companies.len()
4884            );
4885            return Ok(IntercompanySnapshot::default());
4886        }
4887
4888        info!("Phase 14b: Generating Intercompany Transactions");
4889
4890        // Build the group structure early — used by ISA 600 component auditor scope
4891        // and consolidated financial statement generators downstream.
4892        let group_structure = self.build_group_structure();
4893        debug!(
4894            "Group structure built: parent={}, subsidiaries={}",
4895            group_structure.parent_entity,
4896            group_structure.subsidiaries.len()
4897        );
4898
4899        let seed = self.seed;
4900        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4901            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4902        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4903
4904        // Build ownership structure from company configs
4905        // First company is treated as the parent, remaining are subsidiaries
4906        let parent_code = self.config.companies[0].code.clone();
4907        let mut ownership_structure =
4908            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4909
4910        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4911            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4912                format!("REL{:03}", i + 1),
4913                parent_code.clone(),
4914                company.code.clone(),
4915                rust_decimal::Decimal::from(100), // Default 100% ownership
4916                start_date,
4917            );
4918            ownership_structure.add_relationship(relationship);
4919        }
4920
4921        // Convert config transfer pricing method to core model enum
4922        let tp_method = match self.config.intercompany.transfer_pricing_method {
4923            datasynth_config::schema::TransferPricingMethod::CostPlus => {
4924                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4925            }
4926            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4927                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4928            }
4929            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4930                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4931            }
4932            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4933                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4934            }
4935            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4936                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4937            }
4938        };
4939
4940        // Build IC generator config from schema config
4941        let ic_currency = self
4942            .config
4943            .companies
4944            .first()
4945            .map(|c| c.currency.clone())
4946            .unwrap_or_else(|| "USD".to_string());
4947        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4948            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4949            transfer_pricing_method: tp_method,
4950            markup_percent: rust_decimal::Decimal::from_f64_retain(
4951                self.config.intercompany.markup_percent,
4952            )
4953            .unwrap_or(rust_decimal::Decimal::from(5)),
4954            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4955            default_currency: ic_currency,
4956            ..Default::default()
4957        };
4958
4959        // Create IC generator
4960        let mut ic_generator = datasynth_generators::ICGenerator::new(
4961            ic_gen_config,
4962            ownership_structure.clone(),
4963            seed + 50,
4964        );
4965
4966        // Generate IC transactions for the period
4967        // Use ~3 transactions per day as a reasonable default
4968        let transactions_per_day = 3;
4969        let matched_pairs = ic_generator.generate_transactions_for_period(
4970            start_date,
4971            end_date,
4972            transactions_per_day,
4973        );
4974
4975        // Generate IC source P2P/O2C documents
4976        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
4977        debug!(
4978            "Generated {} IC seller invoices, {} IC buyer POs",
4979            ic_doc_chains.seller_invoices.len(),
4980            ic_doc_chains.buyer_orders.len()
4981        );
4982
4983        // Generate journal entries from matched pairs
4984        let mut seller_entries = Vec::new();
4985        let mut buyer_entries = Vec::new();
4986        let fiscal_year = start_date.year();
4987
4988        for pair in &matched_pairs {
4989            let fiscal_period = pair.posting_date.month();
4990            let (seller_je, buyer_je) =
4991                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4992            seller_entries.push(seller_je);
4993            buyer_entries.push(buyer_je);
4994        }
4995
4996        // Run matching engine
4997        let matching_config = datasynth_generators::ICMatchingConfig {
4998            base_currency: self
4999                .config
5000                .companies
5001                .first()
5002                .map(|c| c.currency.clone())
5003                .unwrap_or_else(|| "USD".to_string()),
5004            ..Default::default()
5005        };
5006        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5007        matching_engine.load_matched_pairs(&matched_pairs);
5008        let matching_result = matching_engine.run_matching(end_date);
5009
5010        // Generate elimination entries if configured
5011        let mut elimination_entries = Vec::new();
5012        if self.config.intercompany.generate_eliminations {
5013            let elim_config = datasynth_generators::EliminationConfig {
5014                consolidation_entity: "GROUP".to_string(),
5015                base_currency: self
5016                    .config
5017                    .companies
5018                    .first()
5019                    .map(|c| c.currency.clone())
5020                    .unwrap_or_else(|| "USD".to_string()),
5021                ..Default::default()
5022            };
5023
5024            let mut elim_generator =
5025                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5026
5027            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5028            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5029                matching_result
5030                    .matched_balances
5031                    .iter()
5032                    .chain(matching_result.unmatched_balances.iter())
5033                    .cloned()
5034                    .collect();
5035
5036            // Build investment and equity maps from the group structure so that the
5037            // elimination generator can produce equity-investment elimination entries
5038            // (parent's investment in subsidiary vs. subsidiary's equity capital).
5039            //
5040            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
5041            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
5042            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
5043            //
5044            // Net assets are derived from the journal entries using account-range heuristics:
5045            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
5046            // no JE data is available (IC phase runs early in the generation pipeline).
5047            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5048                std::collections::HashMap::new();
5049            let mut equity_amounts: std::collections::HashMap<
5050                String,
5051                std::collections::HashMap<String, rust_decimal::Decimal>,
5052            > = std::collections::HashMap::new();
5053            {
5054                use rust_decimal::Decimal;
5055                let hundred = Decimal::from(100u32);
5056                let ten_pct = Decimal::new(10, 2); // 0.10
5057                let thirty_pct = Decimal::new(30, 2); // 0.30
5058                let sixty_pct = Decimal::new(60, 2); // 0.60
5059                let parent_code = &group_structure.parent_entity;
5060                for sub in &group_structure.subsidiaries {
5061                    let net_assets = {
5062                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5063                        if na > Decimal::ZERO {
5064                            na
5065                        } else {
5066                            Decimal::from(1_000_000u64)
5067                        }
5068                    };
5069                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
5070                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5071                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5072
5073                    // Split subsidiary equity into conventional components:
5074                    // 10 % share capital / 30 % APIC / 60 % retained earnings
5075                    let mut eq_map = std::collections::HashMap::new();
5076                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5077                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5078                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5079                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
5080                }
5081            }
5082
5083            let journal = elim_generator.generate_eliminations(
5084                &fiscal_period,
5085                end_date,
5086                &all_balances,
5087                &matched_pairs,
5088                &investment_amounts,
5089                &equity_amounts,
5090            );
5091
5092            elimination_entries = journal.entries.clone();
5093        }
5094
5095        let matched_pair_count = matched_pairs.len();
5096        let elimination_entry_count = elimination_entries.len();
5097        let match_rate = matching_result.match_rate;
5098
5099        stats.ic_matched_pair_count = matched_pair_count;
5100        stats.ic_elimination_count = elimination_entry_count;
5101        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5102
5103        info!(
5104            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5105            matched_pair_count,
5106            stats.ic_transaction_count,
5107            seller_entries.len(),
5108            buyer_entries.len(),
5109            elimination_entry_count,
5110            match_rate * 100.0
5111        );
5112        self.check_resources_with_log("post-intercompany")?;
5113
5114        // ----------------------------------------------------------------
5115        // NCI measurements: derive from group structure ownership percentages
5116        // ----------------------------------------------------------------
5117        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5118            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5119            use rust_decimal::Decimal;
5120
5121            let eight_pct = Decimal::new(8, 2); // 0.08
5122
5123            group_structure
5124                .subsidiaries
5125                .iter()
5126                .filter(|sub| {
5127                    sub.nci_percentage > Decimal::ZERO
5128                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5129                })
5130                .map(|sub| {
5131                    // Compute net assets from actual journal entries for this subsidiary.
5132                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
5133                    // IC phase runs before the main JE batch has been populated).
5134                    let net_assets_from_jes =
5135                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5136
5137                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
5138                        net_assets_from_jes.round_dp(2)
5139                    } else {
5140                        // Fallback: use a plausible base amount
5141                        Decimal::from(1_000_000u64)
5142                    };
5143
5144                    // Net income approximated as 8% of net assets
5145                    let net_income = (net_assets * eight_pct).round_dp(2);
5146
5147                    NciMeasurement::compute(
5148                        sub.entity_code.clone(),
5149                        sub.nci_percentage,
5150                        net_assets,
5151                        net_income,
5152                    )
5153                })
5154                .collect()
5155        };
5156
5157        if !nci_measurements.is_empty() {
5158            info!(
5159                "NCI measurements: {} subsidiaries with non-controlling interests",
5160                nci_measurements.len()
5161            );
5162        }
5163
5164        Ok(IntercompanySnapshot {
5165            group_structure: Some(group_structure),
5166            matched_pairs,
5167            seller_journal_entries: seller_entries,
5168            buyer_journal_entries: buyer_entries,
5169            elimination_entries,
5170            nci_measurements,
5171            ic_document_chains: Some(ic_doc_chains),
5172            matched_pair_count,
5173            elimination_entry_count,
5174            match_rate,
5175        })
5176    }
5177
5178    /// Phase 15: Generate bank reconciliations and financial statements.
5179    fn phase_financial_reporting(
5180        &mut self,
5181        document_flows: &DocumentFlowSnapshot,
5182        journal_entries: &[JournalEntry],
5183        coa: &Arc<ChartOfAccounts>,
5184        _hr: &HrSnapshot,
5185        _audit: &AuditSnapshot,
5186        stats: &mut EnhancedGenerationStatistics,
5187    ) -> SynthResult<FinancialReportingSnapshot> {
5188        let fs_enabled = self.phase_config.generate_financial_statements
5189            || self.config.financial_reporting.enabled;
5190        let br_enabled = self.phase_config.generate_bank_reconciliation;
5191
5192        if !fs_enabled && !br_enabled {
5193            debug!("Phase 15: Skipped (financial reporting disabled)");
5194            return Ok(FinancialReportingSnapshot::default());
5195        }
5196
5197        info!("Phase 15: Generating Financial Reporting Data");
5198
5199        let seed = self.seed;
5200        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5201            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5202
5203        let mut financial_statements = Vec::new();
5204        let mut bank_reconciliations = Vec::new();
5205        let mut trial_balances = Vec::new();
5206        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
5207        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
5208            Vec::new();
5209        // Standalone statements keyed by entity code
5210        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5211            std::collections::HashMap::new();
5212        // Consolidated statements (one per period)
5213        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5214        // Consolidation schedules (one per period)
5215        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5216
5217        // Generate financial statements from JE-derived trial balances.
5218        //
5219        // When journal entries are available, we use cumulative trial balances for
5220        // balance sheet accounts and current-period trial balances for income
5221        // statement accounts. We also track prior-period trial balances so the
5222        // generator can produce comparative amounts, and we build a proper
5223        // cash flow statement from working capital changes rather than random data.
5224        if fs_enabled {
5225            let has_journal_entries = !journal_entries.is_empty();
5226
5227            // Use FinancialStatementGenerator for balance sheet and income statement,
5228            // but build cash flow ourselves from TB data when JEs are available.
5229            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
5230            // Separate generator for consolidated statements (different seed offset)
5231            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
5232
5233            // Collect elimination JEs once (reused across periods)
5234            let elimination_entries: Vec<&JournalEntry> = journal_entries
5235                .iter()
5236                .filter(|je| je.header.is_elimination)
5237                .collect();
5238
5239            // Generate one set of statements per period, per entity
5240            for period in 0..self.config.global.period_months {
5241                let period_start = start_date + chrono::Months::new(period);
5242                let period_end =
5243                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5244                let fiscal_year = period_end.year() as u16;
5245                let fiscal_period = period_end.month() as u8;
5246                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5247
5248                // Build per-entity trial balances for this period (non-elimination JEs)
5249                // We accumulate them for the consolidation step.
5250                let mut entity_tb_map: std::collections::HashMap<
5251                    String,
5252                    std::collections::HashMap<String, rust_decimal::Decimal>,
5253                > = std::collections::HashMap::new();
5254
5255                // --- Standalone: one set of statements per company ---
5256                for (company_idx, company) in self.config.companies.iter().enumerate() {
5257                    let company_code = company.code.as_str();
5258                    let currency = company.currency.as_str();
5259                    // Use a unique seed offset per company to keep statements deterministic
5260                    // and distinct across companies
5261                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
5262                    let mut company_fs_gen =
5263                        FinancialStatementGenerator::new(seed + company_seed_offset);
5264
5265                    if has_journal_entries {
5266                        let tb_entries = Self::build_cumulative_trial_balance(
5267                            journal_entries,
5268                            coa,
5269                            company_code,
5270                            start_date,
5271                            period_end,
5272                            fiscal_year,
5273                            fiscal_period,
5274                        );
5275
5276                        // Accumulate per-entity category balances for consolidation
5277                        let entity_cat_map =
5278                            entity_tb_map.entry(company_code.to_string()).or_default();
5279                        for tb_entry in &tb_entries {
5280                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
5281                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
5282                        }
5283
5284                        let stmts = company_fs_gen.generate(
5285                            company_code,
5286                            currency,
5287                            &tb_entries,
5288                            period_start,
5289                            period_end,
5290                            fiscal_year,
5291                            fiscal_period,
5292                            None,
5293                            "SYS-AUTOCLOSE",
5294                        );
5295
5296                        let mut entity_stmts = Vec::new();
5297                        for stmt in stmts {
5298                            if stmt.statement_type == StatementType::CashFlowStatement {
5299                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
5300                                let cf_items = Self::build_cash_flow_from_trial_balances(
5301                                    &tb_entries,
5302                                    None,
5303                                    net_income,
5304                                );
5305                                entity_stmts.push(FinancialStatement {
5306                                    cash_flow_items: cf_items,
5307                                    ..stmt
5308                                });
5309                            } else {
5310                                entity_stmts.push(stmt);
5311                            }
5312                        }
5313
5314                        // Add to the flat financial_statements list (used by KPI/budget)
5315                        financial_statements.extend(entity_stmts.clone());
5316
5317                        // Store standalone per-entity
5318                        standalone_statements
5319                            .entry(company_code.to_string())
5320                            .or_default()
5321                            .extend(entity_stmts);
5322
5323                        // Only store trial balance for the first company in the period
5324                        // to avoid duplicates in the trial_balances list
5325                        if company_idx == 0 {
5326                            trial_balances.push(PeriodTrialBalance {
5327                                fiscal_year,
5328                                fiscal_period,
5329                                period_start,
5330                                period_end,
5331                                entries: tb_entries,
5332                            });
5333                        }
5334                    } else {
5335                        // Fallback: no JEs available
5336                        let tb_entries = Self::build_trial_balance_from_entries(
5337                            journal_entries,
5338                            coa,
5339                            company_code,
5340                            fiscal_year,
5341                            fiscal_period,
5342                        );
5343
5344                        let stmts = company_fs_gen.generate(
5345                            company_code,
5346                            currency,
5347                            &tb_entries,
5348                            period_start,
5349                            period_end,
5350                            fiscal_year,
5351                            fiscal_period,
5352                            None,
5353                            "SYS-AUTOCLOSE",
5354                        );
5355                        financial_statements.extend(stmts.clone());
5356                        standalone_statements
5357                            .entry(company_code.to_string())
5358                            .or_default()
5359                            .extend(stmts);
5360
5361                        if company_idx == 0 && !tb_entries.is_empty() {
5362                            trial_balances.push(PeriodTrialBalance {
5363                                fiscal_year,
5364                                fiscal_period,
5365                                period_start,
5366                                period_end,
5367                                entries: tb_entries,
5368                            });
5369                        }
5370                    }
5371                }
5372
5373                // --- Consolidated: aggregate all entities + apply eliminations ---
5374                // Use the primary (first) company's currency for the consolidated statement
5375                let group_currency = self
5376                    .config
5377                    .companies
5378                    .first()
5379                    .map(|c| c.currency.as_str())
5380                    .unwrap_or("USD");
5381
5382                // Build owned elimination entries for this period
5383                let period_eliminations: Vec<JournalEntry> = elimination_entries
5384                    .iter()
5385                    .filter(|je| {
5386                        je.header.fiscal_year == fiscal_year
5387                            && je.header.fiscal_period == fiscal_period
5388                    })
5389                    .map(|je| (*je).clone())
5390                    .collect();
5391
5392                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5393                    &entity_tb_map,
5394                    &period_eliminations,
5395                    &period_label,
5396                );
5397
5398                // Build a pseudo trial balance from consolidated line items for the
5399                // FinancialStatementGenerator to use (only for cash flow direction).
5400                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5401                    .line_items
5402                    .iter()
5403                    .map(|li| {
5404                        let net = li.post_elimination_total;
5405                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5406                            (net, rust_decimal::Decimal::ZERO)
5407                        } else {
5408                            (rust_decimal::Decimal::ZERO, -net)
5409                        };
5410                        datasynth_generators::TrialBalanceEntry {
5411                            account_code: li.account_category.clone(),
5412                            account_name: li.account_category.clone(),
5413                            category: li.account_category.clone(),
5414                            debit_balance: debit,
5415                            credit_balance: credit,
5416                        }
5417                    })
5418                    .collect();
5419
5420                let mut cons_stmts = cons_gen.generate(
5421                    "GROUP",
5422                    group_currency,
5423                    &cons_tb,
5424                    period_start,
5425                    period_end,
5426                    fiscal_year,
5427                    fiscal_period,
5428                    None,
5429                    "SYS-AUTOCLOSE",
5430                );
5431
5432                // Split consolidated line items by statement type.
5433                // The consolidation generator returns BS items first, then IS items,
5434                // identified by their CONS- prefix and category.
5435                let bs_categories: &[&str] = &[
5436                    "CASH",
5437                    "RECEIVABLES",
5438                    "INVENTORY",
5439                    "FIXEDASSETS",
5440                    "PAYABLES",
5441                    "ACCRUEDLIABILITIES",
5442                    "LONGTERMDEBT",
5443                    "EQUITY",
5444                ];
5445                let (bs_items, is_items): (Vec<_>, Vec<_>) =
5446                    cons_line_items.into_iter().partition(|li| {
5447                        let upper = li.label.to_uppercase();
5448                        bs_categories.iter().any(|c| upper == *c)
5449                    });
5450
5451                for stmt in &mut cons_stmts {
5452                    stmt.is_consolidated = true;
5453                    match stmt.statement_type {
5454                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5455                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5456                        _ => {} // CF and equity change statements keep generator output
5457                    }
5458                }
5459
5460                consolidated_statements.extend(cons_stmts);
5461                consolidation_schedules.push(schedule);
5462            }
5463
5464            // Backward compat: if only 1 company, use existing code path logic
5465            // (prior_cumulative_tb for comparative amounts). Already handled above;
5466            // the prior_ref is omitted to keep this change minimal.
5467            let _ = &mut fs_gen; // suppress unused warning
5468
5469            stats.financial_statement_count = financial_statements.len();
5470            info!(
5471                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5472                stats.financial_statement_count,
5473                consolidated_statements.len(),
5474                has_journal_entries
5475            );
5476
5477            // ----------------------------------------------------------------
5478            // IFRS 8 / ASC 280: Operating Segment Reporting
5479            // ----------------------------------------------------------------
5480            // Build entity seeds from the company configuration.
5481            let entity_seeds: Vec<SegmentSeed> = self
5482                .config
5483                .companies
5484                .iter()
5485                .map(|c| SegmentSeed {
5486                    code: c.code.clone(),
5487                    name: c.name.clone(),
5488                    currency: c.currency.clone(),
5489                })
5490                .collect();
5491
5492            let mut seg_gen = SegmentGenerator::new(seed + 30);
5493
5494            // Generate one set of segment reports per period.
5495            // We extract consolidated revenue / profit / assets from the consolidated
5496            // financial statements produced above, falling back to simple sums when
5497            // no consolidated statements were generated (single-entity path).
5498            for period in 0..self.config.global.period_months {
5499                let period_end =
5500                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5501                let fiscal_year = period_end.year() as u16;
5502                let fiscal_period = period_end.month() as u8;
5503                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5504
5505                use datasynth_core::models::StatementType;
5506
5507                // Try to find consolidated income statement for this period
5508                let cons_is = consolidated_statements.iter().find(|s| {
5509                    s.fiscal_year == fiscal_year
5510                        && s.fiscal_period == fiscal_period
5511                        && s.statement_type == StatementType::IncomeStatement
5512                });
5513                let cons_bs = consolidated_statements.iter().find(|s| {
5514                    s.fiscal_year == fiscal_year
5515                        && s.fiscal_period == fiscal_period
5516                        && s.statement_type == StatementType::BalanceSheet
5517                });
5518
5519                // If consolidated statements not available fall back to the flat list
5520                let is_stmt = cons_is.or_else(|| {
5521                    financial_statements.iter().find(|s| {
5522                        s.fiscal_year == fiscal_year
5523                            && s.fiscal_period == fiscal_period
5524                            && s.statement_type == StatementType::IncomeStatement
5525                    })
5526                });
5527                let bs_stmt = cons_bs.or_else(|| {
5528                    financial_statements.iter().find(|s| {
5529                        s.fiscal_year == fiscal_year
5530                            && s.fiscal_period == fiscal_period
5531                            && s.statement_type == StatementType::BalanceSheet
5532                    })
5533                });
5534
5535                let consolidated_revenue = is_stmt
5536                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5537                    .map(|li| -li.amount) // revenue is stored as negative in IS
5538                    .unwrap_or(rust_decimal::Decimal::ZERO);
5539
5540                let consolidated_profit = is_stmt
5541                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5542                    .map(|li| li.amount)
5543                    .unwrap_or(rust_decimal::Decimal::ZERO);
5544
5545                let consolidated_assets = bs_stmt
5546                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5547                    .map(|li| li.amount)
5548                    .unwrap_or(rust_decimal::Decimal::ZERO);
5549
5550                // Skip periods where we have no financial data
5551                if consolidated_revenue == rust_decimal::Decimal::ZERO
5552                    && consolidated_assets == rust_decimal::Decimal::ZERO
5553                {
5554                    continue;
5555                }
5556
5557                let group_code = self
5558                    .config
5559                    .companies
5560                    .first()
5561                    .map(|c| c.code.as_str())
5562                    .unwrap_or("GROUP");
5563
5564                // Compute period depreciation from JEs with document type "CL" hitting account
5565                // 6000 (depreciation expense).  These are generated by phase_period_close.
5566                let total_depr: rust_decimal::Decimal = journal_entries
5567                    .iter()
5568                    .filter(|je| je.header.document_type == "CL")
5569                    .flat_map(|je| je.lines.iter())
5570                    .filter(|l| l.gl_account.starts_with("6000"))
5571                    .map(|l| l.debit_amount)
5572                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5573                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5574                    Some(total_depr)
5575                } else {
5576                    None
5577                };
5578
5579                let (segs, recon) = seg_gen.generate(
5580                    group_code,
5581                    &period_label,
5582                    consolidated_revenue,
5583                    consolidated_profit,
5584                    consolidated_assets,
5585                    &entity_seeds,
5586                    depr_param,
5587                );
5588                segment_reports.extend(segs);
5589                segment_reconciliations.push(recon);
5590            }
5591
5592            info!(
5593                "Segment reports generated: {} segments, {} reconciliations",
5594                segment_reports.len(),
5595                segment_reconciliations.len()
5596            );
5597        }
5598
5599        // Generate bank reconciliations from payment data
5600        if br_enabled && !document_flows.payments.is_empty() {
5601            let employee_ids: Vec<String> = self
5602                .master_data
5603                .employees
5604                .iter()
5605                .map(|e| e.employee_id.clone())
5606                .collect();
5607            let mut br_gen =
5608                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5609
5610            // Group payments by company code and period
5611            for company in &self.config.companies {
5612                let company_payments: Vec<PaymentReference> = document_flows
5613                    .payments
5614                    .iter()
5615                    .filter(|p| p.header.company_code == company.code)
5616                    .map(|p| PaymentReference {
5617                        id: p.header.document_id.clone(),
5618                        amount: if p.is_vendor { p.amount } else { -p.amount },
5619                        date: p.header.document_date,
5620                        reference: p
5621                            .check_number
5622                            .clone()
5623                            .or_else(|| p.wire_reference.clone())
5624                            .unwrap_or_else(|| p.header.document_id.clone()),
5625                    })
5626                    .collect();
5627
5628                if company_payments.is_empty() {
5629                    continue;
5630                }
5631
5632                let bank_account_id = format!("{}-MAIN", company.code);
5633
5634                // Generate one reconciliation per period
5635                for period in 0..self.config.global.period_months {
5636                    let period_start = start_date + chrono::Months::new(period);
5637                    let period_end =
5638                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5639
5640                    let period_payments: Vec<PaymentReference> = company_payments
5641                        .iter()
5642                        .filter(|p| p.date >= period_start && p.date <= period_end)
5643                        .cloned()
5644                        .collect();
5645
5646                    let recon = br_gen.generate(
5647                        &company.code,
5648                        &bank_account_id,
5649                        period_start,
5650                        period_end,
5651                        &company.currency,
5652                        &period_payments,
5653                    );
5654                    bank_reconciliations.push(recon);
5655                }
5656            }
5657            info!(
5658                "Bank reconciliations generated: {} reconciliations",
5659                bank_reconciliations.len()
5660            );
5661        }
5662
5663        stats.bank_reconciliation_count = bank_reconciliations.len();
5664        self.check_resources_with_log("post-financial-reporting")?;
5665
5666        if !trial_balances.is_empty() {
5667            info!(
5668                "Period-close trial balances captured: {} periods",
5669                trial_balances.len()
5670            );
5671        }
5672
5673        // Notes to financial statements are generated in a separate post-processing step
5674        // (generate_notes_to_financial_statements) called after accounting_standards and tax
5675        // phases have completed, so that deferred tax and provision data can be wired in.
5676        let notes_to_financial_statements = Vec::new();
5677
5678        Ok(FinancialReportingSnapshot {
5679            financial_statements,
5680            standalone_statements,
5681            consolidated_statements,
5682            consolidation_schedules,
5683            bank_reconciliations,
5684            trial_balances,
5685            segment_reports,
5686            segment_reconciliations,
5687            notes_to_financial_statements,
5688        })
5689    }
5690
5691    /// Populate notes to financial statements using fully-resolved snapshots.
5692    ///
5693    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
5694    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
5695    /// can be wired into the notes context.  The method mutates
5696    /// `financial_reporting.notes_to_financial_statements` in-place.
5697    fn generate_notes_to_financial_statements(
5698        &self,
5699        financial_reporting: &mut FinancialReportingSnapshot,
5700        accounting_standards: &AccountingStandardsSnapshot,
5701        tax: &TaxSnapshot,
5702        hr: &HrSnapshot,
5703        audit: &AuditSnapshot,
5704        treasury: &TreasurySnapshot,
5705    ) {
5706        use datasynth_config::schema::AccountingFrameworkConfig;
5707        use datasynth_core::models::StatementType;
5708        use datasynth_generators::period_close::notes_generator::{
5709            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5710        };
5711
5712        let seed = self.seed;
5713        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5714        {
5715            Ok(d) => d,
5716            Err(_) => return,
5717        };
5718
5719        let mut notes_gen = NotesGenerator::new(seed + 4235);
5720
5721        for company in &self.config.companies {
5722            let last_period_end = start_date
5723                + chrono::Months::new(self.config.global.period_months)
5724                - chrono::Days::new(1);
5725            let fiscal_year = last_period_end.year() as u16;
5726
5727            // Extract relevant amounts from the already-generated financial statements
5728            let entity_is = financial_reporting
5729                .standalone_statements
5730                .get(&company.code)
5731                .and_then(|stmts| {
5732                    stmts.iter().find(|s| {
5733                        s.fiscal_year == fiscal_year
5734                            && s.statement_type == StatementType::IncomeStatement
5735                    })
5736                });
5737            let entity_bs = financial_reporting
5738                .standalone_statements
5739                .get(&company.code)
5740                .and_then(|stmts| {
5741                    stmts.iter().find(|s| {
5742                        s.fiscal_year == fiscal_year
5743                            && s.statement_type == StatementType::BalanceSheet
5744                    })
5745                });
5746
5747            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
5748            let revenue_amount = entity_is
5749                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5750                .map(|li| li.amount);
5751            let ppe_gross = entity_bs
5752                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5753                .map(|li| li.amount);
5754
5755            let framework = match self
5756                .config
5757                .accounting_standards
5758                .framework
5759                .unwrap_or_default()
5760            {
5761                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5762                    "IFRS".to_string()
5763                }
5764                _ => "US GAAP".to_string(),
5765            };
5766
5767            // ---- Deferred tax (IAS 12 / ASC 740) ----
5768            // Sum closing DTA and DTL from rollforward entries for this entity.
5769            let (entity_dta, entity_dtl) = {
5770                let mut dta = rust_decimal::Decimal::ZERO;
5771                let mut dtl = rust_decimal::Decimal::ZERO;
5772                for rf in &tax.deferred_tax.rollforwards {
5773                    if rf.entity_code == company.code {
5774                        dta += rf.closing_dta;
5775                        dtl += rf.closing_dtl;
5776                    }
5777                }
5778                (
5779                    if dta > rust_decimal::Decimal::ZERO {
5780                        Some(dta)
5781                    } else {
5782                        None
5783                    },
5784                    if dtl > rust_decimal::Decimal::ZERO {
5785                        Some(dtl)
5786                    } else {
5787                        None
5788                    },
5789                )
5790            };
5791
5792            // ---- Provisions (IAS 37 / ASC 450) ----
5793            // Filter provisions to this entity; sum best_estimate amounts.
5794            let entity_provisions: Vec<_> = accounting_standards
5795                .provisions
5796                .iter()
5797                .filter(|p| p.entity_code == company.code)
5798                .collect();
5799            let provision_count = entity_provisions.len();
5800            let total_provisions = if provision_count > 0 {
5801                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5802            } else {
5803                None
5804            };
5805
5806            // ---- Pension data from HR snapshot ----
5807            let entity_pension_plan_count = hr
5808                .pension_plans
5809                .iter()
5810                .filter(|p| p.entity_code == company.code)
5811                .count();
5812            let entity_total_dbo: Option<rust_decimal::Decimal> = {
5813                let sum: rust_decimal::Decimal = hr
5814                    .pension_disclosures
5815                    .iter()
5816                    .filter(|d| {
5817                        hr.pension_plans
5818                            .iter()
5819                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5820                    })
5821                    .map(|d| d.net_pension_liability)
5822                    .sum();
5823                let plan_assets_sum: rust_decimal::Decimal = hr
5824                    .pension_plan_assets
5825                    .iter()
5826                    .filter(|a| {
5827                        hr.pension_plans
5828                            .iter()
5829                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5830                    })
5831                    .map(|a| a.fair_value_closing)
5832                    .sum();
5833                if entity_pension_plan_count > 0 {
5834                    Some(sum + plan_assets_sum)
5835                } else {
5836                    None
5837                }
5838            };
5839            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5840                let sum: rust_decimal::Decimal = hr
5841                    .pension_plan_assets
5842                    .iter()
5843                    .filter(|a| {
5844                        hr.pension_plans
5845                            .iter()
5846                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5847                    })
5848                    .map(|a| a.fair_value_closing)
5849                    .sum();
5850                if entity_pension_plan_count > 0 {
5851                    Some(sum)
5852                } else {
5853                    None
5854                }
5855            };
5856
5857            // ---- Audit data: related parties + subsequent events ----
5858            // Audit snapshot covers all entities; use total counts (common case = single entity).
5859            let rp_count = audit.related_party_transactions.len();
5860            let se_count = audit.subsequent_events.len();
5861            let adjusting_count = audit
5862                .subsequent_events
5863                .iter()
5864                .filter(|e| {
5865                    matches!(
5866                        e.classification,
5867                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5868                    )
5869                })
5870                .count();
5871
5872            let ctx = NotesGeneratorContext {
5873                entity_code: company.code.clone(),
5874                framework,
5875                period: format!("FY{}", fiscal_year),
5876                period_end: last_period_end,
5877                currency: company.currency.clone(),
5878                revenue_amount,
5879                total_ppe_gross: ppe_gross,
5880                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5881                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
5882                deferred_tax_asset: entity_dta,
5883                deferred_tax_liability: entity_dtl,
5884                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
5885                provision_count,
5886                total_provisions,
5887                // Pension data from HR snapshot
5888                pension_plan_count: entity_pension_plan_count,
5889                total_dbo: entity_total_dbo,
5890                total_plan_assets: entity_total_plan_assets,
5891                // Audit data
5892                related_party_transaction_count: rp_count,
5893                subsequent_event_count: se_count,
5894                adjusting_event_count: adjusting_count,
5895                ..NotesGeneratorContext::default()
5896            };
5897
5898            let entity_notes = notes_gen.generate(&ctx);
5899            let standard_note_count = entity_notes.len() as u32;
5900            info!(
5901                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5902                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5903            );
5904            financial_reporting
5905                .notes_to_financial_statements
5906                .extend(entity_notes);
5907
5908            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
5909            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5910                .debt_instruments
5911                .iter()
5912                .filter(|d| d.entity_id == company.code)
5913                .map(|d| {
5914                    (
5915                        format!("{:?}", d.instrument_type),
5916                        d.principal,
5917                        d.maturity_date.to_string(),
5918                    )
5919                })
5920                .collect();
5921
5922            let hedge_count = treasury.hedge_relationships.len();
5923            let effective_hedges = treasury
5924                .hedge_relationships
5925                .iter()
5926                .filter(|h| h.is_effective)
5927                .count();
5928            let total_notional: rust_decimal::Decimal = treasury
5929                .hedging_instruments
5930                .iter()
5931                .map(|h| h.notional_amount)
5932                .sum();
5933            let total_fair_value: rust_decimal::Decimal = treasury
5934                .hedging_instruments
5935                .iter()
5936                .map(|h| h.fair_value)
5937                .sum();
5938
5939            // Join provision_movements with provisions to get entity/type info
5940            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5941                .provisions
5942                .iter()
5943                .filter(|p| p.entity_code == company.code)
5944                .map(|p| p.id.as_str())
5945                .collect();
5946            let provision_movements: Vec<(
5947                String,
5948                rust_decimal::Decimal,
5949                rust_decimal::Decimal,
5950                rust_decimal::Decimal,
5951            )> = accounting_standards
5952                .provision_movements
5953                .iter()
5954                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5955                .map(|m| {
5956                    let prov_type = accounting_standards
5957                        .provisions
5958                        .iter()
5959                        .find(|p| p.id == m.provision_id)
5960                        .map(|p| format!("{:?}", p.provision_type))
5961                        .unwrap_or_else(|| "Unknown".to_string());
5962                    (prov_type, m.opening, m.additions, m.closing)
5963                })
5964                .collect();
5965
5966            let enhanced_ctx = EnhancedNotesContext {
5967                entity_code: company.code.clone(),
5968                period: format!("FY{}", fiscal_year),
5969                currency: company.currency.clone(),
5970                // Inventory breakdown: best-effort using zero (would need balance tracker)
5971                finished_goods_value: rust_decimal::Decimal::ZERO,
5972                wip_value: rust_decimal::Decimal::ZERO,
5973                raw_materials_value: rust_decimal::Decimal::ZERO,
5974                debt_instruments,
5975                hedge_count,
5976                effective_hedges,
5977                total_notional,
5978                total_fair_value,
5979                provision_movements,
5980            };
5981
5982            let enhanced_notes =
5983                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
5984            if !enhanced_notes.is_empty() {
5985                info!(
5986                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
5987                    company.code,
5988                    enhanced_notes.len(),
5989                    enhanced_ctx.debt_instruments.len(),
5990                    hedge_count,
5991                    enhanced_ctx.provision_movements.len(),
5992                );
5993                financial_reporting
5994                    .notes_to_financial_statements
5995                    .extend(enhanced_notes);
5996            }
5997        }
5998    }
5999
6000    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
6001    ///
6002    /// This ensures the trial balance is coherent with the JEs: every debit and credit
6003    /// posted in the journal entries flows through to the trial balance, using the real
6004    /// GL account numbers from the CoA.
6005    fn build_trial_balance_from_entries(
6006        journal_entries: &[JournalEntry],
6007        coa: &ChartOfAccounts,
6008        company_code: &str,
6009        fiscal_year: u16,
6010        fiscal_period: u8,
6011    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6012        use rust_decimal::Decimal;
6013
6014        // Accumulate total debits and credits per GL account
6015        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6016        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6017
6018        for je in journal_entries {
6019            // Filter to matching company, fiscal year, and period
6020            if je.header.company_code != company_code
6021                || je.header.fiscal_year != fiscal_year
6022                || je.header.fiscal_period != fiscal_period
6023            {
6024                continue;
6025            }
6026
6027            for line in &je.lines {
6028                let acct = &line.gl_account;
6029                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6030                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6031            }
6032        }
6033
6034        // Build a TrialBalanceEntry for each account that had activity
6035        let mut all_accounts: Vec<&String> = account_debits
6036            .keys()
6037            .chain(account_credits.keys())
6038            .collect::<std::collections::HashSet<_>>()
6039            .into_iter()
6040            .collect();
6041        all_accounts.sort();
6042
6043        let mut entries = Vec::new();
6044
6045        for acct_number in all_accounts {
6046            let debit = account_debits
6047                .get(acct_number)
6048                .copied()
6049                .unwrap_or(Decimal::ZERO);
6050            let credit = account_credits
6051                .get(acct_number)
6052                .copied()
6053                .unwrap_or(Decimal::ZERO);
6054
6055            if debit.is_zero() && credit.is_zero() {
6056                continue;
6057            }
6058
6059            // Look up account name from CoA, fall back to "Account {code}"
6060            let account_name = coa
6061                .get_account(acct_number)
6062                .map(|gl| gl.short_description.clone())
6063                .unwrap_or_else(|| format!("Account {acct_number}"));
6064
6065            // Map account code prefix to the category strings expected by
6066            // FinancialStatementGenerator (Cash, Receivables, Inventory,
6067            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
6068            // OperatingExpenses).
6069            let category = Self::category_from_account_code(acct_number);
6070
6071            entries.push(datasynth_generators::TrialBalanceEntry {
6072                account_code: acct_number.clone(),
6073                account_name,
6074                category,
6075                debit_balance: debit,
6076                credit_balance: credit,
6077            });
6078        }
6079
6080        entries
6081    }
6082
6083    /// Build a cumulative trial balance by aggregating all JEs from the start up to
6084    /// (and including) the given period end date.
6085    ///
6086    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
6087    /// while income statement accounts (revenue, expenses) show only the current period.
6088    /// The two are merged into a single Vec for the FinancialStatementGenerator.
6089    fn build_cumulative_trial_balance(
6090        journal_entries: &[JournalEntry],
6091        coa: &ChartOfAccounts,
6092        company_code: &str,
6093        start_date: NaiveDate,
6094        period_end: NaiveDate,
6095        fiscal_year: u16,
6096        fiscal_period: u8,
6097    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6098        use rust_decimal::Decimal;
6099
6100        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
6101        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6102        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6103
6104        // Accumulate debits/credits for income statement accounts (current period only)
6105        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6106        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6107
6108        for je in journal_entries {
6109            if je.header.company_code != company_code {
6110                continue;
6111            }
6112
6113            for line in &je.lines {
6114                let acct = &line.gl_account;
6115                let category = Self::category_from_account_code(acct);
6116                let is_bs_account = matches!(
6117                    category.as_str(),
6118                    "Cash"
6119                        | "Receivables"
6120                        | "Inventory"
6121                        | "FixedAssets"
6122                        | "Payables"
6123                        | "AccruedLiabilities"
6124                        | "LongTermDebt"
6125                        | "Equity"
6126                );
6127
6128                if is_bs_account {
6129                    // Balance sheet: accumulate from start through period_end
6130                    if je.header.document_date <= period_end
6131                        && je.header.document_date >= start_date
6132                    {
6133                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6134                            line.debit_amount;
6135                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6136                            line.credit_amount;
6137                    }
6138                } else {
6139                    // Income statement: current period only
6140                    if je.header.fiscal_year == fiscal_year
6141                        && je.header.fiscal_period == fiscal_period
6142                    {
6143                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6144                            line.debit_amount;
6145                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6146                            line.credit_amount;
6147                    }
6148                }
6149            }
6150        }
6151
6152        // Merge all accounts
6153        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6154        all_accounts.extend(bs_debits.keys().cloned());
6155        all_accounts.extend(bs_credits.keys().cloned());
6156        all_accounts.extend(is_debits.keys().cloned());
6157        all_accounts.extend(is_credits.keys().cloned());
6158
6159        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6160        sorted_accounts.sort();
6161
6162        let mut entries = Vec::new();
6163
6164        for acct_number in &sorted_accounts {
6165            let category = Self::category_from_account_code(acct_number);
6166            let is_bs_account = matches!(
6167                category.as_str(),
6168                "Cash"
6169                    | "Receivables"
6170                    | "Inventory"
6171                    | "FixedAssets"
6172                    | "Payables"
6173                    | "AccruedLiabilities"
6174                    | "LongTermDebt"
6175                    | "Equity"
6176            );
6177
6178            let (debit, credit) = if is_bs_account {
6179                (
6180                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6181                    bs_credits
6182                        .get(acct_number)
6183                        .copied()
6184                        .unwrap_or(Decimal::ZERO),
6185                )
6186            } else {
6187                (
6188                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6189                    is_credits
6190                        .get(acct_number)
6191                        .copied()
6192                        .unwrap_or(Decimal::ZERO),
6193                )
6194            };
6195
6196            if debit.is_zero() && credit.is_zero() {
6197                continue;
6198            }
6199
6200            let account_name = coa
6201                .get_account(acct_number)
6202                .map(|gl| gl.short_description.clone())
6203                .unwrap_or_else(|| format!("Account {acct_number}"));
6204
6205            entries.push(datasynth_generators::TrialBalanceEntry {
6206                account_code: acct_number.clone(),
6207                account_name,
6208                category,
6209                debit_balance: debit,
6210                credit_balance: credit,
6211            });
6212        }
6213
6214        entries
6215    }
6216
6217    /// Build a JE-derived cash flow statement using the indirect method.
6218    ///
6219    /// Compares current and prior cumulative trial balances to derive working capital
6220    /// changes, producing a coherent cash flow statement tied to actual journal entries.
6221    fn build_cash_flow_from_trial_balances(
6222        current_tb: &[datasynth_generators::TrialBalanceEntry],
6223        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
6224        net_income: rust_decimal::Decimal,
6225    ) -> Vec<CashFlowItem> {
6226        use rust_decimal::Decimal;
6227
6228        // Helper: aggregate a TB by category and return net (debit - credit)
6229        let aggregate =
6230            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
6231                let mut map: HashMap<String, Decimal> = HashMap::new();
6232                for entry in tb {
6233                    let net = entry.debit_balance - entry.credit_balance;
6234                    *map.entry(entry.category.clone()).or_default() += net;
6235                }
6236                map
6237            };
6238
6239        let current = aggregate(current_tb);
6240        let prior = prior_tb.map(aggregate);
6241
6242        // Get balance for a category, defaulting to zero
6243        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
6244            *map.get(key).unwrap_or(&Decimal::ZERO)
6245        };
6246
6247        // Compute change: current - prior (or current if no prior)
6248        let change = |key: &str| -> Decimal {
6249            let curr = get(&current, key);
6250            match &prior {
6251                Some(p) => curr - get(p, key),
6252                None => curr,
6253            }
6254        };
6255
6256        // Operating activities (indirect method)
6257        // Depreciation add-back: approximate from FixedAssets decrease
6258        let fixed_asset_change = change("FixedAssets");
6259        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
6260            -fixed_asset_change
6261        } else {
6262            Decimal::ZERO
6263        };
6264
6265        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
6266        let ar_change = change("Receivables");
6267        let inventory_change = change("Inventory");
6268        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
6269        let ap_change = change("Payables");
6270        let accrued_change = change("AccruedLiabilities");
6271
6272        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
6273            + (-ap_change)
6274            + (-accrued_change);
6275
6276        // Investing activities
6277        let capex = if fixed_asset_change > Decimal::ZERO {
6278            -fixed_asset_change
6279        } else {
6280            Decimal::ZERO
6281        };
6282        let investing_cf = capex;
6283
6284        // Financing activities
6285        let debt_change = -change("LongTermDebt");
6286        let equity_change = -change("Equity");
6287        let financing_cf = debt_change + equity_change;
6288
6289        let net_change = operating_cf + investing_cf + financing_cf;
6290
6291        vec![
6292            CashFlowItem {
6293                item_code: "CF-NI".to_string(),
6294                label: "Net Income".to_string(),
6295                category: CashFlowCategory::Operating,
6296                amount: net_income,
6297                amount_prior: None,
6298                sort_order: 1,
6299                is_total: false,
6300            },
6301            CashFlowItem {
6302                item_code: "CF-DEP".to_string(),
6303                label: "Depreciation & Amortization".to_string(),
6304                category: CashFlowCategory::Operating,
6305                amount: depreciation_addback,
6306                amount_prior: None,
6307                sort_order: 2,
6308                is_total: false,
6309            },
6310            CashFlowItem {
6311                item_code: "CF-AR".to_string(),
6312                label: "Change in Accounts Receivable".to_string(),
6313                category: CashFlowCategory::Operating,
6314                amount: -ar_change,
6315                amount_prior: None,
6316                sort_order: 3,
6317                is_total: false,
6318            },
6319            CashFlowItem {
6320                item_code: "CF-AP".to_string(),
6321                label: "Change in Accounts Payable".to_string(),
6322                category: CashFlowCategory::Operating,
6323                amount: -ap_change,
6324                amount_prior: None,
6325                sort_order: 4,
6326                is_total: false,
6327            },
6328            CashFlowItem {
6329                item_code: "CF-INV".to_string(),
6330                label: "Change in Inventory".to_string(),
6331                category: CashFlowCategory::Operating,
6332                amount: -inventory_change,
6333                amount_prior: None,
6334                sort_order: 5,
6335                is_total: false,
6336            },
6337            CashFlowItem {
6338                item_code: "CF-OP".to_string(),
6339                label: "Net Cash from Operating Activities".to_string(),
6340                category: CashFlowCategory::Operating,
6341                amount: operating_cf,
6342                amount_prior: None,
6343                sort_order: 6,
6344                is_total: true,
6345            },
6346            CashFlowItem {
6347                item_code: "CF-CAPEX".to_string(),
6348                label: "Capital Expenditures".to_string(),
6349                category: CashFlowCategory::Investing,
6350                amount: capex,
6351                amount_prior: None,
6352                sort_order: 7,
6353                is_total: false,
6354            },
6355            CashFlowItem {
6356                item_code: "CF-INV-T".to_string(),
6357                label: "Net Cash from Investing Activities".to_string(),
6358                category: CashFlowCategory::Investing,
6359                amount: investing_cf,
6360                amount_prior: None,
6361                sort_order: 8,
6362                is_total: true,
6363            },
6364            CashFlowItem {
6365                item_code: "CF-DEBT".to_string(),
6366                label: "Net Borrowings / (Repayments)".to_string(),
6367                category: CashFlowCategory::Financing,
6368                amount: debt_change,
6369                amount_prior: None,
6370                sort_order: 9,
6371                is_total: false,
6372            },
6373            CashFlowItem {
6374                item_code: "CF-EQ".to_string(),
6375                label: "Equity Changes".to_string(),
6376                category: CashFlowCategory::Financing,
6377                amount: equity_change,
6378                amount_prior: None,
6379                sort_order: 10,
6380                is_total: false,
6381            },
6382            CashFlowItem {
6383                item_code: "CF-FIN-T".to_string(),
6384                label: "Net Cash from Financing Activities".to_string(),
6385                category: CashFlowCategory::Financing,
6386                amount: financing_cf,
6387                amount_prior: None,
6388                sort_order: 11,
6389                is_total: true,
6390            },
6391            CashFlowItem {
6392                item_code: "CF-NET".to_string(),
6393                label: "Net Change in Cash".to_string(),
6394                category: CashFlowCategory::Operating,
6395                amount: net_change,
6396                amount_prior: None,
6397                sort_order: 12,
6398                is_total: true,
6399            },
6400        ]
6401    }
6402
6403    /// Calculate net income from a set of trial balance entries.
6404    ///
6405    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
6406    fn calculate_net_income_from_tb(
6407        tb: &[datasynth_generators::TrialBalanceEntry],
6408    ) -> rust_decimal::Decimal {
6409        use rust_decimal::Decimal;
6410
6411        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6412        for entry in tb {
6413            let net = entry.debit_balance - entry.credit_balance;
6414            *aggregated.entry(entry.category.clone()).or_default() += net;
6415        }
6416
6417        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6418        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6419        let opex = *aggregated
6420            .get("OperatingExpenses")
6421            .unwrap_or(&Decimal::ZERO);
6422        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6423        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6424
6425        // revenue is negative (credit-normal), expenses are positive (debit-normal)
6426        // other_income is typically negative (credit), other_expenses is typically positive
6427        let operating_income = revenue - cogs - opex - other_expenses - other_income;
6428        let tax_rate = Decimal::new(25, 2); // 0.25
6429        let tax = operating_income * tax_rate;
6430        operating_income - tax
6431    }
6432
6433    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
6434    ///
6435    /// Uses the first two digits of the account code to classify into the categories
6436    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
6437    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
6438    /// OperatingExpenses, OtherIncome, OtherExpenses.
6439    fn category_from_account_code(code: &str) -> String {
6440        let prefix: String = code.chars().take(2).collect();
6441        match prefix.as_str() {
6442            "10" => "Cash",
6443            "11" => "Receivables",
6444            "12" | "13" | "14" => "Inventory",
6445            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
6446            "20" => "Payables",
6447            "21" | "22" | "23" | "24" => "AccruedLiabilities",
6448            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
6449            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
6450            "40" | "41" | "42" | "43" | "44" => "Revenue",
6451            "50" | "51" | "52" => "CostOfSales",
6452            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
6453                "OperatingExpenses"
6454            }
6455            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
6456            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
6457            _ => "OperatingExpenses",
6458        }
6459        .to_string()
6460    }
6461
6462    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
6463    fn phase_hr_data(
6464        &mut self,
6465        stats: &mut EnhancedGenerationStatistics,
6466    ) -> SynthResult<HrSnapshot> {
6467        if !self.phase_config.generate_hr {
6468            debug!("Phase 16: Skipped (HR generation disabled)");
6469            return Ok(HrSnapshot::default());
6470        }
6471
6472        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6473
6474        let seed = self.seed;
6475        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6476            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6477        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6478        let company_code = self
6479            .config
6480            .companies
6481            .first()
6482            .map(|c| c.code.as_str())
6483            .unwrap_or("1000");
6484        let currency = self
6485            .config
6486            .companies
6487            .first()
6488            .map(|c| c.currency.as_str())
6489            .unwrap_or("USD");
6490
6491        let employee_ids: Vec<String> = self
6492            .master_data
6493            .employees
6494            .iter()
6495            .map(|e| e.employee_id.clone())
6496            .collect();
6497
6498        if employee_ids.is_empty() {
6499            debug!("Phase 16: Skipped (no employees available)");
6500            return Ok(HrSnapshot::default());
6501        }
6502
6503        // Extract cost-center pool from master data employees for cross-reference
6504        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
6505        let cost_center_ids: Vec<String> = self
6506            .master_data
6507            .employees
6508            .iter()
6509            .filter_map(|e| e.cost_center.clone())
6510            .collect::<std::collections::HashSet<_>>()
6511            .into_iter()
6512            .collect();
6513
6514        let mut snapshot = HrSnapshot::default();
6515
6516        // Generate payroll runs (one per month)
6517        if self.config.hr.payroll.enabled {
6518            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6519                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6520
6521            // Look up country pack for payroll deductions and labels
6522            let payroll_pack = self.primary_pack();
6523
6524            // Store the pack on the generator so generate() resolves
6525            // localized deduction rates and labels from it.
6526            payroll_gen.set_country_pack(payroll_pack.clone());
6527
6528            let employees_with_salary: Vec<(
6529                String,
6530                rust_decimal::Decimal,
6531                Option<String>,
6532                Option<String>,
6533            )> = self
6534                .master_data
6535                .employees
6536                .iter()
6537                .map(|e| {
6538                    // Use the employee's actual annual base salary.
6539                    // Fall back to $60,000 / yr if somehow zero.
6540                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6541                        e.base_salary
6542                    } else {
6543                        rust_decimal::Decimal::from(60_000)
6544                    };
6545                    (
6546                        e.employee_id.clone(),
6547                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
6548                        e.cost_center.clone(),
6549                        e.department_id.clone(),
6550                    )
6551                })
6552                .collect();
6553
6554            // Use generate_with_changes when employee change history is available
6555            // so that salary adjustments, transfers, etc. are reflected in payroll.
6556            let change_history = &self.master_data.employee_change_history;
6557            let has_changes = !change_history.is_empty();
6558            if has_changes {
6559                debug!(
6560                    "Payroll will incorporate {} employee change events",
6561                    change_history.len()
6562                );
6563            }
6564
6565            for month in 0..self.config.global.period_months {
6566                let period_start = start_date + chrono::Months::new(month);
6567                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6568                let (run, items) = if has_changes {
6569                    payroll_gen.generate_with_changes(
6570                        company_code,
6571                        &employees_with_salary,
6572                        period_start,
6573                        period_end,
6574                        currency,
6575                        change_history,
6576                    )
6577                } else {
6578                    payroll_gen.generate(
6579                        company_code,
6580                        &employees_with_salary,
6581                        period_start,
6582                        period_end,
6583                        currency,
6584                    )
6585                };
6586                snapshot.payroll_runs.push(run);
6587                snapshot.payroll_run_count += 1;
6588                snapshot.payroll_line_item_count += items.len();
6589                snapshot.payroll_line_items.extend(items);
6590            }
6591        }
6592
6593        // Generate time entries
6594        if self.config.hr.time_attendance.enabled {
6595            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6596                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6597            let entries = time_gen.generate(
6598                &employee_ids,
6599                start_date,
6600                end_date,
6601                &self.config.hr.time_attendance,
6602            );
6603            snapshot.time_entry_count = entries.len();
6604            snapshot.time_entries = entries;
6605        }
6606
6607        // Generate expense reports
6608        if self.config.hr.expenses.enabled {
6609            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6610                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6611            expense_gen.set_country_pack(self.primary_pack().clone());
6612            let company_currency = self
6613                .config
6614                .companies
6615                .first()
6616                .map(|c| c.currency.as_str())
6617                .unwrap_or("USD");
6618            let reports = expense_gen.generate_with_currency(
6619                &employee_ids,
6620                start_date,
6621                end_date,
6622                &self.config.hr.expenses,
6623                company_currency,
6624            );
6625            snapshot.expense_report_count = reports.len();
6626            snapshot.expense_reports = reports;
6627        }
6628
6629        // Generate benefit enrollments (gated on payroll, since benefits require employees)
6630        if self.config.hr.payroll.enabled {
6631            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6632            let employee_pairs: Vec<(String, String)> = self
6633                .master_data
6634                .employees
6635                .iter()
6636                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6637                .collect();
6638            let enrollments =
6639                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6640            snapshot.benefit_enrollment_count = enrollments.len();
6641            snapshot.benefit_enrollments = enrollments;
6642        }
6643
6644        // Generate defined benefit pension plans (IAS 19 / ASC 715)
6645        if self.phase_config.generate_hr {
6646            let entity_name = self
6647                .config
6648                .companies
6649                .first()
6650                .map(|c| c.name.as_str())
6651                .unwrap_or("Entity");
6652            let period_months = self.config.global.period_months;
6653            let period_label = {
6654                let y = start_date.year();
6655                let m = start_date.month();
6656                if period_months >= 12 {
6657                    format!("FY{y}")
6658                } else {
6659                    format!("{y}-{m:02}")
6660                }
6661            };
6662            let reporting_date =
6663                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6664
6665            // Compute average annual salary from actual payroll data when available.
6666            // PayrollRun.total_gross covers all employees for one pay period; we sum
6667            // across all runs and divide by employee_count to get per-employee total,
6668            // then annualise for sub-annual periods.
6669            let avg_salary: Option<rust_decimal::Decimal> = {
6670                let employee_count = employee_ids.len();
6671                if self.config.hr.payroll.enabled
6672                    && employee_count > 0
6673                    && !snapshot.payroll_runs.is_empty()
6674                {
6675                    // Sum total gross pay across all payroll runs for this company
6676                    let total_gross: rust_decimal::Decimal = snapshot
6677                        .payroll_runs
6678                        .iter()
6679                        .filter(|r| r.company_code == company_code)
6680                        .map(|r| r.total_gross)
6681                        .sum();
6682                    if total_gross > rust_decimal::Decimal::ZERO {
6683                        // Annualise: total_gross covers `period_months` months of pay
6684                        let annual_total = if period_months > 0 && period_months < 12 {
6685                            total_gross * rust_decimal::Decimal::from(12u32)
6686                                / rust_decimal::Decimal::from(period_months)
6687                        } else {
6688                            total_gross
6689                        };
6690                        Some(
6691                            (annual_total / rust_decimal::Decimal::from(employee_count))
6692                                .round_dp(2),
6693                        )
6694                    } else {
6695                        None
6696                    }
6697                } else {
6698                    None
6699                }
6700            };
6701
6702            let mut pension_gen =
6703                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6704            let pension_snap = pension_gen.generate(
6705                company_code,
6706                entity_name,
6707                &period_label,
6708                reporting_date,
6709                employee_ids.len(),
6710                currency,
6711                avg_salary,
6712                period_months,
6713            );
6714            snapshot.pension_plan_count = pension_snap.plans.len();
6715            snapshot.pension_plans = pension_snap.plans;
6716            snapshot.pension_obligations = pension_snap.obligations;
6717            snapshot.pension_plan_assets = pension_snap.plan_assets;
6718            snapshot.pension_disclosures = pension_snap.disclosures;
6719            // Pension JEs are returned here so they can be added to entries
6720            // in the caller (stored temporarily on snapshot for transfer).
6721            // We embed them in the hr snapshot for simplicity; the orchestrator
6722            // will extract and extend `entries`.
6723            snapshot.pension_journal_entries = pension_snap.journal_entries;
6724        }
6725
6726        // Generate stock-based compensation (ASC 718 / IFRS 2)
6727        if self.phase_config.generate_hr && !employee_ids.is_empty() {
6728            let period_months = self.config.global.period_months;
6729            let period_label = {
6730                let y = start_date.year();
6731                let m = start_date.month();
6732                if period_months >= 12 {
6733                    format!("FY{y}")
6734                } else {
6735                    format!("{y}-{m:02}")
6736                }
6737            };
6738            let reporting_date =
6739                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6740
6741            let mut stock_comp_gen =
6742                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6743            let stock_snap = stock_comp_gen.generate(
6744                company_code,
6745                &employee_ids,
6746                start_date,
6747                &period_label,
6748                reporting_date,
6749                currency,
6750            );
6751            snapshot.stock_grant_count = stock_snap.grants.len();
6752            snapshot.stock_grants = stock_snap.grants;
6753            snapshot.stock_comp_expenses = stock_snap.expenses;
6754            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6755        }
6756
6757        stats.payroll_run_count = snapshot.payroll_run_count;
6758        stats.time_entry_count = snapshot.time_entry_count;
6759        stats.expense_report_count = snapshot.expense_report_count;
6760        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6761        stats.pension_plan_count = snapshot.pension_plan_count;
6762        stats.stock_grant_count = snapshot.stock_grant_count;
6763
6764        info!(
6765            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6766            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6767            snapshot.time_entry_count, snapshot.expense_report_count,
6768            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6769            snapshot.stock_grant_count
6770        );
6771        self.check_resources_with_log("post-hr")?;
6772
6773        Ok(snapshot)
6774    }
6775
6776    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
6777    fn phase_accounting_standards(
6778        &mut self,
6779        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6780        journal_entries: &[JournalEntry],
6781        stats: &mut EnhancedGenerationStatistics,
6782    ) -> SynthResult<AccountingStandardsSnapshot> {
6783        if !self.phase_config.generate_accounting_standards {
6784            debug!("Phase 17: Skipped (accounting standards generation disabled)");
6785            return Ok(AccountingStandardsSnapshot::default());
6786        }
6787        info!("Phase 17: Generating Accounting Standards Data");
6788
6789        let seed = self.seed;
6790        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6791            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6792        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6793        let company_code = self
6794            .config
6795            .companies
6796            .first()
6797            .map(|c| c.code.as_str())
6798            .unwrap_or("1000");
6799        let currency = self
6800            .config
6801            .companies
6802            .first()
6803            .map(|c| c.currency.as_str())
6804            .unwrap_or("USD");
6805
6806        // Convert config framework to standards framework.
6807        // If the user explicitly set a framework in the YAML config, use that.
6808        // Otherwise, fall back to the country pack's accounting.framework field,
6809        // and if that is also absent or unrecognised, default to US GAAP.
6810        let framework = match self.config.accounting_standards.framework {
6811            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6812                datasynth_standards::framework::AccountingFramework::UsGaap
6813            }
6814            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6815                datasynth_standards::framework::AccountingFramework::Ifrs
6816            }
6817            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6818                datasynth_standards::framework::AccountingFramework::DualReporting
6819            }
6820            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6821                datasynth_standards::framework::AccountingFramework::FrenchGaap
6822            }
6823            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6824                datasynth_standards::framework::AccountingFramework::GermanGaap
6825            }
6826            None => {
6827                // Derive framework from the primary company's country pack
6828                let pack = self.primary_pack();
6829                let pack_fw = pack.accounting.framework.as_str();
6830                match pack_fw {
6831                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6832                    "dual_reporting" => {
6833                        datasynth_standards::framework::AccountingFramework::DualReporting
6834                    }
6835                    "french_gaap" => {
6836                        datasynth_standards::framework::AccountingFramework::FrenchGaap
6837                    }
6838                    "german_gaap" | "hgb" => {
6839                        datasynth_standards::framework::AccountingFramework::GermanGaap
6840                    }
6841                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
6842                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6843                }
6844            }
6845        };
6846
6847        let mut snapshot = AccountingStandardsSnapshot::default();
6848
6849        // Revenue recognition
6850        if self.config.accounting_standards.revenue_recognition.enabled {
6851            let customer_ids: Vec<String> = self
6852                .master_data
6853                .customers
6854                .iter()
6855                .map(|c| c.customer_id.clone())
6856                .collect();
6857
6858            if !customer_ids.is_empty() {
6859                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6860                let contracts = rev_gen.generate(
6861                    company_code,
6862                    &customer_ids,
6863                    start_date,
6864                    end_date,
6865                    currency,
6866                    &self.config.accounting_standards.revenue_recognition,
6867                    framework,
6868                );
6869                snapshot.revenue_contract_count = contracts.len();
6870                snapshot.contracts = contracts;
6871            }
6872        }
6873
6874        // Impairment testing
6875        if self.config.accounting_standards.impairment.enabled {
6876            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6877                .master_data
6878                .assets
6879                .iter()
6880                .map(|a| {
6881                    (
6882                        a.asset_id.clone(),
6883                        a.description.clone(),
6884                        a.acquisition_cost,
6885                    )
6886                })
6887                .collect();
6888
6889            if !asset_data.is_empty() {
6890                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6891                let tests = imp_gen.generate(
6892                    company_code,
6893                    &asset_data,
6894                    end_date,
6895                    &self.config.accounting_standards.impairment,
6896                    framework,
6897                );
6898                snapshot.impairment_test_count = tests.len();
6899                snapshot.impairment_tests = tests;
6900            }
6901        }
6902
6903        // Business combinations (IFRS 3 / ASC 805)
6904        if self
6905            .config
6906            .accounting_standards
6907            .business_combinations
6908            .enabled
6909        {
6910            let bc_config = &self.config.accounting_standards.business_combinations;
6911            let framework_str = match framework {
6912                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6913                _ => "US_GAAP",
6914            };
6915            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6916            let bc_snap = bc_gen.generate(
6917                company_code,
6918                currency,
6919                start_date,
6920                end_date,
6921                bc_config.acquisition_count,
6922                framework_str,
6923            );
6924            snapshot.business_combination_count = bc_snap.combinations.len();
6925            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6926            snapshot.business_combinations = bc_snap.combinations;
6927        }
6928
6929        // Expected Credit Loss (IFRS 9 / ASC 326)
6930        if self
6931            .config
6932            .accounting_standards
6933            .expected_credit_loss
6934            .enabled
6935        {
6936            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6937            let framework_str = match framework {
6938                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6939                _ => "ASC_326",
6940            };
6941
6942            // Use AR aging data from the subledger snapshot if available;
6943            // otherwise generate synthetic bucket exposures.
6944            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6945
6946            let mut ecl_gen = EclGenerator::new(seed + 43);
6947
6948            // Collect combined bucket totals across all company AR aging reports.
6949            let bucket_exposures: Vec<(
6950                datasynth_core::models::subledger::ar::AgingBucket,
6951                rust_decimal::Decimal,
6952            )> = if ar_aging_reports.is_empty() {
6953                // No AR aging data — synthesise plausible bucket exposures.
6954                use datasynth_core::models::subledger::ar::AgingBucket;
6955                vec![
6956                    (
6957                        AgingBucket::Current,
6958                        rust_decimal::Decimal::from(500_000_u32),
6959                    ),
6960                    (
6961                        AgingBucket::Days1To30,
6962                        rust_decimal::Decimal::from(120_000_u32),
6963                    ),
6964                    (
6965                        AgingBucket::Days31To60,
6966                        rust_decimal::Decimal::from(45_000_u32),
6967                    ),
6968                    (
6969                        AgingBucket::Days61To90,
6970                        rust_decimal::Decimal::from(15_000_u32),
6971                    ),
6972                    (
6973                        AgingBucket::Over90Days,
6974                        rust_decimal::Decimal::from(8_000_u32),
6975                    ),
6976                ]
6977            } else {
6978                use datasynth_core::models::subledger::ar::AgingBucket;
6979                // Sum bucket totals from all reports.
6980                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6981                    std::collections::HashMap::new();
6982                for report in ar_aging_reports {
6983                    for (bucket, amount) in &report.bucket_totals {
6984                        *totals.entry(*bucket).or_default() += amount;
6985                    }
6986                }
6987                AgingBucket::all()
6988                    .into_iter()
6989                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6990                    .collect()
6991            };
6992
6993            let ecl_snap = ecl_gen.generate(
6994                company_code,
6995                end_date,
6996                &bucket_exposures,
6997                ecl_config,
6998                &period_label,
6999                framework_str,
7000            );
7001
7002            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7003            snapshot.ecl_models = ecl_snap.ecl_models;
7004            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7005            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7006        }
7007
7008        // Provisions and contingencies (IAS 37 / ASC 450)
7009        {
7010            let framework_str = match framework {
7011                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7012                _ => "US_GAAP",
7013            };
7014
7015            // Compute actual revenue from the journal entries generated so far.
7016            // The `journal_entries` slice passed to this phase contains all GL entries
7017            // up to and including Period Close. Fall back to a minimum of 100_000 to
7018            // avoid degenerate zero-based provision amounts on first-period datasets.
7019            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7020                .max(rust_decimal::Decimal::from(100_000_u32));
7021
7022            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7023
7024            let mut prov_gen = ProvisionGenerator::new(seed + 44);
7025            let prov_snap = prov_gen.generate(
7026                company_code,
7027                currency,
7028                revenue_proxy,
7029                end_date,
7030                &period_label,
7031                framework_str,
7032                None, // prior_opening: no carry-forward data in single-period runs
7033            );
7034
7035            snapshot.provision_count = prov_snap.provisions.len();
7036            snapshot.provisions = prov_snap.provisions;
7037            snapshot.provision_movements = prov_snap.movements;
7038            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7039            snapshot.provision_journal_entries = prov_snap.journal_entries;
7040        }
7041
7042        // IAS 21 Functional Currency Translation
7043        // For each company whose functional currency differs from the presentation
7044        // currency, generate a CurrencyTranslationResult with CTA (OCI).
7045        {
7046            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7047
7048            let presentation_currency = self
7049                .config
7050                .global
7051                .presentation_currency
7052                .clone()
7053                .unwrap_or_else(|| self.config.global.group_currency.clone());
7054
7055            // Build a minimal rate table populated with approximate rates from
7056            // the FX model base rates (USD-based) so we can do the translation.
7057            let mut rate_table = FxRateTable::new(&presentation_currency);
7058
7059            // Populate with base rates against USD; if presentation_currency is
7060            // not USD we do a best-effort two-step conversion using the table's
7061            // triangulation support.
7062            let base_rates = base_rates_usd();
7063            for (ccy, rate) in &base_rates {
7064                rate_table.add_rate(FxRate::new(
7065                    ccy,
7066                    "USD",
7067                    RateType::Closing,
7068                    end_date,
7069                    *rate,
7070                    "SYNTHETIC",
7071                ));
7072                // Average rate = 98% of closing (approximation).
7073                // 0.98 = 98/100 = Decimal::new(98, 2)
7074                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7075                rate_table.add_rate(FxRate::new(
7076                    ccy,
7077                    "USD",
7078                    RateType::Average,
7079                    end_date,
7080                    avg,
7081                    "SYNTHETIC",
7082                ));
7083            }
7084
7085            let mut translation_results = Vec::new();
7086            for company in &self.config.companies {
7087                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
7088                // to ensure the translation produces non-trivial CTA amounts.
7089                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7090                    .max(rust_decimal::Decimal::from(100_000_u32));
7091
7092                let func_ccy = company
7093                    .functional_currency
7094                    .clone()
7095                    .unwrap_or_else(|| company.currency.clone());
7096
7097                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7098                    &company.code,
7099                    &func_ccy,
7100                    &presentation_currency,
7101                    &ias21_period_label,
7102                    end_date,
7103                    company_revenue,
7104                    &rate_table,
7105                );
7106                translation_results.push(result);
7107            }
7108
7109            snapshot.currency_translation_count = translation_results.len();
7110            snapshot.currency_translation_results = translation_results;
7111        }
7112
7113        stats.revenue_contract_count = snapshot.revenue_contract_count;
7114        stats.impairment_test_count = snapshot.impairment_test_count;
7115        stats.business_combination_count = snapshot.business_combination_count;
7116        stats.ecl_model_count = snapshot.ecl_model_count;
7117        stats.provision_count = snapshot.provision_count;
7118
7119        info!(
7120            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
7121            snapshot.revenue_contract_count,
7122            snapshot.impairment_test_count,
7123            snapshot.business_combination_count,
7124            snapshot.ecl_model_count,
7125            snapshot.provision_count,
7126            snapshot.currency_translation_count
7127        );
7128        self.check_resources_with_log("post-accounting-standards")?;
7129
7130        Ok(snapshot)
7131    }
7132
7133    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
7134    fn phase_manufacturing(
7135        &mut self,
7136        stats: &mut EnhancedGenerationStatistics,
7137    ) -> SynthResult<ManufacturingSnapshot> {
7138        if !self.phase_config.generate_manufacturing {
7139            debug!("Phase 18: Skipped (manufacturing generation disabled)");
7140            return Ok(ManufacturingSnapshot::default());
7141        }
7142        info!("Phase 18: Generating Manufacturing Data");
7143
7144        let seed = self.seed;
7145        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7146            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7147        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7148        let company_code = self
7149            .config
7150            .companies
7151            .first()
7152            .map(|c| c.code.as_str())
7153            .unwrap_or("1000");
7154
7155        let material_data: Vec<(String, String)> = self
7156            .master_data
7157            .materials
7158            .iter()
7159            .map(|m| (m.material_id.clone(), m.description.clone()))
7160            .collect();
7161
7162        if material_data.is_empty() {
7163            debug!("Phase 18: Skipped (no materials available)");
7164            return Ok(ManufacturingSnapshot::default());
7165        }
7166
7167        let mut snapshot = ManufacturingSnapshot::default();
7168
7169        // Generate production orders
7170        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
7171        let production_orders = prod_gen.generate(
7172            company_code,
7173            &material_data,
7174            start_date,
7175            end_date,
7176            &self.config.manufacturing.production_orders,
7177            &self.config.manufacturing.costing,
7178            &self.config.manufacturing.routing,
7179        );
7180        snapshot.production_order_count = production_orders.len();
7181
7182        // Generate quality inspections from production orders
7183        let inspection_data: Vec<(String, String, String)> = production_orders
7184            .iter()
7185            .map(|po| {
7186                (
7187                    po.order_id.clone(),
7188                    po.material_id.clone(),
7189                    po.material_description.clone(),
7190                )
7191            })
7192            .collect();
7193
7194        snapshot.production_orders = production_orders;
7195
7196        if !inspection_data.is_empty() {
7197            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
7198            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
7199            snapshot.quality_inspection_count = inspections.len();
7200            snapshot.quality_inspections = inspections;
7201        }
7202
7203        // Generate cycle counts (one per month)
7204        let storage_locations: Vec<(String, String)> = material_data
7205            .iter()
7206            .enumerate()
7207            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
7208            .collect();
7209
7210        let employee_ids: Vec<String> = self
7211            .master_data
7212            .employees
7213            .iter()
7214            .map(|e| e.employee_id.clone())
7215            .collect();
7216        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
7217            .with_employee_pool(employee_ids);
7218        let mut cycle_count_total = 0usize;
7219        for month in 0..self.config.global.period_months {
7220            let count_date = start_date + chrono::Months::new(month);
7221            let items_per_count = storage_locations.len().clamp(10, 50);
7222            let cc = cc_gen.generate(
7223                company_code,
7224                &storage_locations,
7225                count_date,
7226                items_per_count,
7227            );
7228            snapshot.cycle_counts.push(cc);
7229            cycle_count_total += 1;
7230        }
7231        snapshot.cycle_count_count = cycle_count_total;
7232
7233        // Generate BOM components
7234        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
7235        let bom_components = bom_gen.generate(company_code, &material_data);
7236        snapshot.bom_component_count = bom_components.len();
7237        snapshot.bom_components = bom_components;
7238
7239        // Generate inventory movements — link GoodsIssue movements to real production order IDs
7240        let currency = self
7241            .config
7242            .companies
7243            .first()
7244            .map(|c| c.currency.as_str())
7245            .unwrap_or("USD");
7246        let production_order_ids: Vec<String> = snapshot
7247            .production_orders
7248            .iter()
7249            .map(|po| po.order_id.clone())
7250            .collect();
7251        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
7252        let inventory_movements = inv_mov_gen.generate_with_production_orders(
7253            company_code,
7254            &material_data,
7255            start_date,
7256            end_date,
7257            2,
7258            currency,
7259            &production_order_ids,
7260        );
7261        snapshot.inventory_movement_count = inventory_movements.len();
7262        snapshot.inventory_movements = inventory_movements;
7263
7264        stats.production_order_count = snapshot.production_order_count;
7265        stats.quality_inspection_count = snapshot.quality_inspection_count;
7266        stats.cycle_count_count = snapshot.cycle_count_count;
7267        stats.bom_component_count = snapshot.bom_component_count;
7268        stats.inventory_movement_count = snapshot.inventory_movement_count;
7269
7270        info!(
7271            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
7272            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
7273            snapshot.bom_component_count, snapshot.inventory_movement_count
7274        );
7275        self.check_resources_with_log("post-manufacturing")?;
7276
7277        Ok(snapshot)
7278    }
7279
7280    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
7281    fn phase_sales_kpi_budgets(
7282        &mut self,
7283        coa: &Arc<ChartOfAccounts>,
7284        financial_reporting: &FinancialReportingSnapshot,
7285        stats: &mut EnhancedGenerationStatistics,
7286    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
7287        if !self.phase_config.generate_sales_kpi_budgets {
7288            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
7289            return Ok(SalesKpiBudgetsSnapshot::default());
7290        }
7291        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
7292
7293        let seed = self.seed;
7294        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7295            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7296        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7297        let company_code = self
7298            .config
7299            .companies
7300            .first()
7301            .map(|c| c.code.as_str())
7302            .unwrap_or("1000");
7303
7304        let mut snapshot = SalesKpiBudgetsSnapshot::default();
7305
7306        // Sales Quotes
7307        if self.config.sales_quotes.enabled {
7308            let customer_data: Vec<(String, String)> = self
7309                .master_data
7310                .customers
7311                .iter()
7312                .map(|c| (c.customer_id.clone(), c.name.clone()))
7313                .collect();
7314            let material_data: Vec<(String, String)> = self
7315                .master_data
7316                .materials
7317                .iter()
7318                .map(|m| (m.material_id.clone(), m.description.clone()))
7319                .collect();
7320
7321            if !customer_data.is_empty() && !material_data.is_empty() {
7322                let employee_ids: Vec<String> = self
7323                    .master_data
7324                    .employees
7325                    .iter()
7326                    .map(|e| e.employee_id.clone())
7327                    .collect();
7328                let customer_ids: Vec<String> = self
7329                    .master_data
7330                    .customers
7331                    .iter()
7332                    .map(|c| c.customer_id.clone())
7333                    .collect();
7334                let company_currency = self
7335                    .config
7336                    .companies
7337                    .first()
7338                    .map(|c| c.currency.as_str())
7339                    .unwrap_or("USD");
7340
7341                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
7342                    .with_pools(employee_ids, customer_ids);
7343                let quotes = quote_gen.generate_with_currency(
7344                    company_code,
7345                    &customer_data,
7346                    &material_data,
7347                    start_date,
7348                    end_date,
7349                    &self.config.sales_quotes,
7350                    company_currency,
7351                );
7352                snapshot.sales_quote_count = quotes.len();
7353                snapshot.sales_quotes = quotes;
7354            }
7355        }
7356
7357        // Management KPIs
7358        if self.config.financial_reporting.management_kpis.enabled {
7359            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
7360            let mut kpis = kpi_gen.generate(
7361                company_code,
7362                start_date,
7363                end_date,
7364                &self.config.financial_reporting.management_kpis,
7365            );
7366
7367            // Override financial KPIs with actual data from financial statements
7368            {
7369                use rust_decimal::Decimal;
7370
7371                if let Some(income_stmt) =
7372                    financial_reporting.financial_statements.iter().find(|fs| {
7373                        fs.statement_type == StatementType::IncomeStatement
7374                            && fs.company_code == company_code
7375                    })
7376                {
7377                    // Extract revenue and COGS from income statement line items
7378                    let total_revenue: Decimal = income_stmt
7379                        .line_items
7380                        .iter()
7381                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
7382                        .map(|li| li.amount)
7383                        .sum();
7384                    let total_cogs: Decimal = income_stmt
7385                        .line_items
7386                        .iter()
7387                        .filter(|li| {
7388                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7389                                && !li.is_total
7390                        })
7391                        .map(|li| li.amount.abs())
7392                        .sum();
7393                    let total_opex: Decimal = income_stmt
7394                        .line_items
7395                        .iter()
7396                        .filter(|li| {
7397                            li.section.contains("Expense")
7398                                && !li.is_total
7399                                && !li.section.contains("Cost")
7400                        })
7401                        .map(|li| li.amount.abs())
7402                        .sum();
7403
7404                    if total_revenue > Decimal::ZERO {
7405                        let hundred = Decimal::from(100);
7406                        let gross_margin_pct =
7407                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7408                        let operating_income = total_revenue - total_cogs - total_opex;
7409                        let op_margin_pct =
7410                            (operating_income * hundred / total_revenue).round_dp(2);
7411
7412                        // Override gross margin and operating margin KPIs
7413                        for kpi in &mut kpis {
7414                            if kpi.name == "Gross Margin" {
7415                                kpi.value = gross_margin_pct;
7416                            } else if kpi.name == "Operating Margin" {
7417                                kpi.value = op_margin_pct;
7418                            }
7419                        }
7420                    }
7421                }
7422
7423                // Override Current Ratio from balance sheet
7424                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7425                    fs.statement_type == StatementType::BalanceSheet
7426                        && fs.company_code == company_code
7427                }) {
7428                    let current_assets: Decimal = bs
7429                        .line_items
7430                        .iter()
7431                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7432                        .map(|li| li.amount)
7433                        .sum();
7434                    let current_liabilities: Decimal = bs
7435                        .line_items
7436                        .iter()
7437                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7438                        .map(|li| li.amount.abs())
7439                        .sum();
7440
7441                    if current_liabilities > Decimal::ZERO {
7442                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
7443                        for kpi in &mut kpis {
7444                            if kpi.name == "Current Ratio" {
7445                                kpi.value = current_ratio;
7446                            }
7447                        }
7448                    }
7449                }
7450            }
7451
7452            snapshot.kpi_count = kpis.len();
7453            snapshot.kpis = kpis;
7454        }
7455
7456        // Budgets
7457        if self.config.financial_reporting.budgets.enabled {
7458            let account_data: Vec<(String, String)> = coa
7459                .accounts
7460                .iter()
7461                .map(|a| (a.account_number.clone(), a.short_description.clone()))
7462                .collect();
7463
7464            if !account_data.is_empty() {
7465                let fiscal_year = start_date.year() as u32;
7466                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7467                let budget = budget_gen.generate(
7468                    company_code,
7469                    fiscal_year,
7470                    &account_data,
7471                    &self.config.financial_reporting.budgets,
7472                );
7473                snapshot.budget_line_count = budget.line_items.len();
7474                snapshot.budgets.push(budget);
7475            }
7476        }
7477
7478        stats.sales_quote_count = snapshot.sales_quote_count;
7479        stats.kpi_count = snapshot.kpi_count;
7480        stats.budget_line_count = snapshot.budget_line_count;
7481
7482        info!(
7483            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7484            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7485        );
7486        self.check_resources_with_log("post-sales-kpi-budgets")?;
7487
7488        Ok(snapshot)
7489    }
7490
7491    /// Compute pre-tax income for a single company from actual journal entries.
7492    ///
7493    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
7494    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
7495    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
7496    /// and the period-close engine so that all three use a consistent definition.
7497    fn compute_pre_tax_income(
7498        company_code: &str,
7499        journal_entries: &[JournalEntry],
7500    ) -> rust_decimal::Decimal {
7501        use datasynth_core::accounts::AccountCategory;
7502        use rust_decimal::Decimal;
7503
7504        let mut total_revenue = Decimal::ZERO;
7505        let mut total_expenses = Decimal::ZERO;
7506
7507        for je in journal_entries {
7508            if je.header.company_code != company_code {
7509                continue;
7510            }
7511            for line in &je.lines {
7512                let cat = AccountCategory::from_account(&line.gl_account);
7513                match cat {
7514                    AccountCategory::Revenue => {
7515                        total_revenue += line.credit_amount - line.debit_amount;
7516                    }
7517                    AccountCategory::Cogs
7518                    | AccountCategory::OperatingExpense
7519                    | AccountCategory::OtherIncomeExpense => {
7520                        total_expenses += line.debit_amount - line.credit_amount;
7521                    }
7522                    _ => {}
7523                }
7524            }
7525        }
7526
7527        let pti = (total_revenue - total_expenses).round_dp(2);
7528        if pti == rust_decimal::Decimal::ZERO {
7529            // No income statement activity yet — fall back to a synthetic value so the
7530            // tax provision generator can still produce meaningful output.
7531            rust_decimal::Decimal::from(1_000_000u32)
7532        } else {
7533            pti
7534        }
7535    }
7536
7537    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
7538    fn phase_tax_generation(
7539        &mut self,
7540        document_flows: &DocumentFlowSnapshot,
7541        journal_entries: &[JournalEntry],
7542        stats: &mut EnhancedGenerationStatistics,
7543    ) -> SynthResult<TaxSnapshot> {
7544        if !self.phase_config.generate_tax {
7545            debug!("Phase 20: Skipped (tax generation disabled)");
7546            return Ok(TaxSnapshot::default());
7547        }
7548        info!("Phase 20: Generating Tax Data");
7549
7550        let seed = self.seed;
7551        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7552            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7553        let fiscal_year = start_date.year();
7554        let company_code = self
7555            .config
7556            .companies
7557            .first()
7558            .map(|c| c.code.as_str())
7559            .unwrap_or("1000");
7560
7561        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7562            seed + 370,
7563            self.config.tax.clone(),
7564        );
7565
7566        let pack = self.primary_pack().clone();
7567        let (jurisdictions, codes) =
7568            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7569
7570        // Generate tax provisions for each company
7571        let mut provisions = Vec::new();
7572        if self.config.tax.provisions.enabled {
7573            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7574            for company in &self.config.companies {
7575                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7576                let statutory_rate = rust_decimal::Decimal::new(
7577                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7578                    2,
7579                );
7580                let provision = provision_gen.generate(
7581                    &company.code,
7582                    start_date,
7583                    pre_tax_income,
7584                    statutory_rate,
7585                );
7586                provisions.push(provision);
7587            }
7588        }
7589
7590        // Generate tax lines from document invoices
7591        let mut tax_lines = Vec::new();
7592        if !codes.is_empty() {
7593            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7594                datasynth_generators::TaxLineGeneratorConfig::default(),
7595                codes.clone(),
7596                seed + 372,
7597            );
7598
7599            // Tax lines from vendor invoices (input tax)
7600            // Use the first company's country as buyer country
7601            let buyer_country = self
7602                .config
7603                .companies
7604                .first()
7605                .map(|c| c.country.as_str())
7606                .unwrap_or("US");
7607            for vi in &document_flows.vendor_invoices {
7608                let lines = tax_line_gen.generate_for_document(
7609                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
7610                    &vi.header.document_id,
7611                    buyer_country, // seller approx same country
7612                    buyer_country,
7613                    vi.payable_amount,
7614                    vi.header.document_date,
7615                    None,
7616                );
7617                tax_lines.extend(lines);
7618            }
7619
7620            // Tax lines from customer invoices (output tax)
7621            for ci in &document_flows.customer_invoices {
7622                let lines = tax_line_gen.generate_for_document(
7623                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7624                    &ci.header.document_id,
7625                    buyer_country, // seller is the company
7626                    buyer_country,
7627                    ci.total_gross_amount,
7628                    ci.header.document_date,
7629                    None,
7630                );
7631                tax_lines.extend(lines);
7632            }
7633        }
7634
7635        // Generate deferred tax data (IAS 12 / ASC 740) for each company
7636        let deferred_tax = {
7637            let companies: Vec<(&str, &str)> = self
7638                .config
7639                .companies
7640                .iter()
7641                .map(|c| (c.code.as_str(), c.country.as_str()))
7642                .collect();
7643            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7644            deferred_gen.generate(&companies, start_date, journal_entries)
7645        };
7646
7647        // Build a document_id → posting_date map so each tax JE uses its
7648        // source document's date rather than a blanket period-end date.
7649        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7650            std::collections::HashMap::new();
7651        for vi in &document_flows.vendor_invoices {
7652            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7653        }
7654        for ci in &document_flows.customer_invoices {
7655            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7656        }
7657
7658        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
7659        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7660        let tax_posting_journal_entries = if !tax_lines.is_empty() {
7661            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7662                &tax_lines,
7663                company_code,
7664                &doc_dates,
7665                end_date,
7666            );
7667            debug!("Generated {} tax posting JEs", jes.len());
7668            jes
7669        } else {
7670            Vec::new()
7671        };
7672
7673        let snapshot = TaxSnapshot {
7674            jurisdiction_count: jurisdictions.len(),
7675            code_count: codes.len(),
7676            jurisdictions,
7677            codes,
7678            tax_provisions: provisions,
7679            tax_lines,
7680            tax_returns: Vec::new(),
7681            withholding_records: Vec::new(),
7682            tax_anomaly_labels: Vec::new(),
7683            deferred_tax,
7684            tax_posting_journal_entries,
7685        };
7686
7687        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7688        stats.tax_code_count = snapshot.code_count;
7689        stats.tax_provision_count = snapshot.tax_provisions.len();
7690        stats.tax_line_count = snapshot.tax_lines.len();
7691
7692        info!(
7693            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7694            snapshot.jurisdiction_count,
7695            snapshot.code_count,
7696            snapshot.tax_provisions.len(),
7697            snapshot.deferred_tax.temporary_differences.len(),
7698            snapshot.deferred_tax.journal_entries.len(),
7699            snapshot.tax_posting_journal_entries.len(),
7700        );
7701        self.check_resources_with_log("post-tax")?;
7702
7703        Ok(snapshot)
7704    }
7705
7706    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
7707    fn phase_esg_generation(
7708        &mut self,
7709        document_flows: &DocumentFlowSnapshot,
7710        manufacturing: &ManufacturingSnapshot,
7711        stats: &mut EnhancedGenerationStatistics,
7712    ) -> SynthResult<EsgSnapshot> {
7713        if !self.phase_config.generate_esg {
7714            debug!("Phase 21: Skipped (ESG generation disabled)");
7715            return Ok(EsgSnapshot::default());
7716        }
7717        let degradation = self.check_resources()?;
7718        if degradation >= DegradationLevel::Reduced {
7719            debug!(
7720                "Phase skipped due to resource pressure (degradation: {:?})",
7721                degradation
7722            );
7723            return Ok(EsgSnapshot::default());
7724        }
7725        info!("Phase 21: Generating ESG Data");
7726
7727        let seed = self.seed;
7728        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7729            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7730        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7731        let entity_id = self
7732            .config
7733            .companies
7734            .first()
7735            .map(|c| c.code.as_str())
7736            .unwrap_or("1000");
7737
7738        let esg_cfg = &self.config.esg;
7739        let mut snapshot = EsgSnapshot::default();
7740
7741        // Energy consumption (feeds into scope 1 & 2 emissions)
7742        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7743            esg_cfg.environmental.energy.clone(),
7744            seed + 80,
7745        );
7746        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7747
7748        // Water usage
7749        let facility_count = esg_cfg.environmental.energy.facility_count;
7750        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7751        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7752
7753        // Waste
7754        let mut waste_gen = datasynth_generators::WasteGenerator::new(
7755            seed + 82,
7756            esg_cfg.environmental.waste.diversion_target,
7757            facility_count,
7758        );
7759        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7760
7761        // Emissions (scope 1, 2, 3)
7762        let mut emission_gen =
7763            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7764
7765        // Build EnergyInput from energy_records
7766        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7767            .iter()
7768            .map(|e| datasynth_generators::EnergyInput {
7769                facility_id: e.facility_id.clone(),
7770                energy_type: match e.energy_source {
7771                    EnergySourceType::NaturalGas => {
7772                        datasynth_generators::EnergyInputType::NaturalGas
7773                    }
7774                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7775                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7776                    _ => datasynth_generators::EnergyInputType::Electricity,
7777                },
7778                consumption_kwh: e.consumption_kwh,
7779                period: e.period,
7780            })
7781            .collect();
7782
7783        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
7784        if !manufacturing.production_orders.is_empty() {
7785            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7786                &manufacturing.production_orders,
7787                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
7788                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
7789            );
7790            if !mfg_energy.is_empty() {
7791                info!(
7792                    "ESG: {} energy inputs derived from {} production orders",
7793                    mfg_energy.len(),
7794                    manufacturing.production_orders.len(),
7795                );
7796                energy_inputs.extend(mfg_energy);
7797            }
7798        }
7799
7800        let mut emissions = Vec::new();
7801        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7802        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7803
7804        // Scope 3: use vendor spend data from actual payments
7805        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7806            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7807            for payment in &document_flows.payments {
7808                if payment.is_vendor {
7809                    *totals
7810                        .entry(payment.business_partner_id.clone())
7811                        .or_default() += payment.amount;
7812                }
7813            }
7814            totals
7815        };
7816        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7817            .master_data
7818            .vendors
7819            .iter()
7820            .map(|v| {
7821                let spend = vendor_payment_totals
7822                    .get(&v.vendor_id)
7823                    .copied()
7824                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7825                datasynth_generators::VendorSpendInput {
7826                    vendor_id: v.vendor_id.clone(),
7827                    category: format!("{:?}", v.vendor_type).to_lowercase(),
7828                    spend,
7829                    country: v.country.clone(),
7830                }
7831            })
7832            .collect();
7833        if !vendor_spend.is_empty() {
7834            emissions.extend(emission_gen.generate_scope3_purchased_goods(
7835                entity_id,
7836                &vendor_spend,
7837                start_date,
7838                end_date,
7839            ));
7840        }
7841
7842        // Business travel & commuting (scope 3)
7843        let headcount = self.master_data.employees.len() as u32;
7844        if headcount > 0 {
7845            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7846            emissions.extend(emission_gen.generate_scope3_business_travel(
7847                entity_id,
7848                travel_spend,
7849                start_date,
7850            ));
7851            emissions
7852                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7853        }
7854
7855        snapshot.emission_count = emissions.len();
7856        snapshot.emissions = emissions;
7857        snapshot.energy = energy_records;
7858
7859        // Social: Workforce diversity, pay equity, safety
7860        let mut workforce_gen =
7861            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7862        let total_headcount = headcount.max(100);
7863        snapshot.diversity =
7864            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7865        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7866
7867        // v2.4: Derive additional workforce diversity metrics from actual employee data
7868        if !self.master_data.employees.is_empty() {
7869            let hr_diversity = workforce_gen.generate_diversity_from_employees(
7870                entity_id,
7871                &self.master_data.employees,
7872                end_date,
7873            );
7874            if !hr_diversity.is_empty() {
7875                info!(
7876                    "ESG: {} diversity metrics derived from {} actual employees",
7877                    hr_diversity.len(),
7878                    self.master_data.employees.len(),
7879                );
7880                snapshot.diversity.extend(hr_diversity);
7881            }
7882        }
7883
7884        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7885            entity_id,
7886            facility_count,
7887            start_date,
7888            end_date,
7889        );
7890
7891        // Compute safety metrics
7892        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
7893        let safety_metric = workforce_gen.compute_safety_metrics(
7894            entity_id,
7895            &snapshot.safety_incidents,
7896            total_hours,
7897            start_date,
7898        );
7899        snapshot.safety_metrics = vec![safety_metric];
7900
7901        // Governance
7902        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7903            seed + 85,
7904            esg_cfg.governance.board_size,
7905            esg_cfg.governance.independence_target,
7906        );
7907        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7908
7909        // Supplier ESG assessments
7910        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7911            esg_cfg.supply_chain_esg.clone(),
7912            seed + 86,
7913        );
7914        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7915            .master_data
7916            .vendors
7917            .iter()
7918            .map(|v| datasynth_generators::VendorInput {
7919                vendor_id: v.vendor_id.clone(),
7920                country: v.country.clone(),
7921                industry: format!("{:?}", v.vendor_type).to_lowercase(),
7922                quality_score: None,
7923            })
7924            .collect();
7925        snapshot.supplier_assessments =
7926            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7927
7928        // Disclosures
7929        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7930            seed + 87,
7931            esg_cfg.reporting.clone(),
7932            esg_cfg.climate_scenarios.clone(),
7933        );
7934        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7935        snapshot.disclosures = disclosure_gen.generate_disclosures(
7936            entity_id,
7937            &snapshot.materiality,
7938            start_date,
7939            end_date,
7940        );
7941        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7942        snapshot.disclosure_count = snapshot.disclosures.len();
7943
7944        // Anomaly injection
7945        if esg_cfg.anomaly_rate > 0.0 {
7946            let mut anomaly_injector =
7947                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7948            let mut labels = Vec::new();
7949            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7950            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7951            labels.extend(
7952                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7953            );
7954            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7955            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7956            snapshot.anomaly_labels = labels;
7957        }
7958
7959        stats.esg_emission_count = snapshot.emission_count;
7960        stats.esg_disclosure_count = snapshot.disclosure_count;
7961
7962        info!(
7963            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
7964            snapshot.emission_count,
7965            snapshot.disclosure_count,
7966            snapshot.supplier_assessments.len()
7967        );
7968        self.check_resources_with_log("post-esg")?;
7969
7970        Ok(snapshot)
7971    }
7972
7973    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
7974    fn phase_treasury_data(
7975        &mut self,
7976        document_flows: &DocumentFlowSnapshot,
7977        subledger: &SubledgerSnapshot,
7978        intercompany: &IntercompanySnapshot,
7979        stats: &mut EnhancedGenerationStatistics,
7980    ) -> SynthResult<TreasurySnapshot> {
7981        if !self.phase_config.generate_treasury {
7982            debug!("Phase 22: Skipped (treasury generation disabled)");
7983            return Ok(TreasurySnapshot::default());
7984        }
7985        let degradation = self.check_resources()?;
7986        if degradation >= DegradationLevel::Reduced {
7987            debug!(
7988                "Phase skipped due to resource pressure (degradation: {:?})",
7989                degradation
7990            );
7991            return Ok(TreasurySnapshot::default());
7992        }
7993        info!("Phase 22: Generating Treasury Data");
7994
7995        let seed = self.seed;
7996        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7997            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7998        let currency = self
7999            .config
8000            .companies
8001            .first()
8002            .map(|c| c.currency.as_str())
8003            .unwrap_or("USD");
8004        let entity_id = self
8005            .config
8006            .companies
8007            .first()
8008            .map(|c| c.code.as_str())
8009            .unwrap_or("1000");
8010
8011        let mut snapshot = TreasurySnapshot::default();
8012
8013        // Generate debt instruments
8014        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8015            self.config.treasury.debt.clone(),
8016            seed + 90,
8017        );
8018        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8019
8020        // Generate hedging instruments (IR swaps for floating-rate debt)
8021        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8022            self.config.treasury.hedging.clone(),
8023            seed + 91,
8024        );
8025        for debt in &snapshot.debt_instruments {
8026            if debt.rate_type == InterestRateType::Variable {
8027                let swap = hedge_gen.generate_ir_swap(
8028                    currency,
8029                    debt.principal,
8030                    debt.origination_date,
8031                    debt.maturity_date,
8032                );
8033                snapshot.hedging_instruments.push(swap);
8034            }
8035        }
8036
8037        // Build FX exposures from foreign-currency payments and generate
8038        // FX forwards + hedge relationship designations via generate() API.
8039        {
8040            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8041            for payment in &document_flows.payments {
8042                if payment.currency != currency {
8043                    let entry = fx_map
8044                        .entry(payment.currency.clone())
8045                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8046                    entry.0 += payment.amount;
8047                    // Use the latest settlement date among grouped payments
8048                    if payment.header.document_date > entry.1 {
8049                        entry.1 = payment.header.document_date;
8050                    }
8051                }
8052            }
8053            if !fx_map.is_empty() {
8054                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
8055                    .into_iter()
8056                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
8057                        datasynth_generators::treasury::FxExposure {
8058                            currency_pair: format!("{foreign_ccy}/{currency}"),
8059                            foreign_currency: foreign_ccy,
8060                            net_amount,
8061                            settlement_date,
8062                            description: "AP payment FX exposure".to_string(),
8063                        }
8064                    })
8065                    .collect();
8066                let (fx_instruments, fx_relationships) =
8067                    hedge_gen.generate(start_date, &fx_exposures);
8068                snapshot.hedging_instruments.extend(fx_instruments);
8069                snapshot.hedge_relationships.extend(fx_relationships);
8070            }
8071        }
8072
8073        // Inject anomalies if configured
8074        if self.config.treasury.anomaly_rate > 0.0 {
8075            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
8076                seed + 92,
8077                self.config.treasury.anomaly_rate,
8078            );
8079            let mut labels = Vec::new();
8080            labels.extend(
8081                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
8082            );
8083            snapshot.treasury_anomaly_labels = labels;
8084        }
8085
8086        // Generate cash positions from payment flows
8087        if self.config.treasury.cash_positioning.enabled {
8088            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
8089
8090            // AP payments as outflows
8091            for payment in &document_flows.payments {
8092                cash_flows.push(datasynth_generators::treasury::CashFlow {
8093                    date: payment.header.document_date,
8094                    account_id: format!("{entity_id}-MAIN"),
8095                    amount: payment.amount,
8096                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
8097                });
8098            }
8099
8100            // Customer receipts (from O2C chains) as inflows
8101            for chain in &document_flows.o2c_chains {
8102                if let Some(ref receipt) = chain.customer_receipt {
8103                    cash_flows.push(datasynth_generators::treasury::CashFlow {
8104                        date: receipt.header.document_date,
8105                        account_id: format!("{entity_id}-MAIN"),
8106                        amount: receipt.amount,
8107                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8108                    });
8109                }
8110                // Remainder receipts (follow-up to partial payments)
8111                for receipt in &chain.remainder_receipts {
8112                    cash_flows.push(datasynth_generators::treasury::CashFlow {
8113                        date: receipt.header.document_date,
8114                        account_id: format!("{entity_id}-MAIN"),
8115                        amount: receipt.amount,
8116                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8117                    });
8118                }
8119            }
8120
8121            if !cash_flows.is_empty() {
8122                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
8123                    self.config.treasury.cash_positioning.clone(),
8124                    seed + 93,
8125                );
8126                let account_id = format!("{entity_id}-MAIN");
8127                snapshot.cash_positions = cash_gen.generate(
8128                    entity_id,
8129                    &account_id,
8130                    currency,
8131                    &cash_flows,
8132                    start_date,
8133                    start_date + chrono::Months::new(self.config.global.period_months),
8134                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
8135                );
8136            }
8137        }
8138
8139        // Generate cash forecasts from AR/AP aging
8140        if self.config.treasury.cash_forecasting.enabled {
8141            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8142
8143            // Build AR aging items from subledger AR invoices
8144            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
8145                .ar_invoices
8146                .iter()
8147                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8148                .map(|inv| {
8149                    let days_past_due = if inv.due_date < end_date {
8150                        (end_date - inv.due_date).num_days().max(0) as u32
8151                    } else {
8152                        0
8153                    };
8154                    datasynth_generators::treasury::ArAgingItem {
8155                        expected_date: inv.due_date,
8156                        amount: inv.amount_remaining,
8157                        days_past_due,
8158                        document_id: inv.invoice_number.clone(),
8159                    }
8160                })
8161                .collect();
8162
8163            // Build AP aging items from subledger AP invoices
8164            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
8165                .ap_invoices
8166                .iter()
8167                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8168                .map(|inv| datasynth_generators::treasury::ApAgingItem {
8169                    payment_date: inv.due_date,
8170                    amount: inv.amount_remaining,
8171                    document_id: inv.invoice_number.clone(),
8172                })
8173                .collect();
8174
8175            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
8176                self.config.treasury.cash_forecasting.clone(),
8177                seed + 94,
8178            );
8179            let forecast = forecast_gen.generate(
8180                entity_id,
8181                currency,
8182                end_date,
8183                &ar_items,
8184                &ap_items,
8185                &[], // scheduled disbursements - empty for now
8186            );
8187            snapshot.cash_forecasts.push(forecast);
8188        }
8189
8190        // Generate cash pools and sweeps
8191        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
8192            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8193            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
8194                self.config.treasury.cash_pooling.clone(),
8195                seed + 95,
8196            );
8197
8198            // Create a pool from available accounts
8199            let account_ids: Vec<String> = snapshot
8200                .cash_positions
8201                .iter()
8202                .map(|cp| cp.bank_account_id.clone())
8203                .collect::<std::collections::HashSet<_>>()
8204                .into_iter()
8205                .collect();
8206
8207            if let Some(pool) =
8208                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
8209            {
8210                // Generate sweeps - build participant balances from last cash position per account
8211                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8212                for cp in &snapshot.cash_positions {
8213                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
8214                }
8215
8216                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
8217                    latest_balances
8218                        .into_iter()
8219                        .filter(|(id, _)| pool.participant_accounts.contains(id))
8220                        .map(
8221                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
8222                                account_id: id,
8223                                balance,
8224                            },
8225                        )
8226                        .collect();
8227
8228                let sweeps =
8229                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
8230                snapshot.cash_pool_sweeps = sweeps;
8231                snapshot.cash_pools.push(pool);
8232            }
8233        }
8234
8235        // Generate bank guarantees
8236        if self.config.treasury.bank_guarantees.enabled {
8237            let vendor_names: Vec<String> = self
8238                .master_data
8239                .vendors
8240                .iter()
8241                .map(|v| v.name.clone())
8242                .collect();
8243            if !vendor_names.is_empty() {
8244                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
8245                    self.config.treasury.bank_guarantees.clone(),
8246                    seed + 96,
8247                );
8248                snapshot.bank_guarantees =
8249                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
8250            }
8251        }
8252
8253        // Generate netting runs from intercompany matched pairs
8254        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
8255            let entity_ids: Vec<String> = self
8256                .config
8257                .companies
8258                .iter()
8259                .map(|c| c.code.clone())
8260                .collect();
8261            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
8262                .matched_pairs
8263                .iter()
8264                .map(|mp| {
8265                    (
8266                        mp.seller_company.clone(),
8267                        mp.buyer_company.clone(),
8268                        mp.amount,
8269                    )
8270                })
8271                .collect();
8272            if entity_ids.len() >= 2 {
8273                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
8274                    self.config.treasury.netting.clone(),
8275                    seed + 97,
8276                );
8277                snapshot.netting_runs = netting_gen.generate(
8278                    &entity_ids,
8279                    currency,
8280                    start_date,
8281                    self.config.global.period_months,
8282                    &ic_amounts,
8283                );
8284            }
8285        }
8286
8287        // Generate treasury journal entries from the instruments we just created.
8288        {
8289            use datasynth_generators::treasury::TreasuryAccounting;
8290
8291            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8292            let mut treasury_jes = Vec::new();
8293
8294            // Debt interest accrual JEs
8295            if !snapshot.debt_instruments.is_empty() {
8296                let debt_jes =
8297                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
8298                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
8299                treasury_jes.extend(debt_jes);
8300            }
8301
8302            // Hedge mark-to-market JEs
8303            if !snapshot.hedging_instruments.is_empty() {
8304                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
8305                    &snapshot.hedging_instruments,
8306                    &snapshot.hedge_relationships,
8307                    end_date,
8308                    entity_id,
8309                );
8310                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
8311                treasury_jes.extend(hedge_jes);
8312            }
8313
8314            // Cash pool sweep JEs
8315            if !snapshot.cash_pool_sweeps.is_empty() {
8316                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
8317                    &snapshot.cash_pool_sweeps,
8318                    entity_id,
8319                );
8320                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
8321                treasury_jes.extend(sweep_jes);
8322            }
8323
8324            if !treasury_jes.is_empty() {
8325                debug!("Total treasury journal entries: {}", treasury_jes.len());
8326            }
8327            snapshot.journal_entries = treasury_jes;
8328        }
8329
8330        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
8331        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
8332        stats.cash_position_count = snapshot.cash_positions.len();
8333        stats.cash_forecast_count = snapshot.cash_forecasts.len();
8334        stats.cash_pool_count = snapshot.cash_pools.len();
8335
8336        info!(
8337            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
8338            snapshot.debt_instruments.len(),
8339            snapshot.hedging_instruments.len(),
8340            snapshot.cash_positions.len(),
8341            snapshot.cash_forecasts.len(),
8342            snapshot.cash_pools.len(),
8343            snapshot.bank_guarantees.len(),
8344            snapshot.netting_runs.len(),
8345            snapshot.journal_entries.len(),
8346        );
8347        self.check_resources_with_log("post-treasury")?;
8348
8349        Ok(snapshot)
8350    }
8351
8352    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
8353    fn phase_project_accounting(
8354        &mut self,
8355        document_flows: &DocumentFlowSnapshot,
8356        hr: &HrSnapshot,
8357        stats: &mut EnhancedGenerationStatistics,
8358    ) -> SynthResult<ProjectAccountingSnapshot> {
8359        if !self.phase_config.generate_project_accounting {
8360            debug!("Phase 23: Skipped (project accounting disabled)");
8361            return Ok(ProjectAccountingSnapshot::default());
8362        }
8363        let degradation = self.check_resources()?;
8364        if degradation >= DegradationLevel::Reduced {
8365            debug!(
8366                "Phase skipped due to resource pressure (degradation: {:?})",
8367                degradation
8368            );
8369            return Ok(ProjectAccountingSnapshot::default());
8370        }
8371        info!("Phase 23: Generating Project Accounting Data");
8372
8373        let seed = self.seed;
8374        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8375            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8376        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8377        let company_code = self
8378            .config
8379            .companies
8380            .first()
8381            .map(|c| c.code.as_str())
8382            .unwrap_or("1000");
8383
8384        let mut snapshot = ProjectAccountingSnapshot::default();
8385
8386        // Generate projects with WBS hierarchies
8387        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8388            self.config.project_accounting.clone(),
8389            seed + 95,
8390        );
8391        let pool = project_gen.generate(company_code, start_date, end_date);
8392        snapshot.projects = pool.projects.clone();
8393
8394        // Link source documents to projects for cost allocation
8395        {
8396            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8397                Vec::new();
8398
8399            // Time entries
8400            for te in &hr.time_entries {
8401                let total_hours = te.hours_regular + te.hours_overtime;
8402                if total_hours > 0.0 {
8403                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8404                        id: te.entry_id.clone(),
8405                        entity_id: company_code.to_string(),
8406                        date: te.date,
8407                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8408                            .unwrap_or(rust_decimal::Decimal::ZERO),
8409                        source_type: CostSourceType::TimeEntry,
8410                        hours: Some(
8411                            rust_decimal::Decimal::from_f64_retain(total_hours)
8412                                .unwrap_or(rust_decimal::Decimal::ZERO),
8413                        ),
8414                    });
8415                }
8416            }
8417
8418            // Expense reports
8419            for er in &hr.expense_reports {
8420                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8421                    id: er.report_id.clone(),
8422                    entity_id: company_code.to_string(),
8423                    date: er.submission_date,
8424                    amount: er.total_amount,
8425                    source_type: CostSourceType::ExpenseReport,
8426                    hours: None,
8427                });
8428            }
8429
8430            // Purchase orders
8431            for po in &document_flows.purchase_orders {
8432                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8433                    id: po.header.document_id.clone(),
8434                    entity_id: company_code.to_string(),
8435                    date: po.header.document_date,
8436                    amount: po.total_net_amount,
8437                    source_type: CostSourceType::PurchaseOrder,
8438                    hours: None,
8439                });
8440            }
8441
8442            // Vendor invoices
8443            for vi in &document_flows.vendor_invoices {
8444                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8445                    id: vi.header.document_id.clone(),
8446                    entity_id: company_code.to_string(),
8447                    date: vi.header.document_date,
8448                    amount: vi.payable_amount,
8449                    source_type: CostSourceType::VendorInvoice,
8450                    hours: None,
8451                });
8452            }
8453
8454            if !source_docs.is_empty() && !pool.projects.is_empty() {
8455                let mut cost_gen =
8456                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
8457                        self.config.project_accounting.cost_allocation.clone(),
8458                        seed + 99,
8459                    );
8460                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8461            }
8462        }
8463
8464        // Generate change orders
8465        if self.config.project_accounting.change_orders.enabled {
8466            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8467                self.config.project_accounting.change_orders.clone(),
8468                seed + 96,
8469            );
8470            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8471        }
8472
8473        // Generate milestones
8474        if self.config.project_accounting.milestones.enabled {
8475            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8476                self.config.project_accounting.milestones.clone(),
8477                seed + 97,
8478            );
8479            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8480        }
8481
8482        // Generate earned value metrics (needs cost lines, so only if we have projects)
8483        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8484            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8485                self.config.project_accounting.earned_value.clone(),
8486                seed + 98,
8487            );
8488            snapshot.earned_value_metrics =
8489                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8490        }
8491
8492        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
8493        if self.config.project_accounting.revenue_recognition.enabled
8494            && !snapshot.projects.is_empty()
8495            && !snapshot.cost_lines.is_empty()
8496        {
8497            use datasynth_generators::project_accounting::RevenueGenerator;
8498            let rev_config = self.config.project_accounting.revenue_recognition.clone();
8499            let avg_contract_value =
8500                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8501                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8502
8503            // Build contract value tuples: only customer-type projects get revenue recognition.
8504            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
8505            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8506                snapshot
8507                    .projects
8508                    .iter()
8509                    .filter(|p| {
8510                        matches!(
8511                            p.project_type,
8512                            datasynth_core::models::ProjectType::Customer
8513                        )
8514                    })
8515                    .map(|p| {
8516                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
8517                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8518                        // budget × 1.25 → contract value
8519                        } else {
8520                            avg_contract_value
8521                        };
8522                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
8523                        (p.project_id.clone(), cv, etc)
8524                    })
8525                    .collect();
8526
8527            if !contract_values.is_empty() {
8528                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8529                snapshot.revenue_records = rev_gen.generate(
8530                    &snapshot.projects,
8531                    &snapshot.cost_lines,
8532                    &contract_values,
8533                    start_date,
8534                    end_date,
8535                );
8536                debug!(
8537                    "Generated {} revenue recognition records for {} customer projects",
8538                    snapshot.revenue_records.len(),
8539                    contract_values.len()
8540                );
8541            }
8542        }
8543
8544        stats.project_count = snapshot.projects.len();
8545        stats.project_change_order_count = snapshot.change_orders.len();
8546        stats.project_cost_line_count = snapshot.cost_lines.len();
8547
8548        info!(
8549            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8550            snapshot.projects.len(),
8551            snapshot.change_orders.len(),
8552            snapshot.milestones.len(),
8553            snapshot.earned_value_metrics.len()
8554        );
8555        self.check_resources_with_log("post-project-accounting")?;
8556
8557        Ok(snapshot)
8558    }
8559
8560    /// Phase 24: Generate process evolution and organizational events.
8561    fn phase_evolution_events(
8562        &mut self,
8563        stats: &mut EnhancedGenerationStatistics,
8564    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8565        if !self.phase_config.generate_evolution_events {
8566            debug!("Phase 24: Skipped (evolution events disabled)");
8567            return Ok((Vec::new(), Vec::new()));
8568        }
8569        info!("Phase 24: Generating Process Evolution + Organizational Events");
8570
8571        let seed = self.seed;
8572        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8573            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8574        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8575
8576        // Process evolution events
8577        let mut proc_gen =
8578            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8579                seed + 100,
8580            );
8581        let process_events = proc_gen.generate_events(start_date, end_date);
8582
8583        // Organizational events
8584        let company_codes: Vec<String> = self
8585            .config
8586            .companies
8587            .iter()
8588            .map(|c| c.code.clone())
8589            .collect();
8590        let mut org_gen =
8591            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8592                seed + 101,
8593            );
8594        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8595
8596        stats.process_evolution_event_count = process_events.len();
8597        stats.organizational_event_count = org_events.len();
8598
8599        info!(
8600            "Evolution events generated: {} process evolution, {} organizational",
8601            process_events.len(),
8602            org_events.len()
8603        );
8604        self.check_resources_with_log("post-evolution-events")?;
8605
8606        Ok((process_events, org_events))
8607    }
8608
8609    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
8610    /// data recovery, and regulatory changes).
8611    fn phase_disruption_events(
8612        &self,
8613        stats: &mut EnhancedGenerationStatistics,
8614    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8615        if !self.config.organizational_events.enabled {
8616            debug!("Phase 24b: Skipped (organizational events disabled)");
8617            return Ok(Vec::new());
8618        }
8619        info!("Phase 24b: Generating Disruption Events");
8620
8621        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8622            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8623        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8624
8625        let company_codes: Vec<String> = self
8626            .config
8627            .companies
8628            .iter()
8629            .map(|c| c.code.clone())
8630            .collect();
8631
8632        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8633        let events = gen.generate(start_date, end_date, &company_codes);
8634
8635        stats.disruption_event_count = events.len();
8636        info!("Disruption events generated: {} events", events.len());
8637        self.check_resources_with_log("post-disruption-events")?;
8638
8639        Ok(events)
8640    }
8641
8642    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
8643    ///
8644    /// Produces paired examples where each pair contains the original clean JE
8645    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
8646    /// split transaction). Useful for training anomaly detection models with
8647    /// known ground truth.
8648    fn phase_counterfactuals(
8649        &self,
8650        journal_entries: &[JournalEntry],
8651        stats: &mut EnhancedGenerationStatistics,
8652    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8653        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8654            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8655            return Ok(Vec::new());
8656        }
8657        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8658
8659        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8660
8661        let mut gen = CounterfactualGenerator::new(self.seed + 110);
8662
8663        // Rotating set of specs to produce diverse mutation types
8664        let specs = [
8665            CounterfactualSpec::ScaleAmount { factor: 2.5 },
8666            CounterfactualSpec::ShiftDate { days: -14 },
8667            CounterfactualSpec::SelfApprove,
8668            CounterfactualSpec::SplitTransaction { split_count: 3 },
8669        ];
8670
8671        let pairs: Vec<_> = journal_entries
8672            .iter()
8673            .enumerate()
8674            .map(|(i, je)| {
8675                let spec = &specs[i % specs.len()];
8676                gen.generate(je, spec)
8677            })
8678            .collect();
8679
8680        stats.counterfactual_pair_count = pairs.len();
8681        info!(
8682            "Counterfactual pairs generated: {} pairs from {} journal entries",
8683            pairs.len(),
8684            journal_entries.len()
8685        );
8686        self.check_resources_with_log("post-counterfactuals")?;
8687
8688        Ok(pairs)
8689    }
8690
8691    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
8692    ///
8693    /// Uses the anomaly labels (from Phase 8) to determine which documents are
8694    /// fraudulent, then generates probabilistic red flags on all chain documents.
8695    /// Non-fraud documents also receive red flags at a lower rate (false positives)
8696    /// to produce realistic ML training data.
8697    fn phase_red_flags(
8698        &self,
8699        anomaly_labels: &AnomalyLabels,
8700        document_flows: &DocumentFlowSnapshot,
8701        stats: &mut EnhancedGenerationStatistics,
8702    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8703        if !self.config.fraud.enabled {
8704            debug!("Phase 26: Skipped (fraud generation disabled)");
8705            return Ok(Vec::new());
8706        }
8707        info!("Phase 26: Generating Fraud Red-Flag Indicators");
8708
8709        use datasynth_generators::fraud::RedFlagGenerator;
8710
8711        let generator = RedFlagGenerator::new();
8712        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8713
8714        // Build a set of document IDs that are known-fraudulent from anomaly labels.
8715        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8716            .labels
8717            .iter()
8718            .filter(|label| label.anomaly_type.is_intentional())
8719            .map(|label| label.document_id.as_str())
8720            .collect();
8721
8722        let mut flags = Vec::new();
8723
8724        // Iterate P2P chains: use the purchase order document ID as the chain key.
8725        for chain in &document_flows.p2p_chains {
8726            let doc_id = &chain.purchase_order.header.document_id;
8727            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8728            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8729        }
8730
8731        // Iterate O2C chains: use the sales order document ID as the chain key.
8732        for chain in &document_flows.o2c_chains {
8733            let doc_id = &chain.sales_order.header.document_id;
8734            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8735            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8736        }
8737
8738        stats.red_flag_count = flags.len();
8739        info!(
8740            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8741            flags.len(),
8742            document_flows.p2p_chains.len(),
8743            document_flows.o2c_chains.len(),
8744            fraud_doc_ids.len()
8745        );
8746        self.check_resources_with_log("post-red-flags")?;
8747
8748        Ok(flags)
8749    }
8750
8751    /// Phase 26b: Generate collusion rings from employee/vendor pools.
8752    ///
8753    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
8754    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
8755    /// advance them over the simulation period.
8756    fn phase_collusion_rings(
8757        &mut self,
8758        stats: &mut EnhancedGenerationStatistics,
8759    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8760        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8761            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8762            return Ok(Vec::new());
8763        }
8764        info!("Phase 26b: Generating Collusion Rings");
8765
8766        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8767            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8768        let months = self.config.global.period_months;
8769
8770        let employee_ids: Vec<String> = self
8771            .master_data
8772            .employees
8773            .iter()
8774            .map(|e| e.employee_id.clone())
8775            .collect();
8776        let vendor_ids: Vec<String> = self
8777            .master_data
8778            .vendors
8779            .iter()
8780            .map(|v| v.vendor_id.clone())
8781            .collect();
8782
8783        let mut generator =
8784            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8785        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8786
8787        stats.collusion_ring_count = rings.len();
8788        info!(
8789            "Collusion rings generated: {} rings, total members: {}",
8790            rings.len(),
8791            rings
8792                .iter()
8793                .map(datasynth_generators::fraud::CollusionRing::size)
8794                .sum::<usize>()
8795        );
8796        self.check_resources_with_log("post-collusion-rings")?;
8797
8798        Ok(rings)
8799    }
8800
8801    /// Phase 27: Generate bi-temporal version chains for vendor entities.
8802    ///
8803    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
8804    /// master data changes over time, supporting bi-temporal audit queries.
8805    fn phase_temporal_attributes(
8806        &mut self,
8807        stats: &mut EnhancedGenerationStatistics,
8808    ) -> SynthResult<
8809        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8810    > {
8811        if !self.config.temporal_attributes.enabled {
8812            debug!("Phase 27: Skipped (temporal attributes disabled)");
8813            return Ok(Vec::new());
8814        }
8815        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8816
8817        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8818            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8819
8820        // Build a TemporalAttributeConfig from the user's config.
8821        // Since Phase 27 is already gated on temporal_attributes.enabled,
8822        // default to enabling version chains so users get actual mutations.
8823        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8824            || self.config.temporal_attributes.enabled;
8825        let temporal_config = {
8826            let ta = &self.config.temporal_attributes;
8827            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8828                .enabled(ta.enabled)
8829                .closed_probability(ta.valid_time.closed_probability)
8830                .avg_validity_days(ta.valid_time.avg_validity_days)
8831                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8832                .with_version_chains(if generate_version_chains {
8833                    ta.avg_versions_per_entity
8834                } else {
8835                    1.0
8836                })
8837                .build()
8838        };
8839        // Apply backdating settings if configured
8840        let temporal_config = if self
8841            .config
8842            .temporal_attributes
8843            .transaction_time
8844            .allow_backdating
8845        {
8846            let mut c = temporal_config;
8847            c.transaction_time.allow_backdating = true;
8848            c.transaction_time.backdating_probability = self
8849                .config
8850                .temporal_attributes
8851                .transaction_time
8852                .backdating_probability;
8853            c.transaction_time.max_backdate_days = self
8854                .config
8855                .temporal_attributes
8856                .transaction_time
8857                .max_backdate_days;
8858            c
8859        } else {
8860            temporal_config
8861        };
8862        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8863            temporal_config,
8864            self.seed + 130,
8865            start_date,
8866        );
8867
8868        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8869            self.seed + 130,
8870            datasynth_core::GeneratorType::Vendor,
8871        );
8872
8873        let chains: Vec<_> = self
8874            .master_data
8875            .vendors
8876            .iter()
8877            .map(|vendor| {
8878                let id = uuid_factory.next();
8879                gen.generate_version_chain(vendor.clone(), id)
8880            })
8881            .collect();
8882
8883        stats.temporal_version_chain_count = chains.len();
8884        info!("Temporal version chains generated: {} chains", chains.len());
8885        self.check_resources_with_log("post-temporal-attributes")?;
8886
8887        Ok(chains)
8888    }
8889
8890    /// Phase 28: Build entity relationship graph and cross-process links.
8891    ///
8892    /// Part 1 (gated on `relationship_strength.enabled`): builds an
8893    /// `EntityGraph` from master-data vendor/customer entities and
8894    /// journal-entry-derived transaction summaries.
8895    ///
8896    /// Part 2 (gated on `cross_process_links.enabled`): extracts
8897    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
8898    /// generates inventory-movement cross-process links.
8899    fn phase_entity_relationships(
8900        &self,
8901        journal_entries: &[JournalEntry],
8902        document_flows: &DocumentFlowSnapshot,
8903        stats: &mut EnhancedGenerationStatistics,
8904    ) -> SynthResult<(
8905        Option<datasynth_core::models::EntityGraph>,
8906        Vec<datasynth_core::models::CrossProcessLink>,
8907    )> {
8908        use datasynth_generators::relationships::{
8909            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8910            TransactionSummary,
8911        };
8912
8913        let rs_enabled = self.config.relationship_strength.enabled;
8914        let cpl_enabled = self.config.cross_process_links.enabled
8915            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8916
8917        if !rs_enabled && !cpl_enabled {
8918            debug!(
8919                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8920            );
8921            return Ok((None, Vec::new()));
8922        }
8923
8924        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8925
8926        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8927            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8928
8929        let company_code = self
8930            .config
8931            .companies
8932            .first()
8933            .map(|c| c.code.as_str())
8934            .unwrap_or("1000");
8935
8936        // Build the generator with matching config flags
8937        let gen_config = EntityGraphConfig {
8938            enabled: rs_enabled,
8939            cross_process: datasynth_generators::relationships::CrossProcessConfig {
8940                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8941                enable_return_flows: false,
8942                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8943                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8944                // Use higher link rate for small datasets to avoid probabilistic empty results
8945                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8946                    1.0
8947                } else {
8948                    0.30
8949                },
8950                ..Default::default()
8951            },
8952            strength_config: datasynth_generators::relationships::StrengthConfig {
8953                transaction_volume_weight: self
8954                    .config
8955                    .relationship_strength
8956                    .calculation
8957                    .transaction_volume_weight,
8958                transaction_count_weight: self
8959                    .config
8960                    .relationship_strength
8961                    .calculation
8962                    .transaction_count_weight,
8963                duration_weight: self
8964                    .config
8965                    .relationship_strength
8966                    .calculation
8967                    .relationship_duration_weight,
8968                recency_weight: self.config.relationship_strength.calculation.recency_weight,
8969                mutual_connections_weight: self
8970                    .config
8971                    .relationship_strength
8972                    .calculation
8973                    .mutual_connections_weight,
8974                recency_half_life_days: self
8975                    .config
8976                    .relationship_strength
8977                    .calculation
8978                    .recency_half_life_days,
8979            },
8980            ..Default::default()
8981        };
8982
8983        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
8984
8985        // --- Part 1: Entity Relationship Graph ---
8986        let entity_graph = if rs_enabled {
8987            // Build EntitySummary lists from master data
8988            let vendor_summaries: Vec<EntitySummary> = self
8989                .master_data
8990                .vendors
8991                .iter()
8992                .map(|v| {
8993                    EntitySummary::new(
8994                        &v.vendor_id,
8995                        &v.name,
8996                        datasynth_core::models::GraphEntityType::Vendor,
8997                        start_date,
8998                    )
8999                })
9000                .collect();
9001
9002            let customer_summaries: Vec<EntitySummary> = self
9003                .master_data
9004                .customers
9005                .iter()
9006                .map(|c| {
9007                    EntitySummary::new(
9008                        &c.customer_id,
9009                        &c.name,
9010                        datasynth_core::models::GraphEntityType::Customer,
9011                        start_date,
9012                    )
9013                })
9014                .collect();
9015
9016            // Build transaction summaries from journal entries.
9017            // Key = (company_code, trading_partner) for entries that have a
9018            // trading partner.  This captures intercompany flows and any JE
9019            // whose line items carry a trading_partner reference.
9020            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9021                std::collections::HashMap::new();
9022
9023            for je in journal_entries {
9024                let cc = je.header.company_code.clone();
9025                let posting_date = je.header.posting_date;
9026                for line in &je.lines {
9027                    if let Some(ref tp) = line.trading_partner {
9028                        let amount = if line.debit_amount > line.credit_amount {
9029                            line.debit_amount
9030                        } else {
9031                            line.credit_amount
9032                        };
9033                        let entry = txn_summaries
9034                            .entry((cc.clone(), tp.clone()))
9035                            .or_insert_with(|| TransactionSummary {
9036                                total_volume: rust_decimal::Decimal::ZERO,
9037                                transaction_count: 0,
9038                                first_transaction_date: posting_date,
9039                                last_transaction_date: posting_date,
9040                                related_entities: std::collections::HashSet::new(),
9041                            });
9042                        entry.total_volume += amount;
9043                        entry.transaction_count += 1;
9044                        if posting_date < entry.first_transaction_date {
9045                            entry.first_transaction_date = posting_date;
9046                        }
9047                        if posting_date > entry.last_transaction_date {
9048                            entry.last_transaction_date = posting_date;
9049                        }
9050                        entry.related_entities.insert(cc.clone());
9051                    }
9052                }
9053            }
9054
9055            // Also extract transaction relationships from document flow chains.
9056            // P2P chains: Company → Vendor relationships
9057            for chain in &document_flows.p2p_chains {
9058                let cc = chain.purchase_order.header.company_code.clone();
9059                let vendor_id = chain.purchase_order.vendor_id.clone();
9060                let po_date = chain.purchase_order.header.document_date;
9061                let amount = chain.purchase_order.total_net_amount;
9062
9063                let entry = txn_summaries
9064                    .entry((cc.clone(), vendor_id))
9065                    .or_insert_with(|| TransactionSummary {
9066                        total_volume: rust_decimal::Decimal::ZERO,
9067                        transaction_count: 0,
9068                        first_transaction_date: po_date,
9069                        last_transaction_date: po_date,
9070                        related_entities: std::collections::HashSet::new(),
9071                    });
9072                entry.total_volume += amount;
9073                entry.transaction_count += 1;
9074                if po_date < entry.first_transaction_date {
9075                    entry.first_transaction_date = po_date;
9076                }
9077                if po_date > entry.last_transaction_date {
9078                    entry.last_transaction_date = po_date;
9079                }
9080                entry.related_entities.insert(cc);
9081            }
9082
9083            // O2C chains: Company → Customer relationships
9084            for chain in &document_flows.o2c_chains {
9085                let cc = chain.sales_order.header.company_code.clone();
9086                let customer_id = chain.sales_order.customer_id.clone();
9087                let so_date = chain.sales_order.header.document_date;
9088                let amount = chain.sales_order.total_net_amount;
9089
9090                let entry = txn_summaries
9091                    .entry((cc.clone(), customer_id))
9092                    .or_insert_with(|| TransactionSummary {
9093                        total_volume: rust_decimal::Decimal::ZERO,
9094                        transaction_count: 0,
9095                        first_transaction_date: so_date,
9096                        last_transaction_date: so_date,
9097                        related_entities: std::collections::HashSet::new(),
9098                    });
9099                entry.total_volume += amount;
9100                entry.transaction_count += 1;
9101                if so_date < entry.first_transaction_date {
9102                    entry.first_transaction_date = so_date;
9103                }
9104                if so_date > entry.last_transaction_date {
9105                    entry.last_transaction_date = so_date;
9106                }
9107                entry.related_entities.insert(cc);
9108            }
9109
9110            let as_of_date = journal_entries
9111                .last()
9112                .map(|je| je.header.posting_date)
9113                .unwrap_or(start_date);
9114
9115            let graph = gen.generate_entity_graph(
9116                company_code,
9117                as_of_date,
9118                &vendor_summaries,
9119                &customer_summaries,
9120                &txn_summaries,
9121            );
9122
9123            info!(
9124                "Entity relationship graph: {} nodes, {} edges",
9125                graph.nodes.len(),
9126                graph.edges.len()
9127            );
9128            stats.entity_relationship_node_count = graph.nodes.len();
9129            stats.entity_relationship_edge_count = graph.edges.len();
9130            Some(graph)
9131        } else {
9132            None
9133        };
9134
9135        // --- Part 2: Cross-Process Links ---
9136        let cross_process_links = if cpl_enabled {
9137            // Build GoodsReceiptRef from P2P chains
9138            let gr_refs: Vec<GoodsReceiptRef> = document_flows
9139                .p2p_chains
9140                .iter()
9141                .flat_map(|chain| {
9142                    let vendor_id = chain.purchase_order.vendor_id.clone();
9143                    let cc = chain.purchase_order.header.company_code.clone();
9144                    chain.goods_receipts.iter().flat_map(move |gr| {
9145                        gr.items.iter().filter_map({
9146                            let doc_id = gr.header.document_id.clone();
9147                            let v_id = vendor_id.clone();
9148                            let company = cc.clone();
9149                            let receipt_date = gr.header.document_date;
9150                            move |item| {
9151                                item.base
9152                                    .material_id
9153                                    .as_ref()
9154                                    .map(|mat_id| GoodsReceiptRef {
9155                                        document_id: doc_id.clone(),
9156                                        material_id: mat_id.clone(),
9157                                        quantity: item.base.quantity,
9158                                        receipt_date,
9159                                        vendor_id: v_id.clone(),
9160                                        company_code: company.clone(),
9161                                    })
9162                            }
9163                        })
9164                    })
9165                })
9166                .collect();
9167
9168            // Build DeliveryRef from O2C chains
9169            let del_refs: Vec<DeliveryRef> = document_flows
9170                .o2c_chains
9171                .iter()
9172                .flat_map(|chain| {
9173                    let customer_id = chain.sales_order.customer_id.clone();
9174                    let cc = chain.sales_order.header.company_code.clone();
9175                    chain.deliveries.iter().flat_map(move |del| {
9176                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
9177                        del.items.iter().filter_map({
9178                            let doc_id = del.header.document_id.clone();
9179                            let c_id = customer_id.clone();
9180                            let company = cc.clone();
9181                            move |item| {
9182                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
9183                                    document_id: doc_id.clone(),
9184                                    material_id: mat_id.clone(),
9185                                    quantity: item.base.quantity,
9186                                    delivery_date,
9187                                    customer_id: c_id.clone(),
9188                                    company_code: company.clone(),
9189                                })
9190                            }
9191                        })
9192                    })
9193                })
9194                .collect();
9195
9196            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
9197            info!("Cross-process links generated: {} links", links.len());
9198            stats.cross_process_link_count = links.len();
9199            links
9200        } else {
9201            Vec::new()
9202        };
9203
9204        self.check_resources_with_log("post-entity-relationships")?;
9205        Ok((entity_graph, cross_process_links))
9206    }
9207
9208    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
9209    fn phase_industry_data(
9210        &self,
9211        stats: &mut EnhancedGenerationStatistics,
9212    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
9213        if !self.config.industry_specific.enabled {
9214            return None;
9215        }
9216        info!("Phase 29: Generating industry-specific data");
9217        let output = datasynth_generators::industry::factory::generate_industry_output(
9218            self.config.global.industry,
9219        );
9220        stats.industry_gl_account_count = output.gl_accounts.len();
9221        info!(
9222            "Industry data generated: {} GL accounts for {:?}",
9223            output.gl_accounts.len(),
9224            self.config.global.industry
9225        );
9226        Some(output)
9227    }
9228
9229    /// Phase 3b: Generate opening balances for each company.
9230    fn phase_opening_balances(
9231        &mut self,
9232        coa: &Arc<ChartOfAccounts>,
9233        stats: &mut EnhancedGenerationStatistics,
9234    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
9235        if !self.config.balance.generate_opening_balances {
9236            debug!("Phase 3b: Skipped (opening balance generation disabled)");
9237            return Ok(Vec::new());
9238        }
9239        info!("Phase 3b: Generating Opening Balances");
9240
9241        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9242            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9243        let fiscal_year = start_date.year();
9244
9245        let industry = match self.config.global.industry {
9246            IndustrySector::Manufacturing => IndustryType::Manufacturing,
9247            IndustrySector::Retail => IndustryType::Retail,
9248            IndustrySector::FinancialServices => IndustryType::Financial,
9249            IndustrySector::Healthcare => IndustryType::Healthcare,
9250            IndustrySector::Technology => IndustryType::Technology,
9251            _ => IndustryType::Manufacturing,
9252        };
9253
9254        let config = datasynth_generators::OpeningBalanceConfig {
9255            industry,
9256            ..Default::default()
9257        };
9258        let mut gen =
9259            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
9260
9261        let mut results = Vec::new();
9262        for company in &self.config.companies {
9263            let spec = OpeningBalanceSpec::new(
9264                company.code.clone(),
9265                start_date,
9266                fiscal_year,
9267                company.currency.clone(),
9268                rust_decimal::Decimal::new(10_000_000, 0),
9269                industry,
9270            );
9271            let ob = gen.generate(&spec, coa, start_date, &company.code);
9272            results.push(ob);
9273        }
9274
9275        stats.opening_balance_count = results.len();
9276        info!("Opening balances generated: {} companies", results.len());
9277        self.check_resources_with_log("post-opening-balances")?;
9278
9279        Ok(results)
9280    }
9281
9282    /// Phase 9b: Reconcile GL control accounts to subledger balances.
9283    fn phase_subledger_reconciliation(
9284        &mut self,
9285        subledger: &SubledgerSnapshot,
9286        entries: &[JournalEntry],
9287        stats: &mut EnhancedGenerationStatistics,
9288    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
9289        if !self.config.balance.reconcile_subledgers {
9290            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
9291            return Ok(Vec::new());
9292        }
9293        info!("Phase 9b: Reconciling GL to subledger balances");
9294
9295        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9296            .map(|d| d + chrono::Months::new(self.config.global.period_months))
9297            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9298
9299        // Build GL balance map from journal entries using a balance tracker
9300        let tracker_config = BalanceTrackerConfig {
9301            validate_on_each_entry: false,
9302            track_history: false,
9303            fail_on_validation_error: false,
9304            ..Default::default()
9305        };
9306        let recon_currency = self
9307            .config
9308            .companies
9309            .first()
9310            .map(|c| c.currency.clone())
9311            .unwrap_or_else(|| "USD".to_string());
9312        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
9313        let validation_errors = tracker.apply_entries(entries);
9314        if !validation_errors.is_empty() {
9315            warn!(
9316                error_count = validation_errors.len(),
9317                "Balance tracker encountered validation errors during subledger reconciliation"
9318            );
9319            for err in &validation_errors {
9320                debug!("Balance validation error: {:?}", err);
9321            }
9322        }
9323
9324        let mut engine = datasynth_generators::ReconciliationEngine::new(
9325            datasynth_generators::ReconciliationConfig::default(),
9326        );
9327
9328        let mut results = Vec::new();
9329        let company_code = self
9330            .config
9331            .companies
9332            .first()
9333            .map(|c| c.code.as_str())
9334            .unwrap_or("1000");
9335
9336        // Reconcile AR
9337        if !subledger.ar_invoices.is_empty() {
9338            let gl_balance = tracker
9339                .get_account_balance(
9340                    company_code,
9341                    datasynth_core::accounts::control_accounts::AR_CONTROL,
9342                )
9343                .map(|b| b.closing_balance)
9344                .unwrap_or_default();
9345            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
9346            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
9347        }
9348
9349        // Reconcile AP
9350        if !subledger.ap_invoices.is_empty() {
9351            let gl_balance = tracker
9352                .get_account_balance(
9353                    company_code,
9354                    datasynth_core::accounts::control_accounts::AP_CONTROL,
9355                )
9356                .map(|b| b.closing_balance)
9357                .unwrap_or_default();
9358            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
9359            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
9360        }
9361
9362        // Reconcile FA
9363        if !subledger.fa_records.is_empty() {
9364            let gl_asset_balance = tracker
9365                .get_account_balance(
9366                    company_code,
9367                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9368                )
9369                .map(|b| b.closing_balance)
9370                .unwrap_or_default();
9371            let gl_accum_depr_balance = tracker
9372                .get_account_balance(
9373                    company_code,
9374                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
9375                )
9376                .map(|b| b.closing_balance)
9377                .unwrap_or_default();
9378            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
9379                subledger.fa_records.iter().collect();
9380            let (asset_recon, depr_recon) = engine.reconcile_fa(
9381                company_code,
9382                end_date,
9383                gl_asset_balance,
9384                gl_accum_depr_balance,
9385                &fa_refs,
9386            );
9387            results.push(asset_recon);
9388            results.push(depr_recon);
9389        }
9390
9391        // Reconcile Inventory
9392        if !subledger.inventory_positions.is_empty() {
9393            let gl_balance = tracker
9394                .get_account_balance(
9395                    company_code,
9396                    datasynth_core::accounts::control_accounts::INVENTORY,
9397                )
9398                .map(|b| b.closing_balance)
9399                .unwrap_or_default();
9400            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9401                subledger.inventory_positions.iter().collect();
9402            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9403        }
9404
9405        stats.subledger_reconciliation_count = results.len();
9406        let passed = results.iter().filter(|r| r.is_balanced()).count();
9407        let failed = results.len() - passed;
9408        info!(
9409            "Subledger reconciliation: {} checks, {} passed, {} failed",
9410            results.len(),
9411            passed,
9412            failed
9413        );
9414        self.check_resources_with_log("post-subledger-reconciliation")?;
9415
9416        Ok(results)
9417    }
9418
9419    /// Generate the chart of accounts.
9420    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9421        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9422
9423        let coa_framework = self.resolve_coa_framework();
9424
9425        let mut gen = ChartOfAccountsGenerator::new(
9426            self.config.chart_of_accounts.complexity,
9427            self.config.global.industry,
9428            self.seed,
9429        )
9430        .with_coa_framework(coa_framework);
9431
9432        let coa = Arc::new(gen.generate());
9433        self.coa = Some(Arc::clone(&coa));
9434
9435        if let Some(pb) = pb {
9436            pb.finish_with_message("Chart of Accounts complete");
9437        }
9438
9439        Ok(coa)
9440    }
9441
9442    /// Generate master data entities.
9443    fn generate_master_data(&mut self) -> SynthResult<()> {
9444        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9445            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9446        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9447
9448        let total = self.config.companies.len() as u64 * 5; // 5 entity types
9449        let pb = self.create_progress_bar(total, "Generating Master Data");
9450
9451        // Resolve country pack once for all companies (uses primary company's country)
9452        let pack = self.primary_pack().clone();
9453
9454        // Capture config values needed inside the parallel closure
9455        let vendors_per_company = self.phase_config.vendors_per_company;
9456        let customers_per_company = self.phase_config.customers_per_company;
9457        let materials_per_company = self.phase_config.materials_per_company;
9458        let assets_per_company = self.phase_config.assets_per_company;
9459        let coa_framework = self.resolve_coa_framework();
9460
9461        // Generate all master data in parallel across companies.
9462        // Each company's data is independent, making this embarrassingly parallel.
9463        let per_company_results: Vec<_> = self
9464            .config
9465            .companies
9466            .par_iter()
9467            .enumerate()
9468            .map(|(i, company)| {
9469                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9470                let pack = pack.clone();
9471
9472                // Generate vendors (offset counter so IDs are globally unique across companies)
9473                let mut vendor_gen = VendorGenerator::new(company_seed);
9474                vendor_gen.set_country_pack(pack.clone());
9475                vendor_gen.set_coa_framework(coa_framework);
9476                vendor_gen.set_counter_offset(i * vendors_per_company);
9477                // Wire vendor network config when enabled
9478                if self.config.vendor_network.enabled {
9479                    let vn = &self.config.vendor_network;
9480                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9481                        enabled: true,
9482                        depth: vn.depth,
9483                        tier1_count: datasynth_generators::TierCountConfig::new(
9484                            vn.tier1.min,
9485                            vn.tier1.max,
9486                        ),
9487                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
9488                            vn.tier2_per_parent.min,
9489                            vn.tier2_per_parent.max,
9490                        ),
9491                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
9492                            vn.tier3_per_parent.min,
9493                            vn.tier3_per_parent.max,
9494                        ),
9495                        cluster_distribution: datasynth_generators::ClusterDistribution {
9496                            reliable_strategic: vn.clusters.reliable_strategic,
9497                            standard_operational: vn.clusters.standard_operational,
9498                            transactional: vn.clusters.transactional,
9499                            problematic: vn.clusters.problematic,
9500                        },
9501                        concentration_limits: datasynth_generators::ConcentrationLimits {
9502                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9503                            max_top5: vn.dependencies.top_5_concentration,
9504                        },
9505                        ..datasynth_generators::VendorNetworkConfig::default()
9506                    });
9507                }
9508                let vendor_pool =
9509                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9510
9511                // Generate customers (offset counter so IDs are globally unique across companies)
9512                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9513                customer_gen.set_country_pack(pack.clone());
9514                customer_gen.set_coa_framework(coa_framework);
9515                customer_gen.set_counter_offset(i * customers_per_company);
9516                // Wire customer segmentation config when enabled
9517                if self.config.customer_segmentation.enabled {
9518                    let cs = &self.config.customer_segmentation;
9519                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9520                        enabled: true,
9521                        segment_distribution: datasynth_generators::SegmentDistribution {
9522                            enterprise: cs.value_segments.enterprise.customer_share,
9523                            mid_market: cs.value_segments.mid_market.customer_share,
9524                            smb: cs.value_segments.smb.customer_share,
9525                            consumer: cs.value_segments.consumer.customer_share,
9526                        },
9527                        referral_config: datasynth_generators::ReferralConfig {
9528                            enabled: cs.networks.referrals.enabled,
9529                            referral_rate: cs.networks.referrals.referral_rate,
9530                            ..Default::default()
9531                        },
9532                        hierarchy_config: datasynth_generators::HierarchyConfig {
9533                            enabled: cs.networks.corporate_hierarchies.enabled,
9534                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9535                            ..Default::default()
9536                        },
9537                        ..Default::default()
9538                    };
9539                    customer_gen.set_segmentation_config(seg_cfg);
9540                }
9541                let customer_pool = customer_gen.generate_customer_pool(
9542                    customers_per_company,
9543                    &company.code,
9544                    start_date,
9545                );
9546
9547                // Generate materials (offset counter so IDs are globally unique across companies)
9548                let mut material_gen = MaterialGenerator::new(company_seed + 200);
9549                material_gen.set_country_pack(pack.clone());
9550                material_gen.set_counter_offset(i * materials_per_company);
9551                let material_pool = material_gen.generate_material_pool(
9552                    materials_per_company,
9553                    &company.code,
9554                    start_date,
9555                );
9556
9557                // Generate fixed assets
9558                let mut asset_gen = AssetGenerator::new(company_seed + 300);
9559                let asset_pool = asset_gen.generate_asset_pool(
9560                    assets_per_company,
9561                    &company.code,
9562                    (start_date, end_date),
9563                );
9564
9565                // Generate employees
9566                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9567                employee_gen.set_country_pack(pack);
9568                let employee_pool =
9569                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9570
9571                // Generate employee change history (2-5 events per employee)
9572                let employee_change_history =
9573                    employee_gen.generate_all_change_history(&employee_pool, end_date);
9574
9575                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
9576                let employee_ids: Vec<String> = employee_pool
9577                    .employees
9578                    .iter()
9579                    .map(|e| e.employee_id.clone())
9580                    .collect();
9581                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9582                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9583
9584                (
9585                    vendor_pool.vendors,
9586                    customer_pool.customers,
9587                    material_pool.materials,
9588                    asset_pool.assets,
9589                    employee_pool.employees,
9590                    employee_change_history,
9591                    cost_centers,
9592                )
9593            })
9594            .collect();
9595
9596        // Aggregate results from all companies
9597        for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9598            per_company_results
9599        {
9600            self.master_data.vendors.extend(vendors);
9601            self.master_data.customers.extend(customers);
9602            self.master_data.materials.extend(materials);
9603            self.master_data.assets.extend(assets);
9604            self.master_data.employees.extend(employees);
9605            self.master_data.cost_centers.extend(cost_centers);
9606            self.master_data
9607                .employee_change_history
9608                .extend(change_history);
9609        }
9610
9611        if let Some(pb) = &pb {
9612            pb.inc(total);
9613        }
9614        if let Some(pb) = pb {
9615            pb.finish_with_message("Master data generation complete");
9616        }
9617
9618        Ok(())
9619    }
9620
9621    /// Generate document flows (P2P and O2C).
9622    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9623        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9624            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9625
9626        // Generate P2P chains
9627        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
9628        let months = (self.config.global.period_months as usize).max(1);
9629        let p2p_count = self
9630            .phase_config
9631            .p2p_chains
9632            .min(self.master_data.vendors.len() * 2 * months);
9633        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9634
9635        // Convert P2P config from schema to generator config
9636        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9637        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9638        p2p_gen.set_country_pack(self.primary_pack().clone());
9639
9640        for i in 0..p2p_count {
9641            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9642            let materials: Vec<&Material> = self
9643                .master_data
9644                .materials
9645                .iter()
9646                .skip(i % self.master_data.materials.len().max(1))
9647                .take(2.min(self.master_data.materials.len()))
9648                .collect();
9649
9650            if materials.is_empty() {
9651                continue;
9652            }
9653
9654            let company = &self.config.companies[i % self.config.companies.len()];
9655            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9656            let fiscal_period = po_date.month() as u8;
9657            let created_by = if self.master_data.employees.is_empty() {
9658                "SYSTEM"
9659            } else {
9660                self.master_data.employees[i % self.master_data.employees.len()]
9661                    .user_id
9662                    .as_str()
9663            };
9664
9665            let chain = p2p_gen.generate_chain(
9666                &company.code,
9667                vendor,
9668                &materials,
9669                po_date,
9670                start_date.year() as u16,
9671                fiscal_period,
9672                created_by,
9673            );
9674
9675            // Flatten documents
9676            flows.purchase_orders.push(chain.purchase_order.clone());
9677            flows.goods_receipts.extend(chain.goods_receipts.clone());
9678            if let Some(vi) = &chain.vendor_invoice {
9679                flows.vendor_invoices.push(vi.clone());
9680            }
9681            if let Some(payment) = &chain.payment {
9682                flows.payments.push(payment.clone());
9683            }
9684            for remainder in &chain.remainder_payments {
9685                flows.payments.push(remainder.clone());
9686            }
9687            flows.p2p_chains.push(chain);
9688
9689            if let Some(pb) = &pb {
9690                pb.inc(1);
9691            }
9692        }
9693
9694        if let Some(pb) = pb {
9695            pb.finish_with_message("P2P document flows complete");
9696        }
9697
9698        // Generate O2C chains
9699        // Cap at ~2 SOs per customer per month to keep order volume realistic
9700        let o2c_count = self
9701            .phase_config
9702            .o2c_chains
9703            .min(self.master_data.customers.len() * 2 * months);
9704        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9705
9706        // Convert O2C config from schema to generator config
9707        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9708        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9709        o2c_gen.set_country_pack(self.primary_pack().clone());
9710
9711        for i in 0..o2c_count {
9712            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9713            let materials: Vec<&Material> = self
9714                .master_data
9715                .materials
9716                .iter()
9717                .skip(i % self.master_data.materials.len().max(1))
9718                .take(2.min(self.master_data.materials.len()))
9719                .collect();
9720
9721            if materials.is_empty() {
9722                continue;
9723            }
9724
9725            let company = &self.config.companies[i % self.config.companies.len()];
9726            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9727            let fiscal_period = so_date.month() as u8;
9728            let created_by = if self.master_data.employees.is_empty() {
9729                "SYSTEM"
9730            } else {
9731                self.master_data.employees[i % self.master_data.employees.len()]
9732                    .user_id
9733                    .as_str()
9734            };
9735
9736            let chain = o2c_gen.generate_chain(
9737                &company.code,
9738                customer,
9739                &materials,
9740                so_date,
9741                start_date.year() as u16,
9742                fiscal_period,
9743                created_by,
9744            );
9745
9746            // Flatten documents
9747            flows.sales_orders.push(chain.sales_order.clone());
9748            flows.deliveries.extend(chain.deliveries.clone());
9749            if let Some(ci) = &chain.customer_invoice {
9750                flows.customer_invoices.push(ci.clone());
9751            }
9752            if let Some(receipt) = &chain.customer_receipt {
9753                flows.payments.push(receipt.clone());
9754            }
9755            // Extract remainder receipts (follow-up to partial payments)
9756            for receipt in &chain.remainder_receipts {
9757                flows.payments.push(receipt.clone());
9758            }
9759            flows.o2c_chains.push(chain);
9760
9761            if let Some(pb) = &pb {
9762                pb.inc(1);
9763            }
9764        }
9765
9766        if let Some(pb) = pb {
9767            pb.finish_with_message("O2C document flows complete");
9768        }
9769
9770        // Collect all document cross-references from document headers.
9771        // Each document embeds references to its predecessor(s) via add_reference(); here we
9772        // denormalise them into a flat list for the document_references.json output file.
9773        {
9774            let mut refs = Vec::new();
9775            for doc in &flows.purchase_orders {
9776                refs.extend(doc.header.document_references.iter().cloned());
9777            }
9778            for doc in &flows.goods_receipts {
9779                refs.extend(doc.header.document_references.iter().cloned());
9780            }
9781            for doc in &flows.vendor_invoices {
9782                refs.extend(doc.header.document_references.iter().cloned());
9783            }
9784            for doc in &flows.sales_orders {
9785                refs.extend(doc.header.document_references.iter().cloned());
9786            }
9787            for doc in &flows.deliveries {
9788                refs.extend(doc.header.document_references.iter().cloned());
9789            }
9790            for doc in &flows.customer_invoices {
9791                refs.extend(doc.header.document_references.iter().cloned());
9792            }
9793            for doc in &flows.payments {
9794                refs.extend(doc.header.document_references.iter().cloned());
9795            }
9796            debug!(
9797                "Collected {} document cross-references from document headers",
9798                refs.len()
9799            );
9800            flows.document_references = refs;
9801        }
9802
9803        Ok(())
9804    }
9805
9806    /// Generate journal entries using parallel generation across multiple cores.
9807    fn generate_journal_entries(
9808        &mut self,
9809        coa: &Arc<ChartOfAccounts>,
9810    ) -> SynthResult<Vec<JournalEntry>> {
9811        use datasynth_core::traits::ParallelGenerator;
9812
9813        let total = self.calculate_total_transactions();
9814        let pb = self.create_progress_bar(total, "Generating Journal Entries");
9815
9816        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9817            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9818        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9819
9820        let company_codes: Vec<String> = self
9821            .config
9822            .companies
9823            .iter()
9824            .map(|c| c.code.clone())
9825            .collect();
9826
9827        let mut generator = JournalEntryGenerator::new_with_params(
9828            self.config.transactions.clone(),
9829            Arc::clone(coa),
9830            company_codes,
9831            start_date,
9832            end_date,
9833            self.seed,
9834        );
9835        // Wire the `business_processes.*_weight` config through (phantom knob
9836        // until now — the JE generator hard-coded 0.35/0.30/0.20/0.10/0.05).
9837        let bp = &self.config.business_processes;
9838        generator.set_business_process_weights(
9839            bp.o2c_weight,
9840            bp.p2p_weight,
9841            bp.r2r_weight,
9842            bp.h2r_weight,
9843            bp.a2r_weight,
9844        );
9845        let generator = generator;
9846
9847        // Connect generated master data to ensure JEs reference real entities
9848        // Enable persona-based error injection for realistic human behavior
9849        // Pass fraud configuration for fraud injection
9850        let je_pack = self.primary_pack();
9851
9852        let mut generator = generator
9853            .with_master_data(
9854                &self.master_data.vendors,
9855                &self.master_data.customers,
9856                &self.master_data.materials,
9857            )
9858            .with_country_pack_names(je_pack)
9859            .with_country_pack_temporal(
9860                self.config.temporal_patterns.clone(),
9861                self.seed + 200,
9862                je_pack,
9863            )
9864            .with_persona_errors(true)
9865            .with_fraud_config(self.config.fraud.clone());
9866
9867        // Apply temporal drift if configured
9868        if self.config.temporal.enabled {
9869            let drift_config = self.config.temporal.to_core_config();
9870            generator = generator.with_drift_config(drift_config, self.seed + 100);
9871        }
9872
9873        // Check memory limit at start
9874        self.check_memory_limit()?;
9875
9876        // Determine parallelism: use available cores, but cap at total entries
9877        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9878
9879        // Use parallel generation for datasets with 10K+ entries.
9880        // Below this threshold, the statistical properties of a single-seeded
9881        // generator (e.g. Benford compliance) are better preserved.
9882        let entries = if total >= 10_000 && num_threads > 1 {
9883            // Parallel path: split the generator across cores and generate in parallel.
9884            // Each sub-generator gets a unique seed for deterministic, independent generation.
9885            let sub_generators = generator.split(num_threads);
9886            let entries_per_thread = total as usize / num_threads;
9887            let remainder = total as usize % num_threads;
9888
9889            let batches: Vec<Vec<JournalEntry>> = sub_generators
9890                .into_par_iter()
9891                .enumerate()
9892                .map(|(i, mut gen)| {
9893                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9894                    gen.generate_batch(count)
9895                })
9896                .collect();
9897
9898            // Merge all batches into a single Vec
9899            let entries = JournalEntryGenerator::merge_results(batches);
9900
9901            if let Some(pb) = &pb {
9902                pb.inc(total);
9903            }
9904            entries
9905        } else {
9906            // Sequential path for small datasets (< 1000 entries)
9907            let mut entries = Vec::with_capacity(total as usize);
9908            for _ in 0..total {
9909                let entry = generator.generate();
9910                entries.push(entry);
9911                if let Some(pb) = &pb {
9912                    pb.inc(1);
9913                }
9914            }
9915            entries
9916        };
9917
9918        if let Some(pb) = pb {
9919            pb.finish_with_message("Journal entries complete");
9920        }
9921
9922        Ok(entries)
9923    }
9924
9925    /// Generate journal entries from document flows.
9926    ///
9927    /// This creates proper GL entries for each document in the P2P and O2C flows,
9928    /// ensuring that document activity is reflected in the general ledger.
9929    fn generate_jes_from_document_flows(
9930        &mut self,
9931        flows: &DocumentFlowSnapshot,
9932    ) -> SynthResult<Vec<JournalEntry>> {
9933        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9934        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9935
9936        let je_config = match self.resolve_coa_framework() {
9937            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9938            CoAFramework::GermanSkr04 => {
9939                let fa = datasynth_core::FrameworkAccounts::german_gaap();
9940                DocumentFlowJeConfig::from(&fa)
9941            }
9942            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9943        };
9944
9945        let populate_fec = je_config.populate_fec_fields;
9946        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9947
9948        // Build auxiliary account lookup from vendor/customer master data so that
9949        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
9950        // PCG "4010001") instead of raw partner IDs.
9951        if populate_fec {
9952            let mut aux_lookup = std::collections::HashMap::new();
9953            for vendor in &self.master_data.vendors {
9954                if let Some(ref aux) = vendor.auxiliary_gl_account {
9955                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9956                }
9957            }
9958            for customer in &self.master_data.customers {
9959                if let Some(ref aux) = customer.auxiliary_gl_account {
9960                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
9961                }
9962            }
9963            if !aux_lookup.is_empty() {
9964                generator.set_auxiliary_account_lookup(aux_lookup);
9965            }
9966        }
9967
9968        let mut entries = Vec::new();
9969
9970        // Generate JEs from P2P chains
9971        for chain in &flows.p2p_chains {
9972            let chain_entries = generator.generate_from_p2p_chain(chain);
9973            entries.extend(chain_entries);
9974            if let Some(pb) = &pb {
9975                pb.inc(1);
9976            }
9977        }
9978
9979        // Generate JEs from O2C chains
9980        for chain in &flows.o2c_chains {
9981            let chain_entries = generator.generate_from_o2c_chain(chain);
9982            entries.extend(chain_entries);
9983            if let Some(pb) = &pb {
9984                pb.inc(1);
9985            }
9986        }
9987
9988        if let Some(pb) = pb {
9989            pb.finish_with_message(format!(
9990                "Generated {} JEs from document flows",
9991                entries.len()
9992            ));
9993        }
9994
9995        Ok(entries)
9996    }
9997
9998    /// Generate journal entries from payroll runs.
9999    ///
10000    /// Creates one JE per payroll run:
10001    /// - DR Salaries & Wages (6100) for gross pay
10002    /// - CR Payroll Clearing (9100) for gross pay
10003    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
10004        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
10005
10006        let mut jes = Vec::with_capacity(payroll_runs.len());
10007
10008        for run in payroll_runs {
10009            let mut je = JournalEntry::new_simple(
10010                format!("JE-PAYROLL-{}", run.payroll_id),
10011                run.company_code.clone(),
10012                run.run_date,
10013                format!("Payroll {}", run.payroll_id),
10014            );
10015
10016            // Debit Salaries & Wages for gross pay
10017            je.add_line(JournalEntryLine {
10018                line_number: 1,
10019                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
10020                debit_amount: run.total_gross,
10021                reference: Some(run.payroll_id.clone()),
10022                text: Some(format!(
10023                    "Payroll {} ({} employees)",
10024                    run.payroll_id, run.employee_count
10025                )),
10026                ..Default::default()
10027            });
10028
10029            // Credit Payroll Clearing for gross pay
10030            je.add_line(JournalEntryLine {
10031                line_number: 2,
10032                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
10033                credit_amount: run.total_gross,
10034                reference: Some(run.payroll_id.clone()),
10035                ..Default::default()
10036            });
10037
10038            jes.push(je);
10039        }
10040
10041        jes
10042    }
10043
10044    /// Link document flows to subledger records.
10045    ///
10046    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
10047    /// ensuring subledger data is coherent with document flow data.
10048    fn link_document_flows_to_subledgers(
10049        &mut self,
10050        flows: &DocumentFlowSnapshot,
10051    ) -> SynthResult<SubledgerSnapshot> {
10052        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
10053        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
10054
10055        // Build vendor/customer name maps from master data for realistic subledger names
10056        let vendor_names: std::collections::HashMap<String, String> = self
10057            .master_data
10058            .vendors
10059            .iter()
10060            .map(|v| (v.vendor_id.clone(), v.name.clone()))
10061            .collect();
10062        let customer_names: std::collections::HashMap<String, String> = self
10063            .master_data
10064            .customers
10065            .iter()
10066            .map(|c| (c.customer_id.clone(), c.name.clone()))
10067            .collect();
10068
10069        let mut linker = DocumentFlowLinker::new()
10070            .with_vendor_names(vendor_names)
10071            .with_customer_names(customer_names);
10072
10073        // Convert vendor invoices to AP invoices
10074        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
10075        if let Some(pb) = &pb {
10076            pb.inc(flows.vendor_invoices.len() as u64);
10077        }
10078
10079        // Convert customer invoices to AR invoices
10080        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
10081        if let Some(pb) = &pb {
10082            pb.inc(flows.customer_invoices.len() as u64);
10083        }
10084
10085        if let Some(pb) = pb {
10086            pb.finish_with_message(format!(
10087                "Linked {} AP and {} AR invoices",
10088                ap_invoices.len(),
10089                ar_invoices.len()
10090            ));
10091        }
10092
10093        Ok(SubledgerSnapshot {
10094            ap_invoices,
10095            ar_invoices,
10096            fa_records: Vec::new(),
10097            inventory_positions: Vec::new(),
10098            inventory_movements: Vec::new(),
10099            // Aging reports are computed after payment settlement in phase_document_flows.
10100            ar_aging_reports: Vec::new(),
10101            ap_aging_reports: Vec::new(),
10102            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
10103            depreciation_runs: Vec::new(),
10104            inventory_valuations: Vec::new(),
10105            // Dunning runs and letters are populated in phase_document_flows after AR aging.
10106            dunning_runs: Vec::new(),
10107            dunning_letters: Vec::new(),
10108        })
10109    }
10110
10111    /// Generate OCPM events from document flows.
10112    ///
10113    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
10114    /// capturing the object-centric process perspective.
10115    #[allow(clippy::too_many_arguments)]
10116    fn generate_ocpm_events(
10117        &mut self,
10118        flows: &DocumentFlowSnapshot,
10119        sourcing: &SourcingSnapshot,
10120        hr: &HrSnapshot,
10121        manufacturing: &ManufacturingSnapshot,
10122        banking: &BankingSnapshot,
10123        audit: &AuditSnapshot,
10124        financial_reporting: &FinancialReportingSnapshot,
10125    ) -> SynthResult<OcpmSnapshot> {
10126        let total_chains = flows.p2p_chains.len()
10127            + flows.o2c_chains.len()
10128            + sourcing.sourcing_projects.len()
10129            + hr.payroll_runs.len()
10130            + manufacturing.production_orders.len()
10131            + banking.customers.len()
10132            + audit.engagements.len()
10133            + financial_reporting.bank_reconciliations.len();
10134        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
10135
10136        // Create OCPM event log with standard types
10137        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
10138        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
10139
10140        // Configure the OCPM generator
10141        let ocpm_config = OcpmGeneratorConfig {
10142            generate_p2p: true,
10143            generate_o2c: true,
10144            generate_s2c: !sourcing.sourcing_projects.is_empty(),
10145            generate_h2r: !hr.payroll_runs.is_empty(),
10146            generate_mfg: !manufacturing.production_orders.is_empty(),
10147            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
10148            generate_bank: !banking.customers.is_empty(),
10149            generate_audit: !audit.engagements.is_empty(),
10150            happy_path_rate: 0.75,
10151            exception_path_rate: 0.20,
10152            error_path_rate: 0.05,
10153            add_duration_variability: true,
10154            duration_std_dev_factor: 0.3,
10155        };
10156        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
10157        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
10158
10159        // Get available users for resource assignment
10160        let available_users: Vec<String> = self
10161            .master_data
10162            .employees
10163            .iter()
10164            .take(20)
10165            .map(|e| e.user_id.clone())
10166            .collect();
10167
10168        // Deterministic base date from config (avoids Utc::now() non-determinism)
10169        let fallback_date =
10170            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
10171        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10172            .unwrap_or(fallback_date);
10173        let base_midnight = base_date
10174            .and_hms_opt(0, 0, 0)
10175            .expect("midnight is always valid");
10176        let base_datetime =
10177            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
10178
10179        // Helper closure to add case results to event log
10180        let add_result = |event_log: &mut OcpmEventLog,
10181                          result: datasynth_ocpm::CaseGenerationResult| {
10182            for event in result.events {
10183                event_log.add_event(event);
10184            }
10185            for object in result.objects {
10186                event_log.add_object(object);
10187            }
10188            for relationship in result.relationships {
10189                event_log.add_relationship(relationship);
10190            }
10191            for corr in result.correlation_events {
10192                event_log.add_correlation_event(corr);
10193            }
10194            event_log.add_case(result.case_trace);
10195        };
10196
10197        // Generate events from P2P chains
10198        for chain in &flows.p2p_chains {
10199            let po = &chain.purchase_order;
10200            let documents = P2pDocuments::new(
10201                &po.header.document_id,
10202                &po.vendor_id,
10203                &po.header.company_code,
10204                po.total_net_amount,
10205                &po.header.currency,
10206                &ocpm_uuid_factory,
10207            )
10208            .with_goods_receipt(
10209                chain
10210                    .goods_receipts
10211                    .first()
10212                    .map(|gr| gr.header.document_id.as_str())
10213                    .unwrap_or(""),
10214                &ocpm_uuid_factory,
10215            )
10216            .with_invoice(
10217                chain
10218                    .vendor_invoice
10219                    .as_ref()
10220                    .map(|vi| vi.header.document_id.as_str())
10221                    .unwrap_or(""),
10222                &ocpm_uuid_factory,
10223            )
10224            .with_payment(
10225                chain
10226                    .payment
10227                    .as_ref()
10228                    .map(|p| p.header.document_id.as_str())
10229                    .unwrap_or(""),
10230                &ocpm_uuid_factory,
10231            );
10232
10233            let start_time =
10234                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
10235            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
10236            add_result(&mut event_log, result);
10237
10238            if let Some(pb) = &pb {
10239                pb.inc(1);
10240            }
10241        }
10242
10243        // Generate events from O2C chains
10244        for chain in &flows.o2c_chains {
10245            let so = &chain.sales_order;
10246            let documents = O2cDocuments::new(
10247                &so.header.document_id,
10248                &so.customer_id,
10249                &so.header.company_code,
10250                so.total_net_amount,
10251                &so.header.currency,
10252                &ocpm_uuid_factory,
10253            )
10254            .with_delivery(
10255                chain
10256                    .deliveries
10257                    .first()
10258                    .map(|d| d.header.document_id.as_str())
10259                    .unwrap_or(""),
10260                &ocpm_uuid_factory,
10261            )
10262            .with_invoice(
10263                chain
10264                    .customer_invoice
10265                    .as_ref()
10266                    .map(|ci| ci.header.document_id.as_str())
10267                    .unwrap_or(""),
10268                &ocpm_uuid_factory,
10269            )
10270            .with_receipt(
10271                chain
10272                    .customer_receipt
10273                    .as_ref()
10274                    .map(|r| r.header.document_id.as_str())
10275                    .unwrap_or(""),
10276                &ocpm_uuid_factory,
10277            );
10278
10279            let start_time =
10280                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
10281            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
10282            add_result(&mut event_log, result);
10283
10284            if let Some(pb) = &pb {
10285                pb.inc(1);
10286            }
10287        }
10288
10289        // Generate events from S2C sourcing projects
10290        for project in &sourcing.sourcing_projects {
10291            // Find vendor from contracts or qualifications
10292            let vendor_id = sourcing
10293                .contracts
10294                .iter()
10295                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10296                .map(|c| c.vendor_id.clone())
10297                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
10298                .or_else(|| {
10299                    self.master_data
10300                        .vendors
10301                        .first()
10302                        .map(|v| v.vendor_id.clone())
10303                })
10304                .unwrap_or_else(|| "V000".to_string());
10305            let mut docs = S2cDocuments::new(
10306                &project.project_id,
10307                &vendor_id,
10308                &project.company_code,
10309                project.estimated_annual_spend,
10310                &ocpm_uuid_factory,
10311            );
10312            // Link RFx if available
10313            if let Some(rfx) = sourcing
10314                .rfx_events
10315                .iter()
10316                .find(|r| r.sourcing_project_id == project.project_id)
10317            {
10318                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
10319                // Link winning bid (status == Accepted)
10320                if let Some(bid) = sourcing.bids.iter().find(|b| {
10321                    b.rfx_id == rfx.rfx_id
10322                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
10323                }) {
10324                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
10325                }
10326            }
10327            // Link contract
10328            if let Some(contract) = sourcing
10329                .contracts
10330                .iter()
10331                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10332            {
10333                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
10334            }
10335            let start_time = base_datetime - chrono::Duration::days(90);
10336            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
10337            add_result(&mut event_log, result);
10338
10339            if let Some(pb) = &pb {
10340                pb.inc(1);
10341            }
10342        }
10343
10344        // Generate events from H2R payroll runs
10345        for run in &hr.payroll_runs {
10346            // Use first matching payroll line item's employee, or fallback
10347            let employee_id = hr
10348                .payroll_line_items
10349                .iter()
10350                .find(|li| li.payroll_id == run.payroll_id)
10351                .map(|li| li.employee_id.as_str())
10352                .unwrap_or("EMP000");
10353            let docs = H2rDocuments::new(
10354                &run.payroll_id,
10355                employee_id,
10356                &run.company_code,
10357                run.total_gross,
10358                &ocpm_uuid_factory,
10359            )
10360            .with_time_entries(
10361                hr.time_entries
10362                    .iter()
10363                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
10364                    .take(5)
10365                    .map(|t| t.entry_id.as_str())
10366                    .collect(),
10367            );
10368            let start_time = base_datetime - chrono::Duration::days(30);
10369            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
10370            add_result(&mut event_log, result);
10371
10372            if let Some(pb) = &pb {
10373                pb.inc(1);
10374            }
10375        }
10376
10377        // Generate events from MFG production orders
10378        for order in &manufacturing.production_orders {
10379            let mut docs = MfgDocuments::new(
10380                &order.order_id,
10381                &order.material_id,
10382                &order.company_code,
10383                order.planned_quantity,
10384                &ocpm_uuid_factory,
10385            )
10386            .with_operations(
10387                order
10388                    .operations
10389                    .iter()
10390                    .map(|o| format!("OP-{:04}", o.operation_number))
10391                    .collect::<Vec<_>>()
10392                    .iter()
10393                    .map(std::string::String::as_str)
10394                    .collect(),
10395            );
10396            // Link quality inspection if available (via reference_id matching order_id)
10397            if let Some(insp) = manufacturing
10398                .quality_inspections
10399                .iter()
10400                .find(|i| i.reference_id == order.order_id)
10401            {
10402                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10403            }
10404            // Link cycle count if available (match by material_id in items)
10405            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10406                cc.items
10407                    .iter()
10408                    .any(|item| item.material_id == order.material_id)
10409            }) {
10410                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10411            }
10412            let start_time = base_datetime - chrono::Duration::days(60);
10413            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10414            add_result(&mut event_log, result);
10415
10416            if let Some(pb) = &pb {
10417                pb.inc(1);
10418            }
10419        }
10420
10421        // Generate events from Banking customers
10422        for customer in &banking.customers {
10423            let customer_id_str = customer.customer_id.to_string();
10424            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10425            // Link accounts (primary_owner_id matches customer_id)
10426            if let Some(account) = banking
10427                .accounts
10428                .iter()
10429                .find(|a| a.primary_owner_id == customer.customer_id)
10430            {
10431                let account_id_str = account.account_id.to_string();
10432                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10433                // Link transactions for this account
10434                let txn_strs: Vec<String> = banking
10435                    .transactions
10436                    .iter()
10437                    .filter(|t| t.account_id == account.account_id)
10438                    .take(10)
10439                    .map(|t| t.transaction_id.to_string())
10440                    .collect();
10441                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10442                let txn_amounts: Vec<rust_decimal::Decimal> = banking
10443                    .transactions
10444                    .iter()
10445                    .filter(|t| t.account_id == account.account_id)
10446                    .take(10)
10447                    .map(|t| t.amount)
10448                    .collect();
10449                if !txn_ids.is_empty() {
10450                    docs = docs.with_transactions(txn_ids, txn_amounts);
10451                }
10452            }
10453            let start_time = base_datetime - chrono::Duration::days(180);
10454            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10455            add_result(&mut event_log, result);
10456
10457            if let Some(pb) = &pb {
10458                pb.inc(1);
10459            }
10460        }
10461
10462        // Generate events from Audit engagements
10463        for engagement in &audit.engagements {
10464            let engagement_id_str = engagement.engagement_id.to_string();
10465            let docs = AuditDocuments::new(
10466                &engagement_id_str,
10467                &engagement.client_entity_id,
10468                &ocpm_uuid_factory,
10469            )
10470            .with_workpapers(
10471                audit
10472                    .workpapers
10473                    .iter()
10474                    .filter(|w| w.engagement_id == engagement.engagement_id)
10475                    .take(10)
10476                    .map(|w| w.workpaper_id.to_string())
10477                    .collect::<Vec<_>>()
10478                    .iter()
10479                    .map(std::string::String::as_str)
10480                    .collect(),
10481            )
10482            .with_evidence(
10483                audit
10484                    .evidence
10485                    .iter()
10486                    .filter(|e| e.engagement_id == engagement.engagement_id)
10487                    .take(10)
10488                    .map(|e| e.evidence_id.to_string())
10489                    .collect::<Vec<_>>()
10490                    .iter()
10491                    .map(std::string::String::as_str)
10492                    .collect(),
10493            )
10494            .with_risks(
10495                audit
10496                    .risk_assessments
10497                    .iter()
10498                    .filter(|r| r.engagement_id == engagement.engagement_id)
10499                    .take(5)
10500                    .map(|r| r.risk_id.to_string())
10501                    .collect::<Vec<_>>()
10502                    .iter()
10503                    .map(std::string::String::as_str)
10504                    .collect(),
10505            )
10506            .with_findings(
10507                audit
10508                    .findings
10509                    .iter()
10510                    .filter(|f| f.engagement_id == engagement.engagement_id)
10511                    .take(5)
10512                    .map(|f| f.finding_id.to_string())
10513                    .collect::<Vec<_>>()
10514                    .iter()
10515                    .map(std::string::String::as_str)
10516                    .collect(),
10517            )
10518            .with_judgments(
10519                audit
10520                    .judgments
10521                    .iter()
10522                    .filter(|j| j.engagement_id == engagement.engagement_id)
10523                    .take(5)
10524                    .map(|j| j.judgment_id.to_string())
10525                    .collect::<Vec<_>>()
10526                    .iter()
10527                    .map(std::string::String::as_str)
10528                    .collect(),
10529            );
10530            let start_time = base_datetime - chrono::Duration::days(120);
10531            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10532            add_result(&mut event_log, result);
10533
10534            if let Some(pb) = &pb {
10535                pb.inc(1);
10536            }
10537        }
10538
10539        // Generate events from Bank Reconciliations
10540        for recon in &financial_reporting.bank_reconciliations {
10541            let docs = BankReconDocuments::new(
10542                &recon.reconciliation_id,
10543                &recon.bank_account_id,
10544                &recon.company_code,
10545                recon.bank_ending_balance,
10546                &ocpm_uuid_factory,
10547            )
10548            .with_statement_lines(
10549                recon
10550                    .statement_lines
10551                    .iter()
10552                    .take(20)
10553                    .map(|l| l.line_id.as_str())
10554                    .collect(),
10555            )
10556            .with_reconciling_items(
10557                recon
10558                    .reconciling_items
10559                    .iter()
10560                    .take(10)
10561                    .map(|i| i.item_id.as_str())
10562                    .collect(),
10563            );
10564            let start_time = base_datetime - chrono::Duration::days(30);
10565            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10566            add_result(&mut event_log, result);
10567
10568            if let Some(pb) = &pb {
10569                pb.inc(1);
10570            }
10571        }
10572
10573        // Compute process variants
10574        event_log.compute_variants();
10575
10576        let summary = event_log.summary();
10577
10578        if let Some(pb) = pb {
10579            pb.finish_with_message(format!(
10580                "Generated {} OCPM events, {} objects",
10581                summary.event_count, summary.object_count
10582            ));
10583        }
10584
10585        Ok(OcpmSnapshot {
10586            event_count: summary.event_count,
10587            object_count: summary.object_count,
10588            case_count: summary.case_count,
10589            event_log: Some(event_log),
10590        })
10591    }
10592
10593    /// Inject anomalies into journal entries.
10594    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10595        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10596
10597        // Read anomaly rates from config instead of using hardcoded values.
10598        // Priority: anomaly_injection config > fraud config > default 0.02
10599        let total_rate = if self.config.anomaly_injection.enabled {
10600            self.config.anomaly_injection.rates.total_rate
10601        } else if self.config.fraud.enabled {
10602            self.config.fraud.fraud_rate
10603        } else {
10604            0.02
10605        };
10606
10607        let fraud_rate = if self.config.anomaly_injection.enabled {
10608            self.config.anomaly_injection.rates.fraud_rate
10609        } else {
10610            AnomalyRateConfig::default().fraud_rate
10611        };
10612
10613        let error_rate = if self.config.anomaly_injection.enabled {
10614            self.config.anomaly_injection.rates.error_rate
10615        } else {
10616            AnomalyRateConfig::default().error_rate
10617        };
10618
10619        let process_issue_rate = if self.config.anomaly_injection.enabled {
10620            self.config.anomaly_injection.rates.process_rate
10621        } else {
10622            AnomalyRateConfig::default().process_issue_rate
10623        };
10624
10625        let anomaly_config = AnomalyInjectorConfig {
10626            rates: AnomalyRateConfig {
10627                total_rate,
10628                fraud_rate,
10629                error_rate,
10630                process_issue_rate,
10631                ..Default::default()
10632            },
10633            seed: self.seed + 5000,
10634            ..Default::default()
10635        };
10636
10637        let mut injector = AnomalyInjector::new(anomaly_config);
10638        let result = injector.process_entries(entries);
10639
10640        if let Some(pb) = &pb {
10641            pb.inc(entries.len() as u64);
10642            pb.finish_with_message("Anomaly injection complete");
10643        }
10644
10645        let mut by_type = HashMap::new();
10646        for label in &result.labels {
10647            *by_type
10648                .entry(format!("{:?}", label.anomaly_type))
10649                .or_insert(0) += 1;
10650        }
10651
10652        Ok(AnomalyLabels {
10653            labels: result.labels,
10654            summary: Some(result.summary),
10655            by_type,
10656        })
10657    }
10658
10659    /// Validate journal entries using running balance tracker.
10660    ///
10661    /// Applies all entries to the balance tracker and validates:
10662    /// - Each entry is internally balanced (debits = credits)
10663    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
10664    ///
10665    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
10666    /// excluded from balance validation as they may be intentionally unbalanced.
10667    fn validate_journal_entries(
10668        &mut self,
10669        entries: &[JournalEntry],
10670    ) -> SynthResult<BalanceValidationResult> {
10671        // Filter out entries with human errors as they may be intentionally unbalanced
10672        let clean_entries: Vec<&JournalEntry> = entries
10673            .iter()
10674            .filter(|e| {
10675                e.header
10676                    .header_text
10677                    .as_ref()
10678                    .map(|t| !t.contains("[HUMAN_ERROR:"))
10679                    .unwrap_or(true)
10680            })
10681            .collect();
10682
10683        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10684
10685        // Configure tracker to not fail on errors (collect them instead)
10686        let config = BalanceTrackerConfig {
10687            validate_on_each_entry: false,   // We'll validate at the end
10688            track_history: false,            // Skip history for performance
10689            fail_on_validation_error: false, // Collect errors, don't fail
10690            ..Default::default()
10691        };
10692        let validation_currency = self
10693            .config
10694            .companies
10695            .first()
10696            .map(|c| c.currency.clone())
10697            .unwrap_or_else(|| "USD".to_string());
10698
10699        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10700
10701        // Apply clean entries (without human errors)
10702        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10703        let errors = tracker.apply_entries(&clean_refs);
10704
10705        if let Some(pb) = &pb {
10706            pb.inc(entries.len() as u64);
10707        }
10708
10709        // Check if any entries were unbalanced
10710        // Note: When fail_on_validation_error is false, errors are stored in tracker
10711        let has_unbalanced = tracker
10712            .get_validation_errors()
10713            .iter()
10714            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10715
10716        // Validate balance sheet for each company
10717        // Include both returned errors and collected validation errors
10718        let mut all_errors = errors;
10719        all_errors.extend(tracker.get_validation_errors().iter().cloned());
10720        let company_codes: Vec<String> = self
10721            .config
10722            .companies
10723            .iter()
10724            .map(|c| c.code.clone())
10725            .collect();
10726
10727        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10728            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10729            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10730
10731        for company_code in &company_codes {
10732            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10733                all_errors.push(e);
10734            }
10735        }
10736
10737        // Get statistics after all mutable operations are done
10738        let stats = tracker.get_statistics();
10739
10740        // Determine if balanced overall
10741        let is_balanced = all_errors.is_empty();
10742
10743        if let Some(pb) = pb {
10744            let msg = if is_balanced {
10745                "Balance validation passed"
10746            } else {
10747                "Balance validation completed with errors"
10748            };
10749            pb.finish_with_message(msg);
10750        }
10751
10752        Ok(BalanceValidationResult {
10753            validated: true,
10754            is_balanced,
10755            entries_processed: stats.entries_processed,
10756            total_debits: stats.total_debits,
10757            total_credits: stats.total_credits,
10758            accounts_tracked: stats.accounts_tracked,
10759            companies_tracked: stats.companies_tracked,
10760            validation_errors: all_errors,
10761            has_unbalanced_entries: has_unbalanced,
10762        })
10763    }
10764
10765    /// Inject data quality variations into journal entries.
10766    ///
10767    /// Applies typos, missing values, and format variations to make
10768    /// the synthetic data more realistic for testing data cleaning pipelines.
10769    fn inject_data_quality(
10770        &mut self,
10771        entries: &mut [JournalEntry],
10772    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10773        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10774
10775        // Build config from user-specified schema settings when data_quality is enabled;
10776        // otherwise fall back to the low-rate minimal() preset.
10777        let config = if self.config.data_quality.enabled {
10778            let dq = &self.config.data_quality;
10779            DataQualityConfig {
10780                enable_missing_values: dq.missing_values.enabled,
10781                missing_values: datasynth_generators::MissingValueConfig {
10782                    global_rate: dq.effective_missing_rate(),
10783                    ..Default::default()
10784                },
10785                enable_format_variations: dq.format_variations.enabled,
10786                format_variations: datasynth_generators::FormatVariationConfig {
10787                    date_variation_rate: dq.format_variations.dates.rate,
10788                    amount_variation_rate: dq.format_variations.amounts.rate,
10789                    identifier_variation_rate: dq.format_variations.identifiers.rate,
10790                    ..Default::default()
10791                },
10792                enable_duplicates: dq.duplicates.enabled,
10793                duplicates: datasynth_generators::DuplicateConfig {
10794                    duplicate_rate: dq.effective_duplicate_rate(),
10795                    ..Default::default()
10796                },
10797                enable_typos: dq.typos.enabled,
10798                typos: datasynth_generators::TypoConfig {
10799                    char_error_rate: dq.effective_typo_rate(),
10800                    ..Default::default()
10801                },
10802                enable_encoding_issues: dq.encoding_issues.enabled,
10803                encoding_issue_rate: dq.encoding_issues.rate,
10804                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
10805                track_statistics: true,
10806            }
10807        } else {
10808            DataQualityConfig::minimal()
10809        };
10810        let mut injector = DataQualityInjector::new(config);
10811
10812        // Wire country pack for locale-aware format baselines
10813        injector.set_country_pack(self.primary_pack().clone());
10814
10815        // Build context for missing value decisions
10816        let context = HashMap::new();
10817
10818        for entry in entries.iter_mut() {
10819            // Process header_text field (common target for typos)
10820            if let Some(text) = &entry.header.header_text {
10821                let processed = injector.process_text_field(
10822                    "header_text",
10823                    text,
10824                    &entry.header.document_id.to_string(),
10825                    &context,
10826                );
10827                match processed {
10828                    Some(new_text) if new_text != *text => {
10829                        entry.header.header_text = Some(new_text);
10830                    }
10831                    None => {
10832                        entry.header.header_text = None; // Missing value
10833                    }
10834                    _ => {}
10835                }
10836            }
10837
10838            // Process reference field
10839            if let Some(ref_text) = &entry.header.reference {
10840                let processed = injector.process_text_field(
10841                    "reference",
10842                    ref_text,
10843                    &entry.header.document_id.to_string(),
10844                    &context,
10845                );
10846                match processed {
10847                    Some(new_text) if new_text != *ref_text => {
10848                        entry.header.reference = Some(new_text);
10849                    }
10850                    None => {
10851                        entry.header.reference = None;
10852                    }
10853                    _ => {}
10854                }
10855            }
10856
10857            // Process user_persona field (potential for typos in user IDs)
10858            let user_persona = entry.header.user_persona.clone();
10859            if let Some(processed) = injector.process_text_field(
10860                "user_persona",
10861                &user_persona,
10862                &entry.header.document_id.to_string(),
10863                &context,
10864            ) {
10865                if processed != user_persona {
10866                    entry.header.user_persona = processed;
10867                }
10868            }
10869
10870            // Process line items
10871            for line in &mut entry.lines {
10872                // Process line description if present
10873                if let Some(ref text) = line.line_text {
10874                    let processed = injector.process_text_field(
10875                        "line_text",
10876                        text,
10877                        &entry.header.document_id.to_string(),
10878                        &context,
10879                    );
10880                    match processed {
10881                        Some(new_text) if new_text != *text => {
10882                            line.line_text = Some(new_text);
10883                        }
10884                        None => {
10885                            line.line_text = None;
10886                        }
10887                        _ => {}
10888                    }
10889                }
10890
10891                // Process cost_center if present
10892                if let Some(cc) = &line.cost_center {
10893                    let processed = injector.process_text_field(
10894                        "cost_center",
10895                        cc,
10896                        &entry.header.document_id.to_string(),
10897                        &context,
10898                    );
10899                    match processed {
10900                        Some(new_cc) if new_cc != *cc => {
10901                            line.cost_center = Some(new_cc);
10902                        }
10903                        None => {
10904                            line.cost_center = None;
10905                        }
10906                        _ => {}
10907                    }
10908                }
10909            }
10910
10911            if let Some(pb) = &pb {
10912                pb.inc(1);
10913            }
10914        }
10915
10916        if let Some(pb) = pb {
10917            pb.finish_with_message("Data quality injection complete");
10918        }
10919
10920        let quality_issues = injector.issues().to_vec();
10921        Ok((injector.stats().clone(), quality_issues))
10922    }
10923
10924    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
10925    ///
10926    /// Creates complete audit documentation for each company in the configuration,
10927    /// following ISA standards:
10928    /// - ISA 210/220: Engagement acceptance and terms
10929    /// - ISA 230: Audit documentation (workpapers)
10930    /// - ISA 265: Control deficiencies (findings)
10931    /// - ISA 315/330: Risk assessment and response
10932    /// - ISA 500: Audit evidence
10933    /// - ISA 200: Professional judgment
10934    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10935        // Check if FSM-driven audit generation is enabled
10936        let use_fsm = self
10937            .config
10938            .audit
10939            .fsm
10940            .as_ref()
10941            .map(|f| f.enabled)
10942            .unwrap_or(false);
10943
10944        if use_fsm {
10945            return self.generate_audit_data_with_fsm(entries);
10946        }
10947
10948        // --- Legacy (non-FSM) audit generation follows ---
10949        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10950            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10951        let fiscal_year = start_date.year() as u16;
10952        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10953
10954        // Calculate rough total revenue from entries for materiality
10955        let total_revenue: rust_decimal::Decimal = entries
10956            .iter()
10957            .flat_map(|e| e.lines.iter())
10958            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
10959            .map(|l| l.credit_amount)
10960            .sum();
10961
10962        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
10963        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
10964
10965        let mut snapshot = AuditSnapshot::default();
10966
10967        // Initialize generators
10968        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
10969        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
10970        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
10971        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
10972        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
10973        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
10974        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
10975        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
10976        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
10977        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
10978        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
10979        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
10980
10981        // Get list of accounts from CoA for risk assessment
10982        let accounts: Vec<String> = self
10983            .coa
10984            .as_ref()
10985            .map(|coa| {
10986                coa.get_postable_accounts()
10987                    .iter()
10988                    .map(|acc| acc.account_code().to_string())
10989                    .collect()
10990            })
10991            .unwrap_or_default();
10992
10993        // Generate engagements for each company
10994        for (i, company) in self.config.companies.iter().enumerate() {
10995            // Calculate company-specific revenue (proportional to volume weight)
10996            let company_revenue = total_revenue
10997                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
10998
10999            // Generate engagements for this company
11000            let engagements_for_company =
11001                self.phase_config.audit_engagements / self.config.companies.len().max(1);
11002            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
11003                1
11004            } else {
11005                0
11006            };
11007
11008            for _eng_idx in 0..(engagements_for_company + extra) {
11009                // Generate the engagement
11010                let mut engagement = engagement_gen.generate_engagement(
11011                    &company.code,
11012                    &company.name,
11013                    fiscal_year,
11014                    period_end,
11015                    company_revenue,
11016                    None, // Use default engagement type
11017                );
11018
11019                // Replace synthetic team IDs with real employee IDs from master data
11020                if !self.master_data.employees.is_empty() {
11021                    let emp_count = self.master_data.employees.len();
11022                    // Use employee IDs deterministically based on engagement index
11023                    let base = (i * 10 + _eng_idx) % emp_count;
11024                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
11025                        .employee_id
11026                        .clone();
11027                    engagement.engagement_manager_id = self.master_data.employees
11028                        [(base + 1) % emp_count]
11029                        .employee_id
11030                        .clone();
11031                    let real_team: Vec<String> = engagement
11032                        .team_member_ids
11033                        .iter()
11034                        .enumerate()
11035                        .map(|(j, _)| {
11036                            self.master_data.employees[(base + 2 + j) % emp_count]
11037                                .employee_id
11038                                .clone()
11039                        })
11040                        .collect();
11041                    engagement.team_member_ids = real_team;
11042                }
11043
11044                if let Some(pb) = &pb {
11045                    pb.inc(1);
11046                }
11047
11048                // Get team members from the engagement
11049                let team_members: Vec<String> = engagement.team_member_ids.clone();
11050
11051                // Generate workpapers for the engagement
11052                let workpapers =
11053                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
11054
11055                for wp in &workpapers {
11056                    if let Some(pb) = &pb {
11057                        pb.inc(1);
11058                    }
11059
11060                    // Generate evidence for each workpaper
11061                    let evidence = evidence_gen.generate_evidence_for_workpaper(
11062                        wp,
11063                        &team_members,
11064                        wp.preparer_date,
11065                    );
11066
11067                    for _ in &evidence {
11068                        if let Some(pb) = &pb {
11069                            pb.inc(1);
11070                        }
11071                    }
11072
11073                    snapshot.evidence.extend(evidence);
11074                }
11075
11076                // Generate risk assessments for the engagement
11077                let risks =
11078                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
11079
11080                for _ in &risks {
11081                    if let Some(pb) = &pb {
11082                        pb.inc(1);
11083                    }
11084                }
11085                snapshot.risk_assessments.extend(risks);
11086
11087                // Generate findings for the engagement
11088                let findings = finding_gen.generate_findings_for_engagement(
11089                    &engagement,
11090                    &workpapers,
11091                    &team_members,
11092                );
11093
11094                for _ in &findings {
11095                    if let Some(pb) = &pb {
11096                        pb.inc(1);
11097                    }
11098                }
11099                snapshot.findings.extend(findings);
11100
11101                // Generate professional judgments for the engagement
11102                let judgments =
11103                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
11104
11105                for _ in &judgments {
11106                    if let Some(pb) = &pb {
11107                        pb.inc(1);
11108                    }
11109                }
11110                snapshot.judgments.extend(judgments);
11111
11112                // ISA 505: External confirmations and responses
11113                let (confs, resps) =
11114                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
11115                snapshot.confirmations.extend(confs);
11116                snapshot.confirmation_responses.extend(resps);
11117
11118                // ISA 330: Procedure steps per workpaper
11119                let team_pairs: Vec<(String, String)> = team_members
11120                    .iter()
11121                    .map(|id| {
11122                        let name = self
11123                            .master_data
11124                            .employees
11125                            .iter()
11126                            .find(|e| e.employee_id == *id)
11127                            .map(|e| e.display_name.clone())
11128                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
11129                        (id.clone(), name)
11130                    })
11131                    .collect();
11132                for wp in &workpapers {
11133                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
11134                    snapshot.procedure_steps.extend(steps);
11135                }
11136
11137                // ISA 530: Samples per workpaper
11138                for wp in &workpapers {
11139                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
11140                        snapshot.samples.push(sample);
11141                    }
11142                }
11143
11144                // ISA 520: Analytical procedures
11145                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
11146                snapshot.analytical_results.extend(analytical);
11147
11148                // ISA 610: Internal audit function and reports
11149                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
11150                snapshot.ia_functions.push(ia_func);
11151                snapshot.ia_reports.extend(ia_reports);
11152
11153                // ISA 550: Related parties and transactions
11154                let vendor_names: Vec<String> = self
11155                    .master_data
11156                    .vendors
11157                    .iter()
11158                    .map(|v| v.name.clone())
11159                    .collect();
11160                let customer_names: Vec<String> = self
11161                    .master_data
11162                    .customers
11163                    .iter()
11164                    .map(|c| c.name.clone())
11165                    .collect();
11166                let (parties, rp_txns) =
11167                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
11168                snapshot.related_parties.extend(parties);
11169                snapshot.related_party_transactions.extend(rp_txns);
11170
11171                // Add workpapers after findings since findings need them
11172                snapshot.workpapers.extend(workpapers);
11173
11174                // Generate audit scope record for this engagement (one per engagement)
11175                {
11176                    let scope_id = format!(
11177                        "SCOPE-{}-{}",
11178                        engagement.engagement_id.simple(),
11179                        &engagement.client_entity_id
11180                    );
11181                    let scope = datasynth_core::models::audit::AuditScope::new(
11182                        scope_id.clone(),
11183                        engagement.engagement_id.to_string(),
11184                        engagement.client_entity_id.clone(),
11185                        engagement.materiality,
11186                    );
11187                    // Wire scope_id back to engagement
11188                    let mut eng = engagement;
11189                    eng.scope_id = Some(scope_id);
11190                    snapshot.audit_scopes.push(scope);
11191                    snapshot.engagements.push(eng);
11192                }
11193            }
11194        }
11195
11196        // ----------------------------------------------------------------
11197        // ISA 600: Group audit — component auditors, plan, instructions, reports
11198        // ----------------------------------------------------------------
11199        if self.config.companies.len() > 1 {
11200            // Use materiality from the first engagement if available, otherwise
11201            // derive a reasonable figure from total revenue.
11202            let group_materiality = snapshot
11203                .engagements
11204                .first()
11205                .map(|e| e.materiality)
11206                .unwrap_or_else(|| {
11207                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
11208                    total_revenue * pct
11209                });
11210
11211            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
11212            let group_engagement_id = snapshot
11213                .engagements
11214                .first()
11215                .map(|e| e.engagement_id.to_string())
11216                .unwrap_or_else(|| "GROUP-ENG".to_string());
11217
11218            let component_snapshot = component_gen.generate(
11219                &self.config.companies,
11220                group_materiality,
11221                &group_engagement_id,
11222                period_end,
11223            );
11224
11225            snapshot.component_auditors = component_snapshot.component_auditors;
11226            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
11227            snapshot.component_instructions = component_snapshot.component_instructions;
11228            snapshot.component_reports = component_snapshot.component_reports;
11229
11230            info!(
11231                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
11232                snapshot.component_auditors.len(),
11233                snapshot.component_instructions.len(),
11234                snapshot.component_reports.len(),
11235            );
11236        }
11237
11238        // ----------------------------------------------------------------
11239        // ISA 210: Engagement letters — one per engagement
11240        // ----------------------------------------------------------------
11241        {
11242            let applicable_framework = self
11243                .config
11244                .accounting_standards
11245                .framework
11246                .as_ref()
11247                .map(|f| format!("{f:?}"))
11248                .unwrap_or_else(|| "IFRS".to_string());
11249
11250            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
11251            let entity_count = self.config.companies.len();
11252
11253            for engagement in &snapshot.engagements {
11254                let company = self
11255                    .config
11256                    .companies
11257                    .iter()
11258                    .find(|c| c.code == engagement.client_entity_id);
11259                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
11260                let letter_date = engagement.planning_start;
11261                let letter = letter_gen.generate(
11262                    &engagement.engagement_id.to_string(),
11263                    &engagement.client_name,
11264                    entity_count,
11265                    engagement.period_end_date,
11266                    currency,
11267                    &applicable_framework,
11268                    letter_date,
11269                );
11270                snapshot.engagement_letters.push(letter);
11271            }
11272
11273            info!(
11274                "ISA 210 engagement letters: {} generated",
11275                snapshot.engagement_letters.len()
11276            );
11277        }
11278
11279        // ----------------------------------------------------------------
11280        // ISA 560 / IAS 10: Subsequent events
11281        // ----------------------------------------------------------------
11282        {
11283            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
11284            let entity_codes: Vec<String> = self
11285                .config
11286                .companies
11287                .iter()
11288                .map(|c| c.code.clone())
11289                .collect();
11290            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
11291            info!(
11292                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
11293                subsequent.len(),
11294                subsequent
11295                    .iter()
11296                    .filter(|e| matches!(
11297                        e.classification,
11298                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
11299                    ))
11300                    .count(),
11301                subsequent
11302                    .iter()
11303                    .filter(|e| matches!(
11304                        e.classification,
11305                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
11306                    ))
11307                    .count(),
11308            );
11309            snapshot.subsequent_events = subsequent;
11310        }
11311
11312        // ----------------------------------------------------------------
11313        // ISA 402: Service organization controls
11314        // ----------------------------------------------------------------
11315        {
11316            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
11317            let entity_codes: Vec<String> = self
11318                .config
11319                .companies
11320                .iter()
11321                .map(|c| c.code.clone())
11322                .collect();
11323            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
11324            info!(
11325                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
11326                soc_snapshot.service_organizations.len(),
11327                soc_snapshot.soc_reports.len(),
11328                soc_snapshot.user_entity_controls.len(),
11329            );
11330            snapshot.service_organizations = soc_snapshot.service_organizations;
11331            snapshot.soc_reports = soc_snapshot.soc_reports;
11332            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
11333        }
11334
11335        // ----------------------------------------------------------------
11336        // ISA 570: Going concern assessments
11337        // ----------------------------------------------------------------
11338        {
11339            use datasynth_generators::audit::going_concern_generator::{
11340                GoingConcernGenerator, GoingConcernInput,
11341            };
11342            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
11343            let entity_codes: Vec<String> = self
11344                .config
11345                .companies
11346                .iter()
11347                .map(|c| c.code.clone())
11348                .collect();
11349            // Assessment date = period end + 75 days (typical sign-off window).
11350            let assessment_date = period_end + chrono::Duration::days(75);
11351            let period_label = format!("FY{}", period_end.year());
11352
11353            // Build financial inputs from actual journal entries.
11354            //
11355            // We derive approximate P&L, working capital, and operating cash flow
11356            // by aggregating GL account balances from the journal entry population.
11357            // Account ranges used (standard chart):
11358            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
11359            //   Expenses:        6xxx (debit-normal)
11360            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
11361            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
11362            //   Operating CF:    net income as-is, no D&A adjustment (rough proxy)
11363            let gc_inputs: Vec<GoingConcernInput> = self
11364                .config
11365                .companies
11366                .iter()
11367                .map(|company| {
11368                    let code = &company.code;
11369                    let mut revenue = rust_decimal::Decimal::ZERO;
11370                    let mut expenses = rust_decimal::Decimal::ZERO;
11371                    let mut current_assets = rust_decimal::Decimal::ZERO;
11372                    let mut current_liabs = rust_decimal::Decimal::ZERO;
11373                    let mut total_debt = rust_decimal::Decimal::ZERO;
11374
11375                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
11376                        for line in &je.lines {
11377                            let acct = line.gl_account.as_str();
11378                            let net = line.debit_amount - line.credit_amount;
11379                            if acct.starts_with('4') {
11380                                // Revenue accounts: credit-normal, so negative net = revenue earned
11381                                revenue -= net;
11382                            } else if acct.starts_with('6') {
11383                                // Expense accounts: debit-normal
11384                                expenses += net;
11385                            }
11386                            // Balance sheet accounts for working capital
11387                            if acct.starts_with('1') {
11388                                // Current asset accounts (1000–1499)
11389                                if let Ok(n) = acct.parse::<u32>() {
11390                                    if (1000..=1499).contains(&n) {
11391                                        current_assets += net;
11392                                    }
11393                                }
11394                            } else if acct.starts_with('2') {
11395                                if let Ok(n) = acct.parse::<u32>() {
11396                                    if (2000..=2499).contains(&n) {
11397                                        // Current liabilities
11398                                        current_liabs -= net; // credit-normal
11399                                    } else if (2500..=2999).contains(&n) {
11400                                        // Long-term debt
11401                                        total_debt -= net;
11402                                    }
11403                                }
11404                            }
11405                        }
11406                    }
11407
11408                    let net_income = revenue - expenses;
11409                    let working_capital = current_assets - current_liabs;
11410                    // Rough operating CF proxy: net income (full accrual CF calculation
11411                    // is done separately in the cash flow statement generator)
11412                    let operating_cash_flow = net_income;
11413
11414                    GoingConcernInput {
11415                        entity_code: code.clone(),
11416                        net_income,
11417                        working_capital,
11418                        operating_cash_flow,
11419                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
11420                        assessment_date,
11421                    }
11422                })
11423                .collect();
11424
11425            let assessments = if gc_inputs.is_empty() {
11426                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
11427            } else {
11428                gc_gen.generate_for_entities_with_inputs(
11429                    &entity_codes,
11430                    &gc_inputs,
11431                    assessment_date,
11432                    &period_label,
11433                )
11434            };
11435            info!(
11436                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
11437                assessments.len(),
11438                assessments.iter().filter(|a| matches!(
11439                    a.auditor_conclusion,
11440                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
11441                )).count(),
11442                assessments.iter().filter(|a| matches!(
11443                    a.auditor_conclusion,
11444                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
11445                )).count(),
11446                assessments.iter().filter(|a| matches!(
11447                    a.auditor_conclusion,
11448                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
11449                )).count(),
11450            );
11451            snapshot.going_concern_assessments = assessments;
11452        }
11453
11454        // ----------------------------------------------------------------
11455        // ISA 540: Accounting estimates
11456        // ----------------------------------------------------------------
11457        {
11458            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
11459            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
11460            let entity_codes: Vec<String> = self
11461                .config
11462                .companies
11463                .iter()
11464                .map(|c| c.code.clone())
11465                .collect();
11466            let estimates = est_gen.generate_for_entities(&entity_codes);
11467            info!(
11468                "ISA 540 accounting estimates: {} estimates across {} entities \
11469                 ({} with retrospective reviews, {} with auditor point estimates)",
11470                estimates.len(),
11471                entity_codes.len(),
11472                estimates
11473                    .iter()
11474                    .filter(|e| e.retrospective_review.is_some())
11475                    .count(),
11476                estimates
11477                    .iter()
11478                    .filter(|e| e.auditor_point_estimate.is_some())
11479                    .count(),
11480            );
11481            snapshot.accounting_estimates = estimates;
11482        }
11483
11484        // ----------------------------------------------------------------
11485        // ISA 700/701/705/706: Audit opinions (one per engagement)
11486        // ----------------------------------------------------------------
11487        {
11488            use datasynth_generators::audit::audit_opinion_generator::{
11489                AuditOpinionGenerator, AuditOpinionInput,
11490            };
11491
11492            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
11493
11494            // Build inputs — one per engagement, linking findings and going concern.
11495            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
11496                .engagements
11497                .iter()
11498                .map(|eng| {
11499                    // Collect findings for this engagement.
11500                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11501                        .findings
11502                        .iter()
11503                        .filter(|f| f.engagement_id == eng.engagement_id)
11504                        .cloned()
11505                        .collect();
11506
11507                    // Going concern for this entity.
11508                    let gc = snapshot
11509                        .going_concern_assessments
11510                        .iter()
11511                        .find(|g| g.entity_code == eng.client_entity_id)
11512                        .cloned();
11513
11514                    // All component reports are passed to every engagement (no per-engagement filter).
11515                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11516                        snapshot.component_reports.clone();
11517
11518                    let auditor = self
11519                        .master_data
11520                        .employees
11521                        .first()
11522                        .map(|e| e.display_name.clone())
11523                        .unwrap_or_else(|| "Global Audit LLP".into());
11524
11525                    let partner = self
11526                        .master_data
11527                        .employees
11528                        .get(1)
11529                        .map(|e| e.display_name.clone())
11530                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
11531
11532                    AuditOpinionInput {
11533                        entity_code: eng.client_entity_id.clone(),
11534                        entity_name: eng.client_name.clone(),
11535                        engagement_id: eng.engagement_id,
11536                        period_end: eng.period_end_date,
11537                        findings: eng_findings,
11538                        going_concern: gc,
11539                        component_reports: comp_reports,
11540                        // Mark as US-listed when audit standards include PCAOB.
11541                        is_us_listed: {
11542                            let fw = &self.config.audit_standards.isa_compliance.framework;
11543                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11544                        },
11545                        auditor_name: auditor,
11546                        engagement_partner: partner,
11547                    }
11548                })
11549                .collect();
11550
11551            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11552
11553            for go in &generated_opinions {
11554                snapshot
11555                    .key_audit_matters
11556                    .extend(go.key_audit_matters.clone());
11557            }
11558            snapshot.audit_opinions = generated_opinions
11559                .into_iter()
11560                .map(|go| go.opinion)
11561                .collect();
11562
11563            info!(
11564                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11565                snapshot.audit_opinions.len(),
11566                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11567                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11568                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11569                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11570            );
11571        }
11572
11573        // ----------------------------------------------------------------
11574        // SOX 302 / 404 assessments
11575        // ----------------------------------------------------------------
11576        {
11577            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11578
11579            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11580
11581            for (i, company) in self.config.companies.iter().enumerate() {
11582                // Collect findings for this company's engagements.
11583                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11584                    .engagements
11585                    .iter()
11586                    .filter(|e| e.client_entity_id == company.code)
11587                    .map(|e| e.engagement_id)
11588                    .collect();
11589
11590                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11591                    .findings
11592                    .iter()
11593                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11594                    .cloned()
11595                    .collect();
11596
11597                // Derive executive names from employee list.
11598                let emp_count = self.master_data.employees.len();
11599                let ceo_name = if emp_count > 0 {
11600                    self.master_data.employees[i % emp_count]
11601                        .display_name
11602                        .clone()
11603                } else {
11604                    format!("CEO of {}", company.name)
11605                };
11606                let cfo_name = if emp_count > 1 {
11607                    self.master_data.employees[(i + 1) % emp_count]
11608                        .display_name
11609                        .clone()
11610                } else {
11611                    format!("CFO of {}", company.name)
11612                };
11613
11614                // Use engagement materiality if available.
11615                let materiality = snapshot
11616                    .engagements
11617                    .iter()
11618                    .find(|e| e.client_entity_id == company.code)
11619                    .map(|e| e.materiality)
11620                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11621
11622                let input = SoxGeneratorInput {
11623                    company_code: company.code.clone(),
11624                    company_name: company.name.clone(),
11625                    fiscal_year,
11626                    period_end,
11627                    findings: company_findings,
11628                    ceo_name,
11629                    cfo_name,
11630                    materiality_threshold: materiality,
11631                    revenue_percent: rust_decimal::Decimal::from(100),
11632                    assets_percent: rust_decimal::Decimal::from(100),
11633                    significant_accounts: vec![
11634                        "Revenue".into(),
11635                        "Accounts Receivable".into(),
11636                        "Inventory".into(),
11637                        "Fixed Assets".into(),
11638                        "Accounts Payable".into(),
11639                    ],
11640                };
11641
11642                let (certs, assessment) = sox_gen.generate(&input);
11643                snapshot.sox_302_certifications.extend(certs);
11644                snapshot.sox_404_assessments.push(assessment);
11645            }
11646
11647            info!(
11648                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11649                snapshot.sox_302_certifications.len(),
11650                snapshot.sox_404_assessments.len(),
11651                snapshot
11652                    .sox_404_assessments
11653                    .iter()
11654                    .filter(|a| a.icfr_effective)
11655                    .count(),
11656                snapshot
11657                    .sox_404_assessments
11658                    .iter()
11659                    .filter(|a| !a.icfr_effective)
11660                    .count(),
11661            );
11662        }
11663
11664        // ----------------------------------------------------------------
11665        // ISA 320: Materiality calculations (one per entity)
11666        // ----------------------------------------------------------------
11667        {
11668            use datasynth_generators::audit::materiality_generator::{
11669                MaterialityGenerator, MaterialityInput,
11670            };
11671
11672            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11673
11674            // Compute per-company financials from JEs.
11675            // Asset accounts start with '1', revenue with '4',
11676            // expense accounts with '5' or '6'.
11677            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11678
11679            for company in &self.config.companies {
11680                let company_code = company.code.clone();
11681
11682                // Revenue: credit-side entries on 4xxx accounts
11683                let company_revenue: rust_decimal::Decimal = entries
11684                    .iter()
11685                    .filter(|e| e.company_code() == company_code)
11686                    .flat_map(|e| e.lines.iter())
11687                    .filter(|l| l.account_code.starts_with('4'))
11688                    .map(|l| l.credit_amount)
11689                    .sum();
11690
11691                // Total assets: sum of debit amounts on 1xxx accounts (credits are not netted)
11692                let total_assets: rust_decimal::Decimal = entries
11693                    .iter()
11694                    .filter(|e| e.company_code() == company_code)
11695                    .flat_map(|e| e.lines.iter())
11696                    .filter(|l| l.account_code.starts_with('1'))
11697                    .map(|l| l.debit_amount)
11698                    .sum();
11699
11700                // Expenses: debit-side entries on 5xxx/6xxx accounts
11701                let total_expenses: rust_decimal::Decimal = entries
11702                    .iter()
11703                    .filter(|e| e.company_code() == company_code)
11704                    .flat_map(|e| e.lines.iter())
11705                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11706                    .map(|l| l.debit_amount)
11707                    .sum();
11708
11709                // Equity: sum of credit amounts on 3xxx accounts (debits are not netted)
11710                let equity: rust_decimal::Decimal = entries
11711                    .iter()
11712                    .filter(|e| e.company_code() == company_code)
11713                    .flat_map(|e| e.lines.iter())
11714                    .filter(|l| l.account_code.starts_with('3'))
11715                    .map(|l| l.credit_amount)
11716                    .sum();
11717
11718                let pretax_income = company_revenue - total_expenses;
11719
11720                // If no company-specific data, fall back to proportional share
11721                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11722                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
11723                        .unwrap_or(rust_decimal::Decimal::ONE);
11724                    (
11725                        total_revenue * w,
11726                        total_revenue * w * rust_decimal::Decimal::from(3),
11727                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
11728                        total_revenue * w * rust_decimal::Decimal::from(2),
11729                    )
11730                } else {
11731                    (company_revenue, total_assets, pretax_income, equity)
11732                };
11733
11734                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
11735
11736                materiality_inputs.push(MaterialityInput {
11737                    entity_code: company_code,
11738                    period: format!("FY{}", fiscal_year),
11739                    revenue: rev,
11740                    pretax_income: pti,
11741                    total_assets: assets,
11742                    equity: eq,
11743                    gross_profit,
11744                });
11745            }
11746
11747            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11748
11749            info!(
11750                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11751                 {} total assets, {} equity benchmarks)",
11752                snapshot.materiality_calculations.len(),
11753                snapshot
11754                    .materiality_calculations
11755                    .iter()
11756                    .filter(|m| matches!(
11757                        m.benchmark,
11758                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11759                    ))
11760                    .count(),
11761                snapshot
11762                    .materiality_calculations
11763                    .iter()
11764                    .filter(|m| matches!(
11765                        m.benchmark,
11766                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11767                    ))
11768                    .count(),
11769                snapshot
11770                    .materiality_calculations
11771                    .iter()
11772                    .filter(|m| matches!(
11773                        m.benchmark,
11774                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11775                    ))
11776                    .count(),
11777                snapshot
11778                    .materiality_calculations
11779                    .iter()
11780                    .filter(|m| matches!(
11781                        m.benchmark,
11782                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11783                    ))
11784                    .count(),
11785            );
11786        }
11787
11788        // ----------------------------------------------------------------
11789        // ISA 315: Combined Risk Assessments (per entity, per account area)
11790        // ----------------------------------------------------------------
11791        {
11792            use datasynth_generators::audit::cra_generator::CraGenerator;
11793
11794            let mut cra_gen = CraGenerator::new(self.seed + 8315);
11795
11796            // Build entity → scope_id map from already-generated scopes
11797            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11798                .audit_scopes
11799                .iter()
11800                .map(|s| (s.entity_code.clone(), s.id.clone()))
11801                .collect();
11802
11803            for company in &self.config.companies {
11804                let cras = cra_gen.generate_for_entity(&company.code, None);
11805                let scope_id = entity_scope_map.get(&company.code).cloned();
11806                let cras_with_scope: Vec<_> = cras
11807                    .into_iter()
11808                    .map(|mut cra| {
11809                        cra.scope_id = scope_id.clone();
11810                        cra
11811                    })
11812                    .collect();
11813                snapshot.combined_risk_assessments.extend(cras_with_scope);
11814            }
11815
11816            let significant_count = snapshot
11817                .combined_risk_assessments
11818                .iter()
11819                .filter(|c| c.significant_risk)
11820                .count();
11821            let high_cra_count = snapshot
11822                .combined_risk_assessments
11823                .iter()
11824                .filter(|c| {
11825                    matches!(
11826                        c.combined_risk,
11827                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11828                    )
11829                })
11830                .count();
11831
11832            info!(
11833                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11834                snapshot.combined_risk_assessments.len(),
11835                significant_count,
11836                high_cra_count,
11837            );
11838        }
11839
11840        // ----------------------------------------------------------------
11841        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
11842        // ----------------------------------------------------------------
11843        {
11844            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11845
11846            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11847
11848            // Group CRAs by entity and use per-entity tolerable error from materiality
11849            for company in &self.config.companies {
11850                let entity_code = company.code.clone();
11851
11852                // Find tolerable error for this entity (= performance materiality)
11853                let tolerable_error = snapshot
11854                    .materiality_calculations
11855                    .iter()
11856                    .find(|m| m.entity_code == entity_code)
11857                    .map(|m| m.tolerable_error);
11858
11859                // Collect CRAs for this entity
11860                let entity_cras: Vec<_> = snapshot
11861                    .combined_risk_assessments
11862                    .iter()
11863                    .filter(|c| c.entity_code == entity_code)
11864                    .cloned()
11865                    .collect();
11866
11867                if !entity_cras.is_empty() {
11868                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11869                    snapshot.sampling_plans.extend(plans);
11870                    snapshot.sampled_items.extend(items);
11871                }
11872            }
11873
11874            let misstatement_count = snapshot
11875                .sampled_items
11876                .iter()
11877                .filter(|i| i.misstatement_found)
11878                .count();
11879
11880            info!(
11881                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11882                snapshot.sampling_plans.len(),
11883                snapshot.sampled_items.len(),
11884                misstatement_count,
11885            );
11886        }
11887
11888        // ----------------------------------------------------------------
11889        // ISA 315: Significant Classes of Transactions (SCOTS)
11890        // ----------------------------------------------------------------
11891        {
11892            use datasynth_generators::audit::scots_generator::{
11893                ScotsGenerator, ScotsGeneratorConfig,
11894            };
11895
11896            let ic_enabled = self.config.intercompany.enabled;
11897
11898            let config = ScotsGeneratorConfig {
11899                intercompany_enabled: ic_enabled,
11900                ..ScotsGeneratorConfig::default()
11901            };
11902            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11903
11904            for company in &self.config.companies {
11905                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11906                snapshot
11907                    .significant_transaction_classes
11908                    .extend(entity_scots);
11909            }
11910
11911            let estimation_count = snapshot
11912                .significant_transaction_classes
11913                .iter()
11914                .filter(|s| {
11915                    matches!(
11916                        s.transaction_type,
11917                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11918                    )
11919                })
11920                .count();
11921
11922            info!(
11923                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11924                snapshot.significant_transaction_classes.len(),
11925                estimation_count,
11926            );
11927        }
11928
11929        // ----------------------------------------------------------------
11930        // ISA 520: Unusual Item Markers
11931        // ----------------------------------------------------------------
11932        {
11933            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11934
11935            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11936            let entity_codes: Vec<String> = self
11937                .config
11938                .companies
11939                .iter()
11940                .map(|c| c.code.clone())
11941                .collect();
11942            let unusual_flags =
11943                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11944            info!(
11945                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11946                unusual_flags.len(),
11947                unusual_flags
11948                    .iter()
11949                    .filter(|f| matches!(
11950                        f.severity,
11951                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11952                    ))
11953                    .count(),
11954                unusual_flags
11955                    .iter()
11956                    .filter(|f| matches!(
11957                        f.severity,
11958                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
11959                    ))
11960                    .count(),
11961                unusual_flags
11962                    .iter()
11963                    .filter(|f| matches!(
11964                        f.severity,
11965                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
11966                    ))
11967                    .count(),
11968            );
11969            snapshot.unusual_items = unusual_flags;
11970        }
11971
11972        // ----------------------------------------------------------------
11973        // ISA 520: Analytical Relationships
11974        // ----------------------------------------------------------------
11975        {
11976            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
11977
11978            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
11979            let entity_codes: Vec<String> = self
11980                .config
11981                .companies
11982                .iter()
11983                .map(|c| c.code.clone())
11984                .collect();
11985            let current_period_label = format!("FY{fiscal_year}");
11986            let prior_period_label = format!("FY{}", fiscal_year - 1);
11987            let analytical_rels = ar_gen.generate_for_entities(
11988                &entity_codes,
11989                entries,
11990                &current_period_label,
11991                &prior_period_label,
11992            );
11993            let out_of_range = analytical_rels
11994                .iter()
11995                .filter(|r| !r.within_expected_range)
11996                .count();
11997            info!(
11998                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
11999                analytical_rels.len(),
12000                out_of_range,
12001            );
12002            snapshot.analytical_relationships = analytical_rels;
12003        }
12004
12005        if let Some(pb) = pb {
12006            pb.finish_with_message(format!(
12007                "Audit data: {} engagements, {} workpapers, {} evidence, \
12008                 {} confirmations, {} procedure steps, {} samples, \
12009                 {} analytical, {} IA funcs, {} related parties, \
12010                 {} component auditors, {} letters, {} subsequent events, \
12011                 {} service orgs, {} going concern, {} accounting estimates, \
12012                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
12013                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
12014                 {} unusual items, {} analytical relationships",
12015                snapshot.engagements.len(),
12016                snapshot.workpapers.len(),
12017                snapshot.evidence.len(),
12018                snapshot.confirmations.len(),
12019                snapshot.procedure_steps.len(),
12020                snapshot.samples.len(),
12021                snapshot.analytical_results.len(),
12022                snapshot.ia_functions.len(),
12023                snapshot.related_parties.len(),
12024                snapshot.component_auditors.len(),
12025                snapshot.engagement_letters.len(),
12026                snapshot.subsequent_events.len(),
12027                snapshot.service_organizations.len(),
12028                snapshot.going_concern_assessments.len(),
12029                snapshot.accounting_estimates.len(),
12030                snapshot.audit_opinions.len(),
12031                snapshot.key_audit_matters.len(),
12032                snapshot.sox_302_certifications.len(),
12033                snapshot.sox_404_assessments.len(),
12034                snapshot.materiality_calculations.len(),
12035                snapshot.combined_risk_assessments.len(),
12036                snapshot.sampling_plans.len(),
12037                snapshot.significant_transaction_classes.len(),
12038                snapshot.unusual_items.len(),
12039                snapshot.analytical_relationships.len(),
12040            ));
12041        }
12042
12043        // ----------------------------------------------------------------
12044        // PCAOB-ISA cross-reference mappings
12045        // ----------------------------------------------------------------
12046        // Always include the standard PCAOB-ISA mappings when audit generation is
12047        // enabled. These are static reference data (no randomness required) so we
12048        // call standard_mappings() directly.
12049        {
12050            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12051            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12052            debug!(
12053                "PCAOB-ISA mappings generated: {} mappings",
12054                snapshot.isa_pcaob_mappings.len()
12055            );
12056        }
12057
12058        // ----------------------------------------------------------------
12059        // ISA standard reference entries
12060        // ----------------------------------------------------------------
12061        // Emit flat ISA standard reference data (number, title, series) so
12062        // consumers get a machine-readable listing of all 34 ISA standards in
12063        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
12064        {
12065            use datasynth_standards::audit::isa_reference::IsaStandard;
12066            snapshot.isa_mappings = IsaStandard::standard_entries();
12067            debug!(
12068                "ISA standard entries generated: {} standards",
12069                snapshot.isa_mappings.len()
12070            );
12071        }
12072
12073        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
12074        // For each RPT, find the chronologically closest JE for the engagement's entity.
12075        {
12076            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
12077                .engagements
12078                .iter()
12079                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
12080                .collect();
12081
12082            for rpt in &mut snapshot.related_party_transactions {
12083                if rpt.journal_entry_id.is_some() {
12084                    continue; // already set
12085                }
12086                let entity = engagement_by_id
12087                    .get(&rpt.engagement_id.to_string())
12088                    .copied()
12089                    .unwrap_or("");
12090
12091                // Find closest JE by date in the entity's company
12092                let best_je = entries
12093                    .iter()
12094                    .filter(|je| je.header.company_code == entity)
12095                    .min_by_key(|je| {
12096                        (je.header.posting_date - rpt.transaction_date)
12097                            .num_days()
12098                            .abs()
12099                    });
12100
12101                if let Some(je) = best_je {
12102                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
12103                }
12104            }
12105
12106            let linked = snapshot
12107                .related_party_transactions
12108                .iter()
12109                .filter(|t| t.journal_entry_id.is_some())
12110                .count();
12111            debug!(
12112                "Linked {}/{} related party transactions to journal entries",
12113                linked,
12114                snapshot.related_party_transactions.len()
12115            );
12116        }
12117
12118        // --- ISA 700 / 701 / 705 / 706: audit opinion + key audit matters.
12119        // One opinion per engagement, derived from that engagement's findings, going-concern
12120        // assessment, and any component-auditor reports. Fills `audit_opinions` + a flattened
12121        // `key_audit_matters` for downstream export. NOTE(review): this runs after the
12122        // progress-bar summary above, so its opinion/KAM counts exclude these — confirm intent.
12123        if !snapshot.engagements.is_empty() {
12124            use datasynth_generators::audit_opinion_generator::{
12125                AuditOpinionGenerator, AuditOpinionInput,
12126            };
12127
12128            let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
12129            let inputs: Vec<AuditOpinionInput> = snapshot
12130                .engagements
12131                .iter()
12132                .map(|eng| {
12133                    let findings = snapshot
12134                        .findings
12135                        .iter()
12136                        .filter(|f| f.engagement_id == eng.engagement_id)
12137                        .cloned()
12138                        .collect();
12139                    let going_concern = snapshot
12140                        .going_concern_assessments
12141                        .iter()
12142                        .find(|gc| gc.entity_code == eng.client_entity_id)
12143                        .cloned();
12144                    // ComponentAuditorReport doesn't carry an engagement id, but
12145                    // component scope is keyed by `entity_code`, so filter on that.
12146                    let component_reports = snapshot
12147                        .component_reports
12148                        .iter()
12149                        .filter(|r| r.entity_code == eng.client_entity_id)
12150                        .cloned()
12151                        .collect();
12152
12153                    AuditOpinionInput {
12154                        entity_code: eng.client_entity_id.clone(),
12155                        entity_name: eng.client_name.clone(),
12156                        engagement_id: eng.engagement_id,
12157                        period_end: eng.period_end_date,
12158                        findings,
12159                        going_concern,
12160                        component_reports,
12161                        is_us_listed: matches!(
12162                            eng.engagement_type,
12163                            datasynth_core::audit::EngagementType::IntegratedAudit
12164                                | datasynth_core::audit::EngagementType::Sox404
12165                        ),
12166                        auditor_name: "DataSynth Audit LLP".to_string(),
12167                        engagement_partner: "Engagement Partner".to_string(),
12168                    }
12169                })
12170                .collect();
12171
12172            let generated = opinion_gen.generate_batch(&inputs);
12173            for g in generated {
12174                snapshot.key_audit_matters.extend(g.key_audit_matters);
12175                snapshot.audit_opinions.push(g.opinion);
12176            }
12177            debug!(
12178                "Generated {} audit opinions with {} key audit matters",
12179                snapshot.audit_opinions.len(),
12180                snapshot.key_audit_matters.len()
12181            );
12182        }
12183
12184        Ok(snapshot)
12185    }
12186
12187    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
12188    ///
12189    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
12190    /// from the current orchestrator state, runs the FSM engine, and maps the
12191    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
12192    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
12193    fn generate_audit_data_with_fsm(
12194        &mut self,
12195        entries: &[JournalEntry],
12196    ) -> SynthResult<AuditSnapshot> {
12197        use datasynth_audit_fsm::{
12198            context::EngagementContext,
12199            engine::AuditFsmEngine,
12200            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
12201        };
12202        use rand::SeedableRng;
12203        use rand_chacha::ChaCha8Rng;
12204
12205        info!("Audit FSM: generating audit data via FSM engine");
12206
12207        let fsm_config = self
12208            .config
12209            .audit
12210            .fsm
12211            .as_ref()
12212            .expect("FSM config must be present when FSM is enabled");
12213
12214        // 1. Load blueprint from config string.
12215        let bwp = match fsm_config.blueprint.as_str() {
12216            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
12217            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
12218            _ => {
12219                warn!(
12220                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
12221                    fsm_config.blueprint
12222                );
12223                BlueprintWithPreconditions::load_builtin_fsa()
12224            }
12225        }
12226        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
12227
12228        // 2. Load overlay from config string.
12229        let overlay = match fsm_config.overlay.as_str() {
12230            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
12231            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
12232            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
12233            _ => {
12234                warn!(
12235                    "Unknown FSM overlay '{}', falling back to builtin:default",
12236                    fsm_config.overlay
12237                );
12238                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
12239            }
12240        }
12241        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
12242
12243        // 3. Build EngagementContext from orchestrator state.
12244        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12245            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12246        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12247
12248        // Determine the engagement entity early so we can filter JEs.
12249        let company = self.config.companies.first();
12250        let company_code = company
12251            .map(|c| c.code.clone())
12252            .unwrap_or_else(|| "UNKNOWN".to_string());
12253        let company_name = company
12254            .map(|c| c.name.clone())
12255            .unwrap_or_else(|| "Unknown Company".to_string());
12256        let currency = company
12257            .map(|c| c.currency.clone())
12258            .unwrap_or_else(|| "USD".to_string());
12259
12260        // Filter JEs to the engagement entity for single-company coherence.
12261        let entity_entries: Vec<_> = entries
12262            .iter()
12263            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
12264            .cloned()
12265            .collect();
12266        let entries = &entity_entries; // Shadow the parameter for remaining usage
12267
12268        // Financial aggregates from journal entries.
12269        let total_revenue: rust_decimal::Decimal = entries
12270            .iter()
12271            .flat_map(|e| e.lines.iter())
12272            .filter(|l| l.account_code.starts_with('4'))
12273            .map(|l| l.credit_amount - l.debit_amount)
12274            .sum();
12275
12276        let total_assets: rust_decimal::Decimal = entries
12277            .iter()
12278            .flat_map(|e| e.lines.iter())
12279            .filter(|l| l.account_code.starts_with('1'))
12280            .map(|l| l.debit_amount - l.credit_amount)
12281            .sum();
12282
12283        let total_expenses: rust_decimal::Decimal = entries
12284            .iter()
12285            .flat_map(|e| e.lines.iter())
12286            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12287            .map(|l| l.debit_amount)
12288            .sum();
12289
12290        let equity: rust_decimal::Decimal = entries
12291            .iter()
12292            .flat_map(|e| e.lines.iter())
12293            .filter(|l| l.account_code.starts_with('3'))
12294            .map(|l| l.credit_amount - l.debit_amount)
12295            .sum();
12296
12297        let total_debt: rust_decimal::Decimal = entries
12298            .iter()
12299            .flat_map(|e| e.lines.iter())
12300            .filter(|l| l.account_code.starts_with('2'))
12301            .map(|l| l.credit_amount - l.debit_amount)
12302            .sum();
12303
12304        let pretax_income = total_revenue - total_expenses;
12305
12306        let cogs: rust_decimal::Decimal = entries
12307            .iter()
12308            .flat_map(|e| e.lines.iter())
12309            .filter(|l| l.account_code.starts_with('5'))
12310            .map(|l| l.debit_amount)
12311            .sum();
12312        let gross_profit = total_revenue - cogs;
12313
12314        let current_assets: rust_decimal::Decimal = entries
12315            .iter()
12316            .flat_map(|e| e.lines.iter())
12317            .filter(|l| {
12318                l.account_code.starts_with("10")
12319                    || l.account_code.starts_with("11")
12320                    || l.account_code.starts_with("12")
12321                    || l.account_code.starts_with("13")
12322            })
12323            .map(|l| l.debit_amount - l.credit_amount)
12324            .sum();
12325        let current_liabilities: rust_decimal::Decimal = entries
12326            .iter()
12327            .flat_map(|e| e.lines.iter())
12328            .filter(|l| {
12329                l.account_code.starts_with("20")
12330                    || l.account_code.starts_with("21")
12331                    || l.account_code.starts_with("22")
12332            })
12333            .map(|l| l.credit_amount - l.debit_amount)
12334            .sum();
12335        let working_capital = current_assets - current_liabilities;
12336
12337        let depreciation: rust_decimal::Decimal = entries
12338            .iter()
12339            .flat_map(|e| e.lines.iter())
12340            .filter(|l| l.account_code.starts_with("60"))
12341            .map(|l| l.debit_amount)
12342            .sum();
12343        let operating_cash_flow = pretax_income + depreciation;
12344
12345        // GL accounts for reference data.
12346        let accounts: Vec<String> = self
12347            .coa
12348            .as_ref()
12349            .map(|coa| {
12350                coa.get_postable_accounts()
12351                    .iter()
12352                    .map(|acc| acc.account_code().to_string())
12353                    .collect()
12354            })
12355            .unwrap_or_default();
12356
12357        // Team member IDs and display names from master data.
12358        let team_member_ids: Vec<String> = self
12359            .master_data
12360            .employees
12361            .iter()
12362            .take(8) // Cap team size
12363            .map(|e| e.employee_id.clone())
12364            .collect();
12365        let team_member_pairs: Vec<(String, String)> = self
12366            .master_data
12367            .employees
12368            .iter()
12369            .take(8)
12370            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12371            .collect();
12372
12373        let vendor_names: Vec<String> = self
12374            .master_data
12375            .vendors
12376            .iter()
12377            .map(|v| v.name.clone())
12378            .collect();
12379        let customer_names: Vec<String> = self
12380            .master_data
12381            .customers
12382            .iter()
12383            .map(|c| c.name.clone())
12384            .collect();
12385
12386        let entity_codes: Vec<String> = self
12387            .config
12388            .companies
12389            .iter()
12390            .map(|c| c.code.clone())
12391            .collect();
12392
12393        // Journal entry IDs for evidence tracing (sample up to 50).
12394        let journal_entry_ids: Vec<String> = entries
12395            .iter()
12396            .take(50)
12397            .map(|e| e.header.document_id.to_string())
12398            .collect();
12399
12400        // Account balances for risk weighting (aggregate debit - credit per account).
12401        let mut account_balances = std::collections::HashMap::<String, f64>::new();
12402        for entry in entries {
12403            for line in &entry.lines {
12404                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
12405                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
12406                *account_balances
12407                    .entry(line.account_code.clone())
12408                    .or_insert(0.0) += debit_f64 - credit_f64;
12409            }
12410        }
12411
12412        // Internal control IDs and anomaly refs are populated by the
12413        // caller when available; here we default to empty because the
12414        // orchestrator state may not have generated controls/anomalies
12415        // yet at this point in the pipeline.
12416        let control_ids: Vec<String> = Vec::new();
12417        let anomaly_refs: Vec<String> = Vec::new();
12418
12419        let mut context = EngagementContext {
12420            company_code,
12421            company_name,
12422            fiscal_year: start_date.year(),
12423            currency,
12424            total_revenue,
12425            total_assets,
12426            engagement_start: start_date,
12427            report_date: period_end,
12428            pretax_income,
12429            equity,
12430            gross_profit,
12431            working_capital,
12432            operating_cash_flow,
12433            total_debt,
12434            team_member_ids,
12435            team_member_pairs,
12436            accounts,
12437            vendor_names,
12438            customer_names,
12439            journal_entry_ids,
12440            account_balances,
12441            control_ids,
12442            anomaly_refs,
12443            journal_entries: entries.to_vec(),
12444            is_us_listed: false,
12445            entity_codes,
12446            auditor_firm_name: "DataSynth Audit LLP".into(),
12447            accounting_framework: self
12448                .config
12449                .accounting_standards
12450                .framework
12451                .map(|f| match f {
12452                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
12453                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
12454                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
12455                        "French GAAP"
12456                    }
12457                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
12458                        "German GAAP"
12459                    }
12460                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
12461                        "Dual Reporting"
12462                    }
12463                })
12464                .unwrap_or("IFRS")
12465                .into(),
12466        };
12467
12468        // 4. Create and run the FSM engine.
12469        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
12470        let rng = ChaCha8Rng::seed_from_u64(seed);
12471        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
12472
12473        let mut result = engine
12474            .run_engagement(&context)
12475            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
12476
12477        info!(
12478            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
12479             {} phases completed, duration {:.1}h",
12480            result.event_log.len(),
12481            result.artifacts.total_artifacts(),
12482            result.anomalies.len(),
12483            result.phases_completed.len(),
12484            result.total_duration_hours,
12485        );
12486
12487        // 4b. Populate financial data in the artifact bag for downstream consumers.
12488        let tb_entity = context.company_code.clone();
12489        let tb_fy = context.fiscal_year;
12490        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
12491        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
12492            entries,
12493            &tb_entity,
12494            tb_fy,
12495            self.coa.as_ref().map(|c| c.as_ref()),
12496        );
12497
12498        // 5. Map ArtifactBag fields to AuditSnapshot.
12499        let bag = result.artifacts;
12500        let mut snapshot = AuditSnapshot {
12501            engagements: bag.engagements,
12502            engagement_letters: bag.engagement_letters,
12503            materiality_calculations: bag.materiality_calculations,
12504            risk_assessments: bag.risk_assessments,
12505            combined_risk_assessments: bag.combined_risk_assessments,
12506            workpapers: bag.workpapers,
12507            evidence: bag.evidence,
12508            findings: bag.findings,
12509            judgments: bag.judgments,
12510            sampling_plans: bag.sampling_plans,
12511            sampled_items: bag.sampled_items,
12512            analytical_results: bag.analytical_results,
12513            going_concern_assessments: bag.going_concern_assessments,
12514            subsequent_events: bag.subsequent_events,
12515            audit_opinions: bag.audit_opinions,
12516            key_audit_matters: bag.key_audit_matters,
12517            procedure_steps: bag.procedure_steps,
12518            samples: bag.samples,
12519            confirmations: bag.confirmations,
12520            confirmation_responses: bag.confirmation_responses,
12521            // Store the event trail for downstream export.
12522            fsm_event_trail: Some(result.event_log),
12523            // Fields not produced by the FSM engine remain at their defaults.
12524            ..Default::default()
12525        };
12526
12527        // 6. Add static reference data (same as legacy path).
12528        {
12529            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12530            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12531        }
12532        {
12533            use datasynth_standards::audit::isa_reference::IsaStandard;
12534            snapshot.isa_mappings = IsaStandard::standard_entries();
12535        }
12536
12537        info!(
12538            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
12539             {} risk assessments, {} findings, {} materiality calcs",
12540            snapshot.engagements.len(),
12541            snapshot.workpapers.len(),
12542            snapshot.evidence.len(),
12543            snapshot.risk_assessments.len(),
12544            snapshot.findings.len(),
12545            snapshot.materiality_calculations.len(),
12546        );
12547
12548        Ok(snapshot)
12549    }
12550
12551    /// Export journal entries as graph data for ML training and network reconstruction.
12552    ///
12553    /// Builds a transaction graph where:
12554    /// - Nodes are GL accounts
12555    /// - Edges are money flows from credit to debit accounts
12556    /// - Edge attributes include amount, date, business process, anomaly flags
12557    fn export_graphs(
12558        &mut self,
12559        entries: &[JournalEntry],
12560        _coa: &Arc<ChartOfAccounts>,
12561        stats: &mut EnhancedGenerationStatistics,
12562    ) -> SynthResult<GraphExportSnapshot> {
12563        let pb = self.create_progress_bar(100, "Exporting Graphs");
12564
12565        let mut snapshot = GraphExportSnapshot::default();
12566
12567        // Get output directory
12568        let output_dir = self
12569            .output_path
12570            .clone()
12571            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12572        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12573
12574        // Process each graph type configuration
12575        for graph_type in &self.config.graph_export.graph_types {
12576            if let Some(pb) = &pb {
12577                pb.inc(10);
12578            }
12579
12580            // Build transaction graph
12581            let graph_config = TransactionGraphConfig {
12582                include_vendors: false,
12583                include_customers: false,
12584                create_debit_credit_edges: true,
12585                include_document_nodes: graph_type.include_document_nodes,
12586                min_edge_weight: graph_type.min_edge_weight,
12587                aggregate_parallel_edges: graph_type.aggregate_edges,
12588                framework: None,
12589            };
12590
12591            let mut builder = TransactionGraphBuilder::new(graph_config);
12592            builder.add_journal_entries(entries);
12593            let graph = builder.build();
12594
12595            // Update stats
12596            stats.graph_node_count += graph.node_count();
12597            stats.graph_edge_count += graph.edge_count();
12598
12599            if let Some(pb) = &pb {
12600                pb.inc(40);
12601            }
12602
12603            // Export to each configured format
12604            for format in &self.config.graph_export.formats {
12605                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12606
12607                // Create output directory
12608                if let Err(e) = std::fs::create_dir_all(&format_dir) {
12609                    warn!("Failed to create graph output directory: {}", e);
12610                    continue;
12611                }
12612
12613                match format {
12614                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12615                        let pyg_config = PyGExportConfig {
12616                            common: datasynth_graph::CommonExportConfig {
12617                                export_node_features: true,
12618                                export_edge_features: true,
12619                                export_node_labels: true,
12620                                export_edge_labels: true,
12621                                export_masks: true,
12622                                train_ratio: self.config.graph_export.train_ratio,
12623                                val_ratio: self.config.graph_export.validation_ratio,
12624                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12625                            },
12626                            one_hot_categoricals: false,
12627                        };
12628
12629                        let exporter = PyGExporter::new(pyg_config);
12630                        match exporter.export(&graph, &format_dir) {
12631                            Ok(metadata) => {
12632                                snapshot.exports.insert(
12633                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
12634                                    GraphExportInfo {
12635                                        name: graph_type.name.clone(),
12636                                        format: "pytorch_geometric".to_string(),
12637                                        output_path: format_dir.clone(),
12638                                        node_count: metadata.num_nodes,
12639                                        edge_count: metadata.num_edges,
12640                                    },
12641                                );
12642                                snapshot.graph_count += 1;
12643                            }
12644                            Err(e) => {
12645                                warn!("Failed to export PyTorch Geometric graph: {}", e);
12646                            }
12647                        }
12648                    }
12649                    datasynth_config::schema::GraphExportFormat::Neo4j => {
12650                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12651
12652                        let neo4j_config = Neo4jExportConfig {
12653                            export_node_properties: true,
12654                            export_edge_properties: true,
12655                            export_features: true,
12656                            generate_cypher: true,
12657                            generate_admin_import: true,
12658                            database_name: "synth".to_string(),
12659                            cypher_batch_size: 1000,
12660                        };
12661
12662                        let exporter = Neo4jExporter::new(neo4j_config);
12663                        match exporter.export(&graph, &format_dir) {
12664                            Ok(metadata) => {
12665                                snapshot.exports.insert(
12666                                    format!("{}_{}", graph_type.name, "neo4j"),
12667                                    GraphExportInfo {
12668                                        name: graph_type.name.clone(),
12669                                        format: "neo4j".to_string(),
12670                                        output_path: format_dir.clone(),
12671                                        node_count: metadata.num_nodes,
12672                                        edge_count: metadata.num_edges,
12673                                    },
12674                                );
12675                                snapshot.graph_count += 1;
12676                            }
12677                            Err(e) => {
12678                                warn!("Failed to export Neo4j graph: {}", e);
12679                            }
12680                        }
12681                    }
12682                    datasynth_config::schema::GraphExportFormat::Dgl => {
12683                        use datasynth_graph::{DGLExportConfig, DGLExporter};
12684
12685                        let dgl_config = DGLExportConfig {
12686                            common: datasynth_graph::CommonExportConfig {
12687                                export_node_features: true,
12688                                export_edge_features: true,
12689                                export_node_labels: true,
12690                                export_edge_labels: true,
12691                                export_masks: true,
12692                                train_ratio: self.config.graph_export.train_ratio,
12693                                val_ratio: self.config.graph_export.validation_ratio,
12694                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12695                            },
12696                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
12697                            include_pickle_script: true, // DGL ecosystem standard helper
12698                        };
12699
12700                        let exporter = DGLExporter::new(dgl_config);
12701                        match exporter.export(&graph, &format_dir) {
12702                            Ok(metadata) => {
12703                                snapshot.exports.insert(
12704                                    format!("{}_{}", graph_type.name, "dgl"),
12705                                    GraphExportInfo {
12706                                        name: graph_type.name.clone(),
12707                                        format: "dgl".to_string(),
12708                                        output_path: format_dir.clone(),
12709                                        node_count: metadata.common.num_nodes,
12710                                        edge_count: metadata.common.num_edges,
12711                                    },
12712                                );
12713                                snapshot.graph_count += 1;
12714                            }
12715                            Err(e) => {
12716                                warn!("Failed to export DGL graph: {}", e);
12717                            }
12718                        }
12719                    }
12720                    datasynth_config::schema::GraphExportFormat::RustGraph => {
12721                        use datasynth_graph::{
12722                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12723                        };
12724
12725                        let rustgraph_config = RustGraphExportConfig {
12726                            include_features: true,
12727                            include_temporal: true,
12728                            include_labels: true,
12729                            source_name: "datasynth".to_string(),
12730                            batch_id: None,
12731                            output_format: RustGraphOutputFormat::JsonLines,
12732                            export_node_properties: true,
12733                            export_edge_properties: true,
12734                            pretty_print: false,
12735                        };
12736
12737                        let exporter = RustGraphExporter::new(rustgraph_config);
12738                        match exporter.export(&graph, &format_dir) {
12739                            Ok(metadata) => {
12740                                snapshot.exports.insert(
12741                                    format!("{}_{}", graph_type.name, "rustgraph"),
12742                                    GraphExportInfo {
12743                                        name: graph_type.name.clone(),
12744                                        format: "rustgraph".to_string(),
12745                                        output_path: format_dir.clone(),
12746                                        node_count: metadata.num_nodes,
12747                                        edge_count: metadata.num_edges,
12748                                    },
12749                                );
12750                                snapshot.graph_count += 1;
12751                            }
12752                            Err(e) => {
12753                                warn!("Failed to export RustGraph: {}", e);
12754                            }
12755                        }
12756                    }
12757                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12758                        // Hypergraph export is handled separately in Phase 10b
12759                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12760                    }
12761                }
12762            }
12763
12764            if let Some(pb) = &pb {
12765                pb.inc(40);
12766            }
12767        }
12768
12769        stats.graph_export_count = snapshot.graph_count;
12770        snapshot.exported = snapshot.graph_count > 0;
12771
12772        if let Some(pb) = pb {
12773            pb.finish_with_message(format!(
12774                "Graphs exported: {} graphs ({} nodes, {} edges)",
12775                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12776            ));
12777        }
12778
12779        Ok(snapshot)
12780    }
12781
12782    /// Build additional graph types (banking, approval, entity) when relevant data
12783    /// is available. These run as a late phase because the data they need (banking
12784    /// snapshot, intercompany snapshot) is only generated after the main graph
12785    /// export phase.
12786    fn build_additional_graphs(
12787        &self,
12788        banking: &BankingSnapshot,
12789        intercompany: &IntercompanySnapshot,
12790        entries: &[JournalEntry],
12791        stats: &mut EnhancedGenerationStatistics,
12792    ) {
12793        let output_dir = self
12794            .output_path
12795            .clone()
12796            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12797        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12798
12799        // Banking graph: build when banking customers and transactions exist
12800        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12801            info!("Phase 10c: Building banking network graph");
12802            let config = BankingGraphConfig::default();
12803            let mut builder = BankingGraphBuilder::new(config);
12804            builder.add_customers(&banking.customers);
12805            builder.add_accounts(&banking.accounts, &banking.customers);
12806            builder.add_transactions(&banking.transactions);
12807            let graph = builder.build();
12808
12809            let node_count = graph.node_count();
12810            let edge_count = graph.edge_count();
12811            stats.graph_node_count += node_count;
12812            stats.graph_edge_count += edge_count;
12813
12814            // Export as PyG if configured
12815            for format in &self.config.graph_export.formats {
12816                if matches!(
12817                    format,
12818                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12819                ) {
12820                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12821                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12822                        warn!("Failed to create banking graph output dir: {}", e);
12823                        continue;
12824                    }
12825                    let pyg_config = PyGExportConfig::default();
12826                    let exporter = PyGExporter::new(pyg_config);
12827                    if let Err(e) = exporter.export(&graph, &format_dir) {
12828                        warn!("Failed to export banking graph as PyG: {}", e);
12829                    } else {
12830                        info!(
12831                            "Banking network graph exported: {} nodes, {} edges",
12832                            node_count, edge_count
12833                        );
12834                    }
12835                }
12836            }
12837        }
12838
12839        // Approval graph: build from journal entry approval workflows
12840        let approval_entries: Vec<_> = entries
12841            .iter()
12842            .filter(|je| je.header.approval_workflow.is_some())
12843            .collect();
12844
12845        if !approval_entries.is_empty() {
12846            info!(
12847                "Phase 10c: Building approval network graph ({} entries with approvals)",
12848                approval_entries.len()
12849            );
12850            let config = ApprovalGraphConfig::default();
12851            let mut builder = ApprovalGraphBuilder::new(config);
12852
12853            for je in &approval_entries {
12854                if let Some(ref wf) = je.header.approval_workflow {
12855                    for action in &wf.actions {
12856                        let record = datasynth_core::models::ApprovalRecord {
12857                            approval_id: format!(
12858                                "APR-{}-{}",
12859                                je.header.document_id, action.approval_level
12860                            ),
12861                            document_number: je.header.document_id.to_string(),
12862                            document_type: "JE".to_string(),
12863                            company_code: je.company_code().to_string(),
12864                            requester_id: wf.preparer_id.clone(),
12865                            requester_name: Some(wf.preparer_name.clone()),
12866                            approver_id: action.actor_id.clone(),
12867                            approver_name: action.actor_name.clone(),
12868                            approval_date: je.posting_date(),
12869                            action: format!("{:?}", action.action),
12870                            amount: wf.amount,
12871                            approval_limit: None,
12872                            comments: action.comments.clone(),
12873                            delegation_from: None,
12874                            is_auto_approved: false,
12875                        };
12876                        builder.add_approval(&record);
12877                    }
12878                }
12879            }
12880
12881            let graph = builder.build();
12882            let node_count = graph.node_count();
12883            let edge_count = graph.edge_count();
12884            stats.graph_node_count += node_count;
12885            stats.graph_edge_count += edge_count;
12886
12887            // Export as PyG if configured
12888            for format in &self.config.graph_export.formats {
12889                if matches!(
12890                    format,
12891                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12892                ) {
12893                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12894                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12895                        warn!("Failed to create approval graph output dir: {}", e);
12896                        continue;
12897                    }
12898                    let pyg_config = PyGExportConfig::default();
12899                    let exporter = PyGExporter::new(pyg_config);
12900                    if let Err(e) = exporter.export(&graph, &format_dir) {
12901                        warn!("Failed to export approval graph as PyG: {}", e);
12902                    } else {
12903                        info!(
12904                            "Approval network graph exported: {} nodes, {} edges",
12905                            node_count, edge_count
12906                        );
12907                    }
12908                }
12909            }
12910        }
12911
12912        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
12913        if self.config.companies.len() >= 2 {
12914            info!(
12915                "Phase 10c: Building entity relationship graph ({} companies)",
12916                self.config.companies.len()
12917            );
12918
12919            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12920                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12921
12922            // Map CompanyConfig → Company objects
12923            let parent_code = &self.config.companies[0].code;
12924            let mut companies: Vec<datasynth_core::models::Company> =
12925                Vec::with_capacity(self.config.companies.len());
12926
12927            // First company is the parent
12928            let first = &self.config.companies[0];
12929            companies.push(datasynth_core::models::Company::parent(
12930                &first.code,
12931                &first.name,
12932                &first.country,
12933                &first.currency,
12934            ));
12935
12936            // Remaining companies are subsidiaries (100% owned by parent)
12937            for cc in self.config.companies.iter().skip(1) {
12938                companies.push(datasynth_core::models::Company::subsidiary(
12939                    &cc.code,
12940                    &cc.name,
12941                    &cc.country,
12942                    &cc.currency,
12943                    parent_code,
12944                    rust_decimal::Decimal::from(100),
12945                ));
12946            }
12947
12948            // Build IntercompanyRelationship records (same logic as phase_intercompany)
12949            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12950                self.config
12951                    .companies
12952                    .iter()
12953                    .skip(1)
12954                    .enumerate()
12955                    .map(|(i, cc)| {
12956                        let mut rel =
12957                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
12958                                format!("REL{:03}", i + 1),
12959                                parent_code.clone(),
12960                                cc.code.clone(),
12961                                rust_decimal::Decimal::from(100),
12962                                start_date,
12963                            );
12964                        rel.functional_currency = cc.currency.clone();
12965                        rel
12966                    })
12967                    .collect();
12968
12969            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
12970            builder.add_companies(&companies);
12971            builder.add_ownership_relationships(&relationships);
12972
12973            // Thread IC matched-pair transaction edges into the entity graph
12974            for pair in &intercompany.matched_pairs {
12975                builder.add_intercompany_edge(
12976                    &pair.seller_company,
12977                    &pair.buyer_company,
12978                    pair.amount,
12979                    &format!("{:?}", pair.transaction_type),
12980                );
12981            }
12982
12983            let graph = builder.build();
12984            let node_count = graph.node_count();
12985            let edge_count = graph.edge_count();
12986            stats.graph_node_count += node_count;
12987            stats.graph_edge_count += edge_count;
12988
12989            // Export as PyG if configured
12990            for format in &self.config.graph_export.formats {
12991                if matches!(
12992                    format,
12993                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12994                ) {
12995                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
12996                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12997                        warn!("Failed to create entity graph output dir: {}", e);
12998                        continue;
12999                    }
13000                    let pyg_config = PyGExportConfig::default();
13001                    let exporter = PyGExporter::new(pyg_config);
13002                    if let Err(e) = exporter.export(&graph, &format_dir) {
13003                        warn!("Failed to export entity graph as PyG: {}", e);
13004                    } else {
13005                        info!(
13006                            "Entity relationship graph exported: {} nodes, {} edges",
13007                            node_count, edge_count
13008                        );
13009                    }
13010                }
13011            }
13012        } else {
13013            debug!(
13014                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
13015                self.config.companies.len()
13016            );
13017        }
13018    }
13019
13020    /// Export a multi-layer hypergraph for RustGraph integration.
13021    ///
13022    /// Builds a 3-layer hypergraph:
13023    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
13024    /// - Layer 2: Process Events (all process family document flows + OCPM events)
13025    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
13026    #[allow(clippy::too_many_arguments)]
13027    fn export_hypergraph(
13028        &self,
13029        coa: &Arc<ChartOfAccounts>,
13030        entries: &[JournalEntry],
13031        document_flows: &DocumentFlowSnapshot,
13032        sourcing: &SourcingSnapshot,
13033        hr: &HrSnapshot,
13034        manufacturing: &ManufacturingSnapshot,
13035        banking: &BankingSnapshot,
13036        audit: &AuditSnapshot,
13037        financial_reporting: &FinancialReportingSnapshot,
13038        ocpm: &OcpmSnapshot,
13039        compliance: &ComplianceRegulationsSnapshot,
13040        stats: &mut EnhancedGenerationStatistics,
13041    ) -> SynthResult<HypergraphExportInfo> {
13042        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
13043        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
13044        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
13045        use datasynth_graph::models::hypergraph::AggregationStrategy;
13046
13047        let hg_settings = &self.config.graph_export.hypergraph;
13048
13049        // Parse aggregation strategy from config string
13050        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
13051            "truncate" => AggregationStrategy::Truncate,
13052            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
13053            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
13054            "importance_sample" => AggregationStrategy::ImportanceSample,
13055            _ => AggregationStrategy::PoolByCounterparty,
13056        };
13057
13058        let builder_config = HypergraphConfig {
13059            max_nodes: hg_settings.max_nodes,
13060            aggregation_strategy,
13061            include_coso: hg_settings.governance_layer.include_coso,
13062            include_controls: hg_settings.governance_layer.include_controls,
13063            include_sox: hg_settings.governance_layer.include_sox,
13064            include_vendors: hg_settings.governance_layer.include_vendors,
13065            include_customers: hg_settings.governance_layer.include_customers,
13066            include_employees: hg_settings.governance_layer.include_employees,
13067            include_p2p: hg_settings.process_layer.include_p2p,
13068            include_o2c: hg_settings.process_layer.include_o2c,
13069            include_s2c: hg_settings.process_layer.include_s2c,
13070            include_h2r: hg_settings.process_layer.include_h2r,
13071            include_mfg: hg_settings.process_layer.include_mfg,
13072            include_bank: hg_settings.process_layer.include_bank,
13073            include_audit: hg_settings.process_layer.include_audit,
13074            include_r2r: hg_settings.process_layer.include_r2r,
13075            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
13076            docs_per_counterparty_threshold: hg_settings
13077                .process_layer
13078                .docs_per_counterparty_threshold,
13079            include_accounts: hg_settings.accounting_layer.include_accounts,
13080            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
13081            include_cross_layer_edges: hg_settings.cross_layer.enabled,
13082            include_compliance: self.config.compliance_regulations.enabled,
13083            include_tax: true,
13084            include_treasury: true,
13085            include_esg: true,
13086            include_project: true,
13087            include_intercompany: true,
13088            include_temporal_events: true,
13089        };
13090
13091        let mut builder = HypergraphBuilder::new(builder_config);
13092
13093        // Layer 1: Governance & Controls
13094        builder.add_coso_framework();
13095
13096        // Add controls if available (generated during JE generation)
13097        // Controls are generated per-company; we use the standard set
13098        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
13099            let controls = InternalControl::standard_controls();
13100            builder.add_controls(&controls);
13101        }
13102
13103        // Add master data
13104        builder.add_vendors(&self.master_data.vendors);
13105        builder.add_customers(&self.master_data.customers);
13106        builder.add_employees(&self.master_data.employees);
13107
13108        // Layer 2: Process Events (all process families)
13109        builder.add_p2p_documents(
13110            &document_flows.purchase_orders,
13111            &document_flows.goods_receipts,
13112            &document_flows.vendor_invoices,
13113            &document_flows.payments,
13114        );
13115        builder.add_o2c_documents(
13116            &document_flows.sales_orders,
13117            &document_flows.deliveries,
13118            &document_flows.customer_invoices,
13119        );
13120        builder.add_s2c_documents(
13121            &sourcing.sourcing_projects,
13122            &sourcing.qualifications,
13123            &sourcing.rfx_events,
13124            &sourcing.bids,
13125            &sourcing.bid_evaluations,
13126            &sourcing.contracts,
13127        );
13128        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
13129        builder.add_mfg_documents(
13130            &manufacturing.production_orders,
13131            &manufacturing.quality_inspections,
13132            &manufacturing.cycle_counts,
13133        );
13134        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
13135        builder.add_audit_documents(
13136            &audit.engagements,
13137            &audit.workpapers,
13138            &audit.findings,
13139            &audit.evidence,
13140            &audit.risk_assessments,
13141            &audit.judgments,
13142            &audit.materiality_calculations,
13143            &audit.audit_opinions,
13144            &audit.going_concern_assessments,
13145        );
13146        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
13147
13148        // OCPM events as hyperedges
13149        if let Some(ref event_log) = ocpm.event_log {
13150            builder.add_ocpm_events(event_log);
13151        }
13152
13153        // Compliance regulations as cross-layer nodes
13154        if self.config.compliance_regulations.enabled
13155            && hg_settings.governance_layer.include_controls
13156        {
13157            // Reconstruct ComplianceStandard objects from the registry
13158            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13159            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
13160                .standard_records
13161                .iter()
13162                .filter_map(|r| {
13163                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
13164                    registry.get(&sid).cloned()
13165                })
13166                .collect();
13167
13168            builder.add_compliance_regulations(
13169                &standards,
13170                &compliance.findings,
13171                &compliance.filings,
13172            );
13173        }
13174
13175        // Layer 3: Accounting Network
13176        builder.add_accounts(coa);
13177        builder.add_journal_entries_as_hyperedges(entries);
13178
13179        // Build the hypergraph
13180        let hypergraph = builder.build();
13181
13182        // Export
13183        let output_dir = self
13184            .output_path
13185            .clone()
13186            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13187        let hg_dir = output_dir
13188            .join(&self.config.graph_export.output_subdirectory)
13189            .join(&hg_settings.output_subdirectory);
13190
13191        // Branch on output format
13192        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
13193            "unified" => {
13194                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
13195                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
13196                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
13197                })?;
13198                (
13199                    metadata.num_nodes,
13200                    metadata.num_edges,
13201                    metadata.num_hyperedges,
13202                )
13203            }
13204            _ => {
13205                // "native" or any unrecognized format → use existing exporter
13206                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
13207                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
13208                    SynthError::generation(format!("Hypergraph export failed: {e}"))
13209                })?;
13210                (
13211                    metadata.num_nodes,
13212                    metadata.num_edges,
13213                    metadata.num_hyperedges,
13214                )
13215            }
13216        };
13217
13218        // Stream to RustGraph ingest endpoint if configured
13219        #[cfg(feature = "streaming")]
13220        if let Some(ref target_url) = hg_settings.stream_target {
13221            use crate::stream_client::{StreamClient, StreamConfig};
13222            use std::io::Write as _;
13223
13224            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
13225            let stream_config = StreamConfig {
13226                target_url: target_url.clone(),
13227                batch_size: hg_settings.stream_batch_size,
13228                api_key,
13229                ..StreamConfig::default()
13230            };
13231
13232            match StreamClient::new(stream_config) {
13233                Ok(mut client) => {
13234                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
13235                    match exporter.export_to_writer(&hypergraph, &mut client) {
13236                        Ok(_) => {
13237                            if let Err(e) = client.flush() {
13238                                warn!("Failed to flush stream client: {}", e);
13239                            } else {
13240                                info!("Streamed {} records to {}", client.total_sent(), target_url);
13241                            }
13242                        }
13243                        Err(e) => {
13244                            warn!("Streaming export failed: {}", e);
13245                        }
13246                    }
13247                }
13248                Err(e) => {
13249                    warn!("Failed to create stream client: {}", e);
13250                }
13251            }
13252        }
13253
13254        // Update stats
13255        stats.graph_node_count += num_nodes;
13256        stats.graph_edge_count += num_edges;
13257        stats.graph_export_count += 1;
13258
13259        Ok(HypergraphExportInfo {
13260            node_count: num_nodes,
13261            edge_count: num_edges,
13262            hyperedge_count: num_hyperedges,
13263            output_path: hg_dir,
13264        })
13265    }
13266
13267    /// Generate banking KYC/AML data.
13268    ///
13269    /// Creates banking customers, accounts, and transactions with AML typology injection.
13270    /// Uses the BankingOrchestrator from synth-banking crate.
13271    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
13272        let pb = self.create_progress_bar(100, "Generating Banking Data");
13273
13274        // Build the banking orchestrator from config
13275        let orchestrator = BankingOrchestratorBuilder::new()
13276            .config(self.config.banking.clone())
13277            .seed(self.seed + 9000)
13278            .country_pack(self.primary_pack().clone())
13279            .build();
13280
13281        if let Some(pb) = &pb {
13282            pb.inc(10);
13283        }
13284
13285        // Generate the banking data
13286        let result = orchestrator.generate();
13287
13288        if let Some(pb) = &pb {
13289            pb.inc(90);
13290            pb.finish_with_message(format!(
13291                "Banking: {} customers, {} transactions",
13292                result.customers.len(),
13293                result.transactions.len()
13294            ));
13295        }
13296
13297        // Cross-reference banking customers with core master data so that
13298        // banking customer names align with the enterprise customer list.
13299        // We rotate through core customers, overlaying their name and country
13300        // onto the generated banking customers where possible.
13301        let mut banking_customers = result.customers;
13302        let core_customers = &self.master_data.customers;
13303        if !core_customers.is_empty() {
13304            for (i, bc) in banking_customers.iter_mut().enumerate() {
13305                let core = &core_customers[i % core_customers.len()];
13306                bc.name = CustomerName::business(&core.name);
13307                bc.residence_country = core.country.clone();
13308                bc.enterprise_customer_id = Some(core.customer_id.clone());
13309            }
13310            debug!(
13311                "Cross-referenced {} banking customers with {} core customers",
13312                banking_customers.len(),
13313                core_customers.len()
13314            );
13315        }
13316
13317        Ok(BankingSnapshot {
13318            customers: banking_customers,
13319            accounts: result.accounts,
13320            transactions: result.transactions,
13321            transaction_labels: result.transaction_labels,
13322            customer_labels: result.customer_labels,
13323            account_labels: result.account_labels,
13324            relationship_labels: result.relationship_labels,
13325            narratives: result.narratives,
13326            suspicious_count: result.stats.suspicious_count,
13327            scenario_count: result.scenarios.len(),
13328        })
13329    }
13330
13331    /// Calculate total transactions to generate.
13332    fn calculate_total_transactions(&self) -> u64 {
13333        let months = self.config.global.period_months as f64;
13334        self.config
13335            .companies
13336            .iter()
13337            .map(|c| {
13338                let annual = c.annual_transaction_volume.count() as f64;
13339                let weighted = annual * c.volume_weight;
13340                (weighted * months / 12.0) as u64
13341            })
13342            .sum()
13343    }
13344
13345    /// Create a progress bar if progress display is enabled.
13346    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
13347        if !self.phase_config.show_progress {
13348            return None;
13349        }
13350
13351        let pb = if let Some(mp) = &self.multi_progress {
13352            mp.add(ProgressBar::new(total))
13353        } else {
13354            ProgressBar::new(total)
13355        };
13356
13357        pb.set_style(
13358            ProgressStyle::default_bar()
13359                .template(&format!(
13360                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
13361                ))
13362                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
13363                .progress_chars("#>-"),
13364        );
13365
13366        Some(pb)
13367    }
13368
13369    /// Get the generated chart of accounts.
13370    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
13371        self.coa.clone()
13372    }
13373
13374    /// Get the generated master data.
13375    pub fn get_master_data(&self) -> &MasterDataSnapshot {
13376        &self.master_data
13377    }
13378
13379    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
13380    fn phase_compliance_regulations(
13381        &mut self,
13382        _stats: &mut EnhancedGenerationStatistics,
13383    ) -> SynthResult<ComplianceRegulationsSnapshot> {
13384        if !self.phase_config.generate_compliance_regulations {
13385            return Ok(ComplianceRegulationsSnapshot::default());
13386        }
13387
13388        info!("Phase: Generating Compliance Regulations Data");
13389
13390        let cr_config = &self.config.compliance_regulations;
13391
13392        // Determine jurisdictions: from config or inferred from companies
13393        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
13394            self.config
13395                .companies
13396                .iter()
13397                .map(|c| c.country.clone())
13398                .collect::<std::collections::HashSet<_>>()
13399                .into_iter()
13400                .collect()
13401        } else {
13402            cr_config.jurisdictions.clone()
13403        };
13404
13405        // Determine reference date
13406        let fallback_date =
13407            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
13408        let reference_date = cr_config
13409            .reference_date
13410            .as_ref()
13411            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
13412            .unwrap_or_else(|| {
13413                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13414                    .unwrap_or(fallback_date)
13415            });
13416
13417        // Generate standards registry data
13418        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
13419        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
13420        let cross_reference_records = reg_gen.generate_cross_reference_records();
13421        let jurisdiction_records =
13422            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
13423
13424        info!(
13425            "  Standards: {} records, {} cross-references, {} jurisdictions",
13426            standard_records.len(),
13427            cross_reference_records.len(),
13428            jurisdiction_records.len()
13429        );
13430
13431        // Generate audit procedures (if enabled)
13432        let audit_procedures = if cr_config.audit_procedures.enabled {
13433            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
13434                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
13435                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
13436                confidence_level: cr_config.audit_procedures.confidence_level,
13437                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
13438            };
13439            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
13440                self.seed + 9000,
13441                proc_config,
13442            );
13443            let registry = reg_gen.registry();
13444            let mut all_procs = Vec::new();
13445            for jurisdiction in &jurisdictions {
13446                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
13447                all_procs.extend(procs);
13448            }
13449            info!("  Audit procedures: {}", all_procs.len());
13450            all_procs
13451        } else {
13452            Vec::new()
13453        };
13454
13455        // Generate compliance findings (if enabled)
13456        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
13457            let finding_config =
13458                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
13459                    finding_rate: cr_config.findings.finding_rate,
13460                    material_weakness_rate: cr_config.findings.material_weakness_rate,
13461                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
13462                    generate_remediation: cr_config.findings.generate_remediation,
13463                };
13464            let mut finding_gen =
13465                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
13466                    self.seed + 9100,
13467                    finding_config,
13468                );
13469            let mut all_findings = Vec::new();
13470            for company in &self.config.companies {
13471                let company_findings =
13472                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
13473                all_findings.extend(company_findings);
13474            }
13475            info!("  Compliance findings: {}", all_findings.len());
13476            all_findings
13477        } else {
13478            Vec::new()
13479        };
13480
13481        // Generate regulatory filings (if enabled)
13482        let filings = if cr_config.filings.enabled {
13483            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
13484                filing_types: cr_config.filings.filing_types.clone(),
13485                generate_status_progression: cr_config.filings.generate_status_progression,
13486            };
13487            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
13488                self.seed + 9200,
13489                filing_config,
13490            );
13491            let company_codes: Vec<String> = self
13492                .config
13493                .companies
13494                .iter()
13495                .map(|c| c.code.clone())
13496                .collect();
13497            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13498                .unwrap_or(fallback_date);
13499            let filings = filing_gen.generate_filings(
13500                &company_codes,
13501                &jurisdictions,
13502                start_date,
13503                self.config.global.period_months,
13504            );
13505            info!("  Regulatory filings: {}", filings.len());
13506            filings
13507        } else {
13508            Vec::new()
13509        };
13510
13511        // Build compliance graph (if enabled)
13512        let compliance_graph = if cr_config.graph.enabled {
13513            let graph_config = datasynth_graph::ComplianceGraphConfig {
13514                include_standard_nodes: cr_config.graph.include_compliance_nodes,
13515                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
13516                include_cross_references: cr_config.graph.include_cross_references,
13517                include_supersession_edges: cr_config.graph.include_supersession_edges,
13518                include_account_links: cr_config.graph.include_account_links,
13519                include_control_links: cr_config.graph.include_control_links,
13520                include_company_links: cr_config.graph.include_company_links,
13521            };
13522            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
13523
13524            // Add standard nodes
13525            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
13526                .iter()
13527                .map(|r| datasynth_graph::StandardNodeInput {
13528                    standard_id: r.standard_id.clone(),
13529                    title: r.title.clone(),
13530                    category: r.category.clone(),
13531                    domain: r.domain.clone(),
13532                    is_active: r.is_active,
13533                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
13534                    applicable_account_types: r.applicable_account_types.clone(),
13535                    applicable_processes: r.applicable_processes.clone(),
13536                })
13537                .collect();
13538            builder.add_standards(&standard_inputs);
13539
13540            // Add jurisdiction nodes
13541            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
13542                jurisdiction_records
13543                    .iter()
13544                    .map(|r| datasynth_graph::JurisdictionNodeInput {
13545                        country_code: r.country_code.clone(),
13546                        country_name: r.country_name.clone(),
13547                        framework: r.accounting_framework.clone(),
13548                        standard_count: r.standard_count,
13549                        tax_rate: r.statutory_tax_rate,
13550                    })
13551                    .collect();
13552            builder.add_jurisdictions(&jurisdiction_inputs);
13553
13554            // Add cross-reference edges
13555            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
13556                cross_reference_records
13557                    .iter()
13558                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
13559                        from_standard: r.from_standard.clone(),
13560                        to_standard: r.to_standard.clone(),
13561                        relationship: r.relationship.clone(),
13562                        convergence_level: r.convergence_level,
13563                    })
13564                    .collect();
13565            builder.add_cross_references(&xref_inputs);
13566
13567            // Add jurisdiction→standard mappings
13568            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
13569                .iter()
13570                .map(|r| datasynth_graph::JurisdictionMappingInput {
13571                    country_code: r.jurisdiction.clone(),
13572                    standard_id: r.standard_id.clone(),
13573                })
13574                .collect();
13575            builder.add_jurisdiction_mappings(&mapping_inputs);
13576
13577            // Add procedure nodes
13578            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13579                .iter()
13580                .map(|p| datasynth_graph::ProcedureNodeInput {
13581                    procedure_id: p.procedure_id.clone(),
13582                    standard_id: p.standard_id.clone(),
13583                    procedure_type: p.procedure_type.clone(),
13584                    sample_size: p.sample_size,
13585                    confidence_level: p.confidence_level,
13586                })
13587                .collect();
13588            builder.add_procedures(&proc_inputs);
13589
13590            // Add finding nodes
13591            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13592                .iter()
13593                .map(|f| datasynth_graph::FindingNodeInput {
13594                    finding_id: f.finding_id.to_string(),
13595                    standard_id: f
13596                        .related_standards
13597                        .first()
13598                        .map(|s| s.as_str().to_string())
13599                        .unwrap_or_default(),
13600                    severity: f.severity.to_string(),
13601                    deficiency_level: f.deficiency_level.to_string(),
13602                    severity_score: f.deficiency_level.severity_score(),
13603                    control_id: f.control_id.clone(),
13604                    affected_accounts: f.affected_accounts.clone(),
13605                })
13606                .collect();
13607            builder.add_findings(&finding_inputs);
13608
13609            // Cross-domain: link standards to accounts from chart of accounts
13610            if cr_config.graph.include_account_links {
13611                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13612                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13613                for std_record in &standard_records {
13614                    if let Some(std_obj) =
13615                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
13616                            &std_record.standard_id,
13617                        ))
13618                    {
13619                        for acct_type in &std_obj.applicable_account_types {
13620                            account_links.push(datasynth_graph::AccountLinkInput {
13621                                standard_id: std_record.standard_id.clone(),
13622                                account_code: acct_type.clone(),
13623                                account_name: acct_type.clone(),
13624                            });
13625                        }
13626                    }
13627                }
13628                builder.add_account_links(&account_links);
13629            }
13630
13631            // Cross-domain: link standards to internal controls
13632            if cr_config.graph.include_control_links {
13633                let mut control_links = Vec::new();
13634                // SOX/PCAOB standards link to all controls
13635                let sox_like_ids: Vec<String> = standard_records
13636                    .iter()
13637                    .filter(|r| {
13638                        r.standard_id.starts_with("SOX")
13639                            || r.standard_id.starts_with("PCAOB-AS-2201")
13640                    })
13641                    .map(|r| r.standard_id.clone())
13642                    .collect();
13643                // Get control IDs from config (C001-C060 standard controls)
13644                let control_ids = [
13645                    ("C001", "Cash Controls"),
13646                    ("C002", "Large Transaction Approval"),
13647                    ("C010", "PO Approval"),
13648                    ("C011", "Three-Way Match"),
13649                    ("C020", "Revenue Recognition"),
13650                    ("C021", "Credit Check"),
13651                    ("C030", "Manual JE Approval"),
13652                    ("C031", "Period Close Review"),
13653                    ("C032", "Account Reconciliation"),
13654                    ("C040", "Payroll Processing"),
13655                    ("C050", "Fixed Asset Capitalization"),
13656                    ("C060", "Intercompany Elimination"),
13657                ];
13658                for sox_id in &sox_like_ids {
13659                    for (ctrl_id, ctrl_name) in &control_ids {
13660                        control_links.push(datasynth_graph::ControlLinkInput {
13661                            standard_id: sox_id.clone(),
13662                            control_id: ctrl_id.to_string(),
13663                            control_name: ctrl_name.to_string(),
13664                        });
13665                    }
13666                }
13667                builder.add_control_links(&control_links);
13668            }
13669
13670            // Cross-domain: filing nodes with company links
13671            if cr_config.graph.include_company_links {
13672                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13673                    .iter()
13674                    .enumerate()
13675                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
13676                        filing_id: format!("F{:04}", i + 1),
13677                        filing_type: f.filing_type.to_string(),
13678                        company_code: f.company_code.clone(),
13679                        jurisdiction: f.jurisdiction.clone(),
13680                        status: format!("{:?}", f.status),
13681                    })
13682                    .collect();
13683                builder.add_filings(&filing_inputs);
13684            }
13685
13686            let graph = builder.build();
13687            info!(
13688                "  Compliance graph: {} nodes, {} edges",
13689                graph.nodes.len(),
13690                graph.edges.len()
13691            );
13692            Some(graph)
13693        } else {
13694            None
13695        };
13696
13697        self.check_resources_with_log("post-compliance-regulations")?;
13698
13699        Ok(ComplianceRegulationsSnapshot {
13700            standard_records,
13701            cross_reference_records,
13702            jurisdiction_records,
13703            audit_procedures,
13704            findings,
13705            filings,
13706            compliance_graph,
13707        })
13708    }
13709
13710    /// Build a lineage graph describing config → phase → output relationships.
13711    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13712        use super::lineage::LineageGraphBuilder;
13713
13714        let mut builder = LineageGraphBuilder::new();
13715
13716        // Config sections
13717        builder.add_config_section("config:global", "Global Config");
13718        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13719        builder.add_config_section("config:transactions", "Transaction Config");
13720
13721        // Generator phases
13722        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13723        builder.add_generator_phase("phase:je", "Journal Entry Generation");
13724
13725        // Config → phase edges
13726        builder.configured_by("phase:coa", "config:chart_of_accounts");
13727        builder.configured_by("phase:je", "config:transactions");
13728
13729        // Output files
13730        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13731        builder.produced_by("output:je", "phase:je");
13732
13733        // Optional phases based on config
13734        if self.phase_config.generate_master_data {
13735            builder.add_config_section("config:master_data", "Master Data Config");
13736            builder.add_generator_phase("phase:master_data", "Master Data Generation");
13737            builder.configured_by("phase:master_data", "config:master_data");
13738            builder.input_to("phase:master_data", "phase:je");
13739        }
13740
13741        if self.phase_config.generate_document_flows {
13742            builder.add_config_section("config:document_flows", "Document Flow Config");
13743            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13744            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13745            builder.configured_by("phase:p2p", "config:document_flows");
13746            builder.configured_by("phase:o2c", "config:document_flows");
13747
13748            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13749            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13750            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13751            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13752            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13753
13754            builder.produced_by("output:po", "phase:p2p");
13755            builder.produced_by("output:gr", "phase:p2p");
13756            builder.produced_by("output:vi", "phase:p2p");
13757            builder.produced_by("output:so", "phase:o2c");
13758            builder.produced_by("output:ci", "phase:o2c");
13759        }
13760
13761        if self.phase_config.inject_anomalies {
13762            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13763            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13764            builder.configured_by("phase:anomaly", "config:fraud");
13765            builder.add_output_file(
13766                "output:labels",
13767                "Anomaly Labels",
13768                "labels/anomaly_labels.csv",
13769            );
13770            builder.produced_by("output:labels", "phase:anomaly");
13771        }
13772
13773        if self.phase_config.generate_audit {
13774            builder.add_config_section("config:audit", "Audit Config");
13775            builder.add_generator_phase("phase:audit", "Audit Data Generation");
13776            builder.configured_by("phase:audit", "config:audit");
13777        }
13778
13779        if self.phase_config.generate_banking {
13780            builder.add_config_section("config:banking", "Banking Config");
13781            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13782            builder.configured_by("phase:banking", "config:banking");
13783        }
13784
13785        if self.config.llm.enabled {
13786            builder.add_config_section("config:llm", "LLM Enrichment Config");
13787            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13788            builder.configured_by("phase:llm_enrichment", "config:llm");
13789        }
13790
13791        if self.config.diffusion.enabled {
13792            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13793            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13794            builder.configured_by("phase:diffusion", "config:diffusion");
13795        }
13796
13797        if self.config.causal.enabled {
13798            builder.add_config_section("config:causal", "Causal Generation Config");
13799            builder.add_generator_phase("phase:causal", "Causal Overlay");
13800            builder.configured_by("phase:causal", "config:causal");
13801        }
13802
13803        builder.build()
13804    }
13805
13806    // -----------------------------------------------------------------------
13807    // Trial-balance helpers used to replace hardcoded proxy values
13808    // -----------------------------------------------------------------------
13809
13810    /// Compute total revenue for a company from its journal entries.
13811    ///
13812    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
13813    /// net credits on all revenue-account lines filtered to `company_code`.
13814    fn compute_company_revenue(
13815        entries: &[JournalEntry],
13816        company_code: &str,
13817    ) -> rust_decimal::Decimal {
13818        use rust_decimal::Decimal;
13819        let mut revenue = Decimal::ZERO;
13820        for je in entries {
13821            if je.header.company_code != company_code {
13822                continue;
13823            }
13824            for line in &je.lines {
13825                if line.gl_account.starts_with('4') {
13826                    // Revenue is credit-normal
13827                    revenue += line.credit_amount - line.debit_amount;
13828                }
13829            }
13830        }
13831        revenue.max(Decimal::ZERO)
13832    }
13833
13834    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
13835    ///
13836    /// Asset accounts start with "1"; liability accounts start with "2".
13837    fn compute_entity_net_assets(
13838        entries: &[JournalEntry],
13839        entity_code: &str,
13840    ) -> rust_decimal::Decimal {
13841        use rust_decimal::Decimal;
13842        let mut asset_net = Decimal::ZERO;
13843        let mut liability_net = Decimal::ZERO;
13844        for je in entries {
13845            if je.header.company_code != entity_code {
13846                continue;
13847            }
13848            for line in &je.lines {
13849                if line.gl_account.starts_with('1') {
13850                    asset_net += line.debit_amount - line.credit_amount;
13851                } else if line.gl_account.starts_with('2') {
13852                    liability_net += line.credit_amount - line.debit_amount;
13853                }
13854            }
13855        }
13856        asset_net - liability_net
13857    }
13858}
13859
13860/// Get the directory name for a graph export format.
13861fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13862    match format {
13863        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13864        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13865        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13866        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13867        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13868    }
13869}
13870
13871/// Aggregate journal entry lines into per-account trial balance rows.
13872///
13873/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
13874/// debit/credit totals and a net balance (debit minus credit).
13875fn compute_trial_balance_entries(
13876    entries: &[JournalEntry],
13877    entity_code: &str,
13878    fiscal_year: i32,
13879    coa: Option<&ChartOfAccounts>,
13880) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13881    use std::collections::BTreeMap;
13882
13883    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13884        BTreeMap::new();
13885
13886    for je in entries {
13887        for line in &je.lines {
13888            let entry = balances.entry(line.account_code.clone()).or_default();
13889            entry.0 += line.debit_amount;
13890            entry.1 += line.credit_amount;
13891        }
13892    }
13893
13894    balances
13895        .into_iter()
13896        .map(
13897            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13898                account_description: coa
13899                    .and_then(|c| c.get_account(&account_code))
13900                    .map(|a| a.description().to_string())
13901                    .unwrap_or_else(|| account_code.clone()),
13902                account_code,
13903                debit_balance: debit,
13904                credit_balance: credit,
13905                net_balance: debit - credit,
13906                entity_code: entity_code.to_string(),
13907                period: format!("FY{}", fiscal_year),
13908            },
13909        )
13910        .collect()
13911}
13912
13913#[cfg(test)]
13914#[allow(clippy::unwrap_used)]
13915mod tests {
13916    use super::*;
13917    use datasynth_config::schema::*;
13918
13919    fn create_test_config() -> GeneratorConfig {
13920        GeneratorConfig {
13921            global: GlobalConfig {
13922                industry: IndustrySector::Manufacturing,
13923                start_date: "2024-01-01".to_string(),
13924                period_months: 1,
13925                seed: Some(42),
13926                parallel: false,
13927                group_currency: "USD".to_string(),
13928                presentation_currency: None,
13929                worker_threads: 0,
13930                memory_limit_mb: 0,
13931                fiscal_year_months: None,
13932            },
13933            companies: vec![CompanyConfig {
13934                code: "1000".to_string(),
13935                name: "Test Company".to_string(),
13936                currency: "USD".to_string(),
13937                functional_currency: None,
13938                country: "US".to_string(),
13939                annual_transaction_volume: TransactionVolume::TenK,
13940                volume_weight: 1.0,
13941                fiscal_year_variant: "K4".to_string(),
13942            }],
13943            chart_of_accounts: ChartOfAccountsConfig {
13944                complexity: CoAComplexity::Small,
13945                industry_specific: true,
13946                custom_accounts: None,
13947                min_hierarchy_depth: 2,
13948                max_hierarchy_depth: 4,
13949            },
13950            transactions: TransactionConfig::default(),
13951            output: OutputConfig::default(),
13952            fraud: FraudConfig::default(),
13953            internal_controls: InternalControlsConfig::default(),
13954            business_processes: BusinessProcessConfig::default(),
13955            user_personas: UserPersonaConfig::default(),
13956            templates: TemplateConfig::default(),
13957            approval: ApprovalConfig::default(),
13958            departments: DepartmentConfig::default(),
13959            master_data: MasterDataConfig::default(),
13960            document_flows: DocumentFlowConfig::default(),
13961            intercompany: IntercompanyConfig::default(),
13962            balance: BalanceConfig::default(),
13963            ocpm: OcpmConfig::default(),
13964            audit: AuditGenerationConfig::default(),
13965            banking: datasynth_banking::BankingConfig::default(),
13966            data_quality: DataQualitySchemaConfig::default(),
13967            scenario: ScenarioConfig::default(),
13968            temporal: TemporalDriftConfig::default(),
13969            graph_export: GraphExportConfig::default(),
13970            streaming: StreamingSchemaConfig::default(),
13971            rate_limit: RateLimitSchemaConfig::default(),
13972            temporal_attributes: TemporalAttributeSchemaConfig::default(),
13973            relationships: RelationshipSchemaConfig::default(),
13974            accounting_standards: AccountingStandardsConfig::default(),
13975            audit_standards: AuditStandardsConfig::default(),
13976            distributions: Default::default(),
13977            temporal_patterns: Default::default(),
13978            vendor_network: VendorNetworkSchemaConfig::default(),
13979            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
13980            relationship_strength: RelationshipStrengthSchemaConfig::default(),
13981            cross_process_links: CrossProcessLinksSchemaConfig::default(),
13982            organizational_events: OrganizationalEventsSchemaConfig::default(),
13983            behavioral_drift: BehavioralDriftSchemaConfig::default(),
13984            market_drift: MarketDriftSchemaConfig::default(),
13985            drift_labeling: DriftLabelingSchemaConfig::default(),
13986            anomaly_injection: Default::default(),
13987            industry_specific: Default::default(),
13988            fingerprint_privacy: Default::default(),
13989            quality_gates: Default::default(),
13990            compliance: Default::default(),
13991            webhooks: Default::default(),
13992            llm: Default::default(),
13993            diffusion: Default::default(),
13994            causal: Default::default(),
13995            source_to_pay: Default::default(),
13996            financial_reporting: Default::default(),
13997            hr: Default::default(),
13998            manufacturing: Default::default(),
13999            sales_quotes: Default::default(),
14000            tax: Default::default(),
14001            treasury: Default::default(),
14002            project_accounting: Default::default(),
14003            esg: Default::default(),
14004            country_packs: None,
14005            scenarios: Default::default(),
14006            session: Default::default(),
14007            compliance_regulations: Default::default(),
14008        }
14009    }
14010
/// Constructing an orchestrator with default phases succeeds for the
/// baseline test configuration.
#[test]
fn test_enhanced_orchestrator_creation() {
    let created = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(created.is_ok());
}
14017
/// With only journal entry generation switched on, `generate` succeeds and
/// yields at least one journal entry.
#[test]
fn test_minimal_generation() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let outcome = orchestrator.generate();

    assert!(outcome.is_ok());
    let generated = outcome.unwrap();
    assert!(!generated.journal_entries.is_empty());
}
14037
/// Running only the master data phase populates vendors, customers and
/// materials.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        // Phase switches
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // Requested volumes
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    let master = &generated.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
14062
/// Master data plus document flows produce P2P/O2C chains and the flattened
/// purchase and sales order lists.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        // Phase switches
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Requested volumes
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();
    let flows = &generated.document_flows;

    // Both chain kinds must be present.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened document lists must be filled as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
14096
/// Journal entry generation with anomaly injection enabled yields entries
/// and an anomaly label summary.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        inject_anomalies: true,
        generate_master_data: false,
        generate_document_flows: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    // Journal entries must exist for anomalies to be injected into.
    assert!(!generated.journal_entries.is_empty());

    // Injection is probabilistic (~833 entries at a 2% rate), so only the
    // presence of the summary structure is asserted, not a specific count.
    assert!(generated.anomaly_labels.summary.is_some());
}
14119
/// Master data, document flows, journal entries and balance validation all
/// run together and each leaves results behind.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        // Phase switches
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        validate_balances: true,
        inject_anomalies: false,
        inject_data_quality: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Requested volumes
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    // Every enabled phase contributed output.
    assert!(!generated.master_data.vendors.is_empty());
    assert!(!generated.master_data.customers.is_empty());
    assert!(!generated.document_flows.p2p_chains.is_empty());
    assert!(!generated.document_flows.o2c_chains.is_empty());
    assert!(!generated.journal_entries.is_empty());
    assert!(generated.statistics.accounts_count > 0);

    // Subledger linking ran.
    let subledger = &generated.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation ran.
    let validation = &generated.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
14161
/// AP/AR subledger invoices are produced one-for-one from vendor/customer
/// invoices, and the statistics report the same counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        // Phase switches
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Requested volumes
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    let flows = &generated.document_flows;
    let subledger = &generated.subledger;
    let stats = &generated.statistics;

    // Document flows produced invoices on both sides.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // The subledger was populated from those flows.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Reported statistics agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
14217
/// Balance validation over generated journal entries runs, finds no
/// unbalanced entries, and reports equal debit and credit totals.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        validate_balances: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();
    let validation = &generated.balance_validation;

    // The validation phase actually ran over some entries.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // Generated journal entries are expected to balance individually...
    assert!(!validation.has_unbalanced_entries);

    // ...and in aggregate.
    assert_eq!(validation.total_debits, validation.total_credits);
}
14247
/// Reported statistics match the sizes of the generated collections.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        // Phase switches
        generate_master_data: true,
        generate_journal_entries: true,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        // Requested volumes
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    let stats = &generated.statistics;
    let master = &generated.master_data;

    // Each counter must equal the length of the collection it describes.
    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(
        stats.total_entries as usize,
        generated.journal_entries.len()
    );
}
14286
/// `PhaseConfig::default()` enables the core phases, leaves anomaly
/// injection off, and requests a non-zero number of vendors and customers.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Switches
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Volumes
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
14299
/// A freshly constructed orchestrator has no chart of accounts yet.
#[test]
fn test_get_coa_before_generation() {
    let orchestrator = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    // Nothing has been generated, so there is no CoA to hand out.
    assert!(orchestrator.get_coa().is_none());
}
14308
/// Once `generate` has run, the orchestrator exposes a chart of accounts.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    // Only the side effect on the orchestrator matters here.
    let _ = orchestrator.generate().unwrap();

    assert!(orchestrator.get_coa().is_some());
}
14327
/// Master-data-only generation must yield a non-empty vendor list in the result.
#[test]
fn test_get_master_data() {
    let config = create_test_config();

    // Only the master data phase is enabled, with five records per entity type.
    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: false,
        generate_document_flows: false,
        generate_master_data: true,
        employees_per_company: 5,
        assets_per_company: 5,
        materials_per_company: 5,
        customers_per_company: 5,
        vendors_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = orchestrator.generate().unwrap();

    // The generated master data is read from the returned result.
    let vendors = &output.master_data.vendors;
    assert!(!vendors.is_empty());
}
14351
/// `with_progress(false)` must switch off `show_progress` in the orchestrator's phase config.
#[test]
fn test_with_progress_builder() {
    let orchestrator = EnhancedOrchestrator::with_defaults(create_test_config())
        .unwrap()
        .with_progress(false);

    // The builder call should be reflected in the stored phase configuration.
    let progress_enabled = orchestrator.phase_config.show_progress;
    assert!(!progress_enabled);
}
14362
/// Generates data for a two-company configuration and checks the aggregate statistics.
///
/// A second company (code "2000", EUR, DE) is appended to the base test configuration and
/// master data is requested at five records per entity type per company. The test then
/// expects at least ten vendors and customers overall and exactly two companies.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // Should have master data for both companies (5 per company => at least 10 overall).
    // Failure messages report the observed count so a regression is diagnosable from the log.
    assert!(
        result.statistics.vendor_count >= 10,
        "expected at least 10 vendors (5 per company), got {}",
        result.statistics.vendor_count
    );
    assert!(
        result.statistics.customer_count >= 10,
        "expected at least 10 customers (5 per company), got {}",
        result.statistics.customer_count
    );

    // `assert_eq!` prints both sides on mismatch, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
14399
/// Requesting document flows without master data must produce no P2P or O2C chains.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let config = create_test_config();

    let phases = PhaseConfig {
        // Flows are requested...
        generate_document_flows: true,
        // ...but the master data they would be built from is not.
        generate_master_data: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = orchestrator.generate().unwrap();

    // With no master data available, both chain collections are expected to stay empty.
    let flows = &output.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
14419
/// The reported `total_line_items` must equal the sum of `line_count()` over all entries.
#[test]
fn test_journal_entry_line_item_count() {
    let config = create_test_config();
    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = orchestrator.generate().unwrap();

    // Recompute the line item total independently of the statistics.
    let mut expected_line_items: u64 = 0;
    for entry in output.journal_entries.iter() {
        expected_line_items += entry.line_count() as u64;
    }

    assert_eq!(output.statistics.total_line_items, expected_line_items);
}
14443
/// Runs generation with the audit phase enabled and checks the audit output.
///
/// Verifies that the requested number of engagements is produced, that the audit
/// collections asserted below are non-empty, and that every audit counter in the
/// statistics equals the length of the corresponding collection.
#[test]
fn test_audit_generation() {
    let config = create_test_config();
    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = orchestrator.generate().unwrap();

    // Short aliases for the two parts of the result compared throughout this test.
    let audit = &output.audit;
    let stats = &output.statistics;

    // Should have generated audit data
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // New ISA entity collections should also be populated
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    // Samples may or may not be generated depending on workpaper sampling methods,
    // so no emptiness check is made for them.
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Statistics should match the collection sizes one-to-one.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(
        stats.audit_procedure_step_count,
        audit.procedure_steps.len()
    );
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(
        stats.audit_related_party_count,
        audit.related_parties.len()
    );
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
14563
/// With the test configuration left untouched, the LLM, diffusion, causal and
/// counterfactual phases must leave their statistics at zero / `None`.
#[test]
fn test_new_phases_disabled_by_default() {
    let config = create_test_config();

    // The test configuration is expected to ship with these three phases disabled.
    assert!(!config.llm.enabled);
    assert!(!config.diffusion.enabled);
    assert!(!config.causal.enabled);

    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = orchestrator.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);

    // Diffusion enhancement
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);

    // Causal overlay
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());

    // Counterfactual pairs
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
14595
/// With counterfactual generation enabled, one pair is expected per journal entry.
///
/// Checks that the number of counterfactual pairs (and the matching statistic) equals the
/// number of journal entries, and that all `pair_id` values are distinct.
#[test]
fn test_counterfactual_generation_enabled() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        generate_period_close: false, // Disable so entry count matches counterfactual pairs
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // With JE generation enabled, counterfactual pairs should be generated.
    // NOTE(review): if generation ever returns zero journal entries this guard makes the
    // test pass without checking anything — confirm whether that should be a hard failure.
    if !result.journal_entries.is_empty() {
        assert_eq!(
            result.counterfactual_pairs.len(),
            result.journal_entries.len()
        );
        assert_eq!(
            result.statistics.counterfactual_pair_count,
            result.journal_entries.len()
        );

        // Each pair should have a distinct pair_id. Borrowing the ids is enough for a
        // uniqueness check, so no per-element clone is needed.
        let ids: std::collections::HashSet<_> = result
            .counterfactual_pairs
            .iter()
            .map(|p| &p.pair_id)
            .collect();
        assert_eq!(ids.len(), result.counterfactual_pairs.len());
    }
}
14632
/// With LLM enrichment enabled and capped at three vendors, the enriched-vendor count
/// must be positive and must not exceed the cap.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    // Master data only: five vendors per company, three of every other entity type.
    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: false,
        generate_document_flows: false,
        generate_master_data: true,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orchestrator.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment should have run, bounded by `max_vendor_enrichments`.
    assert!(stats.llm_vendors_enriched > 0);
    assert!(stats.llm_vendors_enriched <= 3);
}
14660
/// With diffusion enabled and `sample_size` set to 20, exactly 20 samples must be reported.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orchestrator.generate().unwrap();

    // The reported sample count should equal the configured sample size.
    let stats = &output.statistics;
    assert_eq!(stats.diffusion_samples_generated, 20);
}
14683
/// Causal overlay with the "fraud_detection" template and validation switched on:
/// 100 samples must be reported and a validation outcome must be recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "fraud_detection".to_string();
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orchestrator.generate().unwrap();
    let stats = &output.statistics;

    // Sample count should equal the configured sample size.
    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so an outcome (pass or fail) must be present.
    assert!(stats.causal_validation_passed.is_some());
}
14709
/// Causal overlay with the "revenue_cycle" template and validation switched off:
/// 50 samples must be reported and no validation outcome may be recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "revenue_cycle".to_string();
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orchestrator.generate().unwrap();
    let stats = &output.statistics;

    // Sample count should equal the configured sample size.
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was disabled, so the outcome must remain unset.
    assert!(stats.causal_validation_passed.is_none());
}
14735
/// Enables LLM enrichment, diffusion and the causal overlay in one run and checks
/// that each of them reports output.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();

    // LLM enrichment
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;

    // Diffusion enhancement
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;

    // Causal overlay (template left at whatever the test configuration provides)
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: true,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orchestrator.generate().unwrap();
    let stats = &output.statistics;

    // All three phases should have run
    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
14771
/// Round-trips `EnhancedGenerationStatistics` through JSON and checks that the
/// LLM, diffusion and causal fields keep their values.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        causal_validation_passed: Some(true),
        causal_samples_generated: 100,
        causal_generation_ms: 200,
        diffusion_samples_generated: 50,
        diffusion_enhancement_ms: 100,
        llm_vendors_enriched: 10,
        llm_enrichment_ms: 42,
        ..Default::default()
    };

    // Serialize, then parse the produced text back into the same type.
    let encoded = serde_json::to_string(&original).unwrap();
    let roundtrip: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    assert_eq!(roundtrip.llm_enrichment_ms, 42);
    assert_eq!(roundtrip.llm_vendors_enriched, 10);
    assert_eq!(roundtrip.diffusion_enhancement_ms, 100);
    assert_eq!(roundtrip.diffusion_samples_generated, 50);
    assert_eq!(roundtrip.causal_generation_ms, 200);
    assert_eq!(roundtrip.causal_samples_generated, 100);
    assert_eq!(roundtrip.causal_validation_passed, Some(true));
}
14798
/// JSON that lacks the LLM, diffusion and causal fields must still deserialize,
/// with those fields falling back to `0` / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Statistics payload containing none of the newer fields.
    let legacy_json = r#"{
            "total_entries": 100,
            "total_line_items": 500,
            "accounts_count": 50,
            "companies_count": 1,
            "period_months": 12,
            "vendor_count": 10,
            "customer_count": 20,
            "material_count": 15,
            "asset_count": 5,
            "employee_count": 8,
            "p2p_chain_count": 5,
            "o2c_chain_count": 5,
            "ap_invoice_count": 5,
            "ar_invoice_count": 5,
            "ocpm_event_count": 0,
            "ocpm_object_count": 0,
            "ocpm_case_count": 0,
            "audit_engagement_count": 0,
            "audit_workpaper_count": 0,
            "audit_evidence_count": 0,
            "audit_risk_count": 0,
            "audit_finding_count": 0,
            "audit_judgment_count": 0,
            "anomalies_injected": 0,
            "data_quality_issues": 0,
            "banking_customer_count": 0,
            "banking_account_count": 0,
            "banking_transaction_count": 0,
            "banking_suspicious_count": 0,
            "graph_export_count": 0,
            "graph_node_count": 0,
            "graph_edge_count": 0
        }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_json).unwrap();

    // Fields absent from the payload must come back as 0 / None.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
14848}